Merge branch 'akpm' (patchbomb from Andrew Morton)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 7 Aug 2014 04:14:42 +0000 (21:14 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 7 Aug 2014 04:14:42 +0000 (21:14 -0700)
Merge incoming from Andrew Morton:
 - Various misc things.
 - arch/sh updates.
 - Part of ocfs2.  Review is slow.
 - Slab updates.
 - Most of -mm.
 - printk updates.
 - lib/ updates.
 - checkpatch updates.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (226 commits)
  checkpatch: update $declaration_macros, add uninitialized_var
  checkpatch: warn on missing spaces in broken up quoted
  checkpatch: fix false positives for --strict "space after cast" test
  checkpatch: fix false positive MISSING_BREAK warnings with --file
  checkpatch: add test for native c90 types in unusual order
  checkpatch: add signed generic types
  checkpatch: add short int to c variable types
  checkpatch: add for_each tests to indentation and brace tests
  checkpatch: fix brace style misuses of else and while
  checkpatch: add --fix option for a couple OPEN_BRACE misuses
  checkpatch: use the correct indentation for which()
  checkpatch: add fix_insert_line and fix_delete_line helpers
  checkpatch: add ability to insert and delete lines to patch/file
  checkpatch: add an index variable for fixed lines
  checkpatch: warn on break after goto or return with same tab indentation
  checkpatch: emit a warning on file add/move/delete
  checkpatch: add test for commit id formatting style in commit log
  checkpatch: emit fewer kmalloc_array/kcalloc conversion warnings
  checkpatch: improve "no space after cast" test
  checkpatch: allow multiple const * types
  ...

156 files changed:
Documentation/RCU/whatisRCU.txt
Documentation/kernel-parameters.txt
Documentation/trace/postprocess/trace-vmscan-postprocess.pl
Makefile
arch/arm/mm/dma-mapping.c
arch/ia64/mm/init.c
arch/powerpc/kvm/Makefile
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/kvm/book3s_hv_builtin.c
arch/powerpc/kvm/book3s_hv_cma.c [deleted file]
arch/powerpc/kvm/book3s_hv_cma.h [deleted file]
arch/powerpc/mm/mem.c
arch/score/include/uapi/asm/ptrace.h
arch/sh/drivers/dma/Kconfig
arch/sh/include/asm/io_noioport.h
arch/sh/include/cpu-sh4/cpu/dma-register.h
arch/sh/include/cpu-sh4a/cpu/dma.h
arch/sh/kernel/cpu/sh4a/clock-sh7724.c
arch/sh/kernel/time.c
arch/sh/mm/asids-debugfs.c
arch/sh/mm/init.c
arch/tile/kernel/module.c
arch/x86/mm/fault.c
arch/x86/mm/init_32.c
arch/x86/mm/init_64.c
drivers/ata/Kconfig
drivers/ata/libata-core.c
drivers/base/Kconfig
drivers/base/dma-contiguous.c
drivers/base/memory.c
drivers/base/node.c
drivers/block/zram/zram_drv.c
drivers/block/zram/zram_drv.h
drivers/firmware/memmap.c
drivers/gpu/drm/drm_hashtab.c
drivers/hwmon/asus_atk0110.c
drivers/lguest/core.c
drivers/net/ethernet/intel/i40e/i40e_ethtool.c
drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
drivers/staging/android/binder.c
drivers/staging/lustre/lustre/libcfs/hash.c
drivers/tty/sysrq.c
fs/fscache/main.c
fs/logfs/readwrite.c
fs/namespace.c
fs/notify/fanotify/fanotify.c
fs/notify/fanotify/fanotify_user.c
fs/notify/inode_mark.c
fs/notify/inotify/inotify_fsnotify.c
fs/notify/inotify/inotify_user.c
fs/notify/notification.c
fs/notify/vfsmount_mark.c
fs/ntfs/file.c
fs/ocfs2/alloc.c
fs/ocfs2/dlm/dlmdomain.c
fs/ocfs2/dlm/dlmmaster.c
fs/ocfs2/move_extents.c
fs/ocfs2/refcounttree.c
fs/ocfs2/slot_map.c
fs/proc/meminfo.c
fs/proc/task_mmu.c
fs/squashfs/file_direct.c
fs/squashfs/super.c
include/linux/bitmap.h
include/linux/byteorder/generic.h
include/linux/cma.h [new file with mode: 0644]
include/linux/dma-contiguous.h
include/linux/fs.h
include/linux/fsnotify_backend.h
include/linux/gfp.h
include/linux/glob.h [new file with mode: 0644]
include/linux/highmem.h
include/linux/huge_mm.h
include/linux/hugetlb.h
include/linux/kernel.h
include/linux/klist.h
include/linux/list.h
include/linux/memblock.h
include/linux/memory_hotplug.h
include/linux/mmdebug.h
include/linux/mmu_notifier.h
include/linux/mmzone.h
include/linux/nodemask.h
include/linux/oom.h
include/linux/page-flags.h
include/linux/pagemap.h
include/linux/printk.h
include/linux/rculist.h
include/linux/swap.h
include/linux/vmalloc.h
include/linux/zbud.h
include/linux/zlib.h
include/linux/zpool.h [new file with mode: 0644]
include/trace/events/migrate.h
include/trace/events/pagemap.h
init/Kconfig
kernel/auditfilter.c
kernel/exit.c
kernel/printk/printk.c
kernel/smp.c
kernel/sysctl.c
kernel/watchdog.c
lib/Kconfig
lib/Kconfig.debug
lib/Makefile
lib/bitmap.c
lib/cmdline.c
lib/glob.c [new file with mode: 0644]
lib/klist.c
lib/list_sort.c
lib/string_helpers.c
lib/test-kstrtox.c
lib/zlib_deflate/deflate.c
lib/zlib_inflate/inflate.c
mm/Kconfig
mm/Makefile
mm/cma.c [new file with mode: 0644]
mm/filemap.c
mm/gup.c
mm/highmem.c
mm/huge_memory.c
mm/hugetlb.c
mm/hwpoison-inject.c
mm/internal.h
mm/madvise.c
mm/memcontrol.c
mm/memory-failure.c
mm/memory.c
mm/memory_hotplug.c
mm/mlock.c
mm/mmap.c
mm/mmu_notifier.c
mm/oom_kill.c
mm/page-writeback.c
mm/page_alloc.c
mm/readahead.c
mm/shmem.c
mm/slab.c
mm/slab.h
mm/slab_common.c
mm/slub.c
mm/swap.c
mm/util.c
mm/vmalloc.c
mm/vmscan.c
mm/vmstat.c
mm/zbud.c
mm/zpool.c [new file with mode: 0644]
mm/zsmalloc.c
mm/zswap.c
net/batman-adv/fragmentation.c
net/bridge/br_multicast.c
net/ipv4/fib_trie.c
net/ipv6/addrlabel.c
net/xfrm/xfrm_policy.c
scripts/checkpatch.pl

index 49b8551a3b68e00de3e9819f4a3b20c6cf4b3fa3..e48c57f1943bab162530416afb35f3acdff1729d 100644 (file)
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -818,7 +818,7 @@ RCU pointer/list update:
        list_add_tail_rcu
        list_del_rcu
        list_replace_rcu
-       hlist_add_after_rcu
+       hlist_add_behind_rcu
        hlist_add_before_rcu
        hlist_add_head_rcu
        hlist_del_rcu
index 883901b9ac4f11d0ea47d42769c8ee3c6e3e3360..9344d833b7ea231d42ecf7731851f71f9602033e 100644 (file)
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1716,8 +1716,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        7 (KERN_DEBUG)          debug-level messages
 
        log_buf_len=n[KMG]      Sets the size of the printk ring buffer,
-                       in bytes.  n must be a power of two.  The default
-                       size is set in the kernel config file.
+                       in bytes.  n must be a power of two and greater
+                       than the minimal size. The minimal size is defined
+                       by LOG_BUF_SHIFT kernel config parameter. There is
+                       also CONFIG_LOG_CPU_MAX_BUF_SHIFT config parameter
+                       that allows to increase the default size depending on
+                       the number of CPUs. See init/Kconfig for more details.
 
        logo.nologo     [FB] Disables display of the built-in Linux logo.
                        This may be used to provide more screen space for
index 78c9a7b2b58fdb0a55f72c23890e07dcf8f912b2..8f961ef2b4577d4b7485e84bd379dc5ebd022f47 100644 (file)
--- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
+++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
@@ -47,6 +47,10 @@ use constant HIGH_KSWAPD_REWAKEUP            => 21;
 use constant HIGH_NR_SCANNED                   => 22;
 use constant HIGH_NR_TAKEN                     => 23;
 use constant HIGH_NR_RECLAIMED                 => 24;
+use constant HIGH_NR_FILE_SCANNED              => 25;
+use constant HIGH_NR_ANON_SCANNED              => 26;
+use constant HIGH_NR_FILE_RECLAIMED            => 27;
+use constant HIGH_NR_ANON_RECLAIMED            => 28;
 
 my %perprocesspid;
 my %perprocess;
@@ -56,14 +60,18 @@ my $opt_read_procstat;
 
 my $total_wakeup_kswapd;
 my ($total_direct_reclaim, $total_direct_nr_scanned);
+my ($total_direct_nr_file_scanned, $total_direct_nr_anon_scanned);
 my ($total_direct_latency, $total_kswapd_latency);
 my ($total_direct_nr_reclaimed);
+my ($total_direct_nr_file_reclaimed, $total_direct_nr_anon_reclaimed);
 my ($total_direct_writepage_file_sync, $total_direct_writepage_file_async);
 my ($total_direct_writepage_anon_sync, $total_direct_writepage_anon_async);
 my ($total_kswapd_nr_scanned, $total_kswapd_wake);
+my ($total_kswapd_nr_file_scanned, $total_kswapd_nr_anon_scanned);
 my ($total_kswapd_writepage_file_sync, $total_kswapd_writepage_file_async);
 my ($total_kswapd_writepage_anon_sync, $total_kswapd_writepage_anon_async);
 my ($total_kswapd_nr_reclaimed);
+my ($total_kswapd_nr_file_reclaimed, $total_kswapd_nr_anon_reclaimed);
 
 # Catch sigint and exit on request
 my $sigint_report = 0;
@@ -374,6 +382,7 @@ EVENT_PROCESS:
                        }
                        my $isolate_mode = $1;
                        my $nr_scanned = $4;
+                       my $file = $6;
 
                        # To closer match vmstat scanning statistics, only count isolate_both
                        # and isolate_inactive as scanning. isolate_active is rotation
@@ -382,6 +391,11 @@ EVENT_PROCESS:
                        # isolate_both     == 3
                        if ($isolate_mode != 2) {
                                $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned;
+                               if ($file == 1) {
+                                       $perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED} += $nr_scanned;
+                               } else {
+                                       $perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED} += $nr_scanned;
+                               }
                        }
                } elsif ($tracepoint eq "mm_vmscan_lru_shrink_inactive") {
                        $details = $6;
@@ -391,8 +405,19 @@ EVENT_PROCESS:
                                print "         $regex_lru_shrink_inactive/o\n";
                                next;
                        }
+
                        my $nr_reclaimed = $4;
+                       my $flags = $6;
+                       my $file = 0;
+                       if ($flags =~ /RECLAIM_WB_FILE/) {
+                               $file = 1;
+                       }
                        $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED} += $nr_reclaimed;
+                       if ($file) {
+                               $perprocesspid{$process_pid}->{HIGH_NR_FILE_RECLAIMED} += $nr_reclaimed;
+                       } else {
+                               $perprocesspid{$process_pid}->{HIGH_NR_ANON_RECLAIMED} += $nr_reclaimed;
+                       }
                } elsif ($tracepoint eq "mm_vmscan_writepage") {
                        $details = $6;
                        if ($details !~ /$regex_writepage/o) {
@@ -493,7 +518,11 @@ sub dump_stats {
                $total_direct_reclaim += $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN};
                $total_wakeup_kswapd += $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD};
                $total_direct_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED};
+               $total_direct_nr_file_scanned += $stats{$process_pid}->{HIGH_NR_FILE_SCANNED};
+               $total_direct_nr_anon_scanned += $stats{$process_pid}->{HIGH_NR_ANON_SCANNED};
                $total_direct_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED};
+               $total_direct_nr_file_reclaimed += $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
+               $total_direct_nr_anon_reclaimed += $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
                $total_direct_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
                $total_direct_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
                $total_direct_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
@@ -513,7 +542,11 @@ sub dump_stats {
                        $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN},
                        $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD},
                        $stats{$process_pid}->{HIGH_NR_SCANNED},
+                       $stats{$process_pid}->{HIGH_NR_FILE_SCANNED},
+                       $stats{$process_pid}->{HIGH_NR_ANON_SCANNED},
                        $stats{$process_pid}->{HIGH_NR_RECLAIMED},
+                       $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED},
+                       $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED},
                        $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC},
                        $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC},
                        $this_reclaim_delay / 1000);
@@ -552,7 +585,11 @@ sub dump_stats {
 
                $total_kswapd_wake += $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE};
                $total_kswapd_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED};
+               $total_kswapd_nr_file_scanned += $stats{$process_pid}->{HIGH_NR_FILE_SCANNED};
+               $total_kswapd_nr_anon_scanned += $stats{$process_pid}->{HIGH_NR_ANON_SCANNED};
                $total_kswapd_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED};
+               $total_kswapd_nr_file_reclaimed += $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
+               $total_kswapd_nr_anon_reclaimed += $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
                $total_kswapd_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
                $total_kswapd_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
                $total_kswapd_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
@@ -563,7 +600,11 @@ sub dump_stats {
                        $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE},
                        $stats{$process_pid}->{HIGH_KSWAPD_REWAKEUP},
                        $stats{$process_pid}->{HIGH_NR_SCANNED},
+                       $stats{$process_pid}->{HIGH_NR_FILE_SCANNED},
+                       $stats{$process_pid}->{HIGH_NR_ANON_SCANNED},
                        $stats{$process_pid}->{HIGH_NR_RECLAIMED},
+                       $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED},
+                       $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED},
                        $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC},
                        $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC});
 
@@ -594,7 +635,11 @@ sub dump_stats {
        print "\nSummary\n";
        print "Direct reclaims:                         $total_direct_reclaim\n";
        print "Direct reclaim pages scanned:            $total_direct_nr_scanned\n";
+       print "Direct reclaim file pages scanned:       $total_direct_nr_file_scanned\n";
+       print "Direct reclaim anon pages scanned:       $total_direct_nr_anon_scanned\n";
        print "Direct reclaim pages reclaimed:          $total_direct_nr_reclaimed\n";
+       print "Direct reclaim file pages reclaimed:     $total_direct_nr_file_reclaimed\n";
+       print "Direct reclaim anon pages reclaimed:     $total_direct_nr_anon_reclaimed\n";
        print "Direct reclaim write file sync I/O:      $total_direct_writepage_file_sync\n";
        print "Direct reclaim write anon sync I/O:      $total_direct_writepage_anon_sync\n";
        print "Direct reclaim write file async I/O:     $total_direct_writepage_file_async\n";
@@ -604,7 +649,11 @@ sub dump_stats {
        print "\n";
        print "Kswapd wakeups:                          $total_kswapd_wake\n";
        print "Kswapd pages scanned:                    $total_kswapd_nr_scanned\n";
+       print "Kswapd file pages scanned:               $total_kswapd_nr_file_scanned\n";
+       print "Kswapd anon pages scanned:               $total_kswapd_nr_anon_scanned\n";
        print "Kswapd pages reclaimed:                  $total_kswapd_nr_reclaimed\n";
+       print "Kswapd file pages reclaimed:             $total_kswapd_nr_file_reclaimed\n";
+       print "Kswapd anon pages reclaimed:             $total_kswapd_nr_anon_reclaimed\n";
        print "Kswapd reclaim write file sync I/O:      $total_kswapd_writepage_file_sync\n";
        print "Kswapd reclaim write anon sync I/O:      $total_kswapd_writepage_anon_sync\n";
        print "Kswapd reclaim write file async I/O:     $total_kswapd_writepage_file_async\n";
@@ -629,7 +678,11 @@ sub aggregate_perprocesspid() {
                $perprocess{$process}->{MM_VMSCAN_WAKEUP_KSWAPD} += $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD};
                $perprocess{$process}->{HIGH_KSWAPD_REWAKEUP} += $perprocesspid{$process_pid}->{HIGH_KSWAPD_REWAKEUP};
                $perprocess{$process}->{HIGH_NR_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_SCANNED};
+               $perprocess{$process}->{HIGH_NR_FILE_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED};
+               $perprocess{$process}->{HIGH_NR_ANON_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED};
                $perprocess{$process}->{HIGH_NR_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED};
+               $perprocess{$process}->{HIGH_NR_FILE_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
+               $perprocess{$process}->{HIGH_NR_ANON_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
                $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
                $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
                $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
index d0901b46b4bfd15aedfd3a0ccfa2d3c3255ff8fc..a897c50db515d7adf8dec34f7c3298c9988be953 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -621,6 +621,9 @@ else
 KBUILD_CFLAGS  += -O2
 endif
 
+# Tell gcc to never replace conditional load with a non-conditional one
+KBUILD_CFLAGS  += $(call cc-option,--param=allow-store-data-races=0)
+
 ifdef CONFIG_READABLE_ASM
 # Disable optimizations that make assembler listings hard to read.
 # reorder blocks reorders the control in the function
@@ -636,6 +639,22 @@ KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN})
 endif
 
 # Handle stack protector mode.
+#
+# Since kbuild can potentially perform two passes (first with the old
+# .config values and then with updated .config values), we cannot error out
+# if a desired compiler option is unsupported. If we were to error, kbuild
+# could never get to the second pass and actually notice that we changed
+# the option to something that was supported.
+#
+# Additionally, we don't want to fallback and/or silently change which compiler
+# flags will be used, since that leads to producing kernels with different
+# security feature characteristics depending on the compiler used. ("But I
+# selected CC_STACKPROTECTOR_STRONG! Why did it build with _REGULAR?!")
+#
+# The middle ground is to warn here so that the failed option is obvious, but
+# to let the build fail with bad compiler flags so that we can't produce a
+# kernel when there is a CONFIG and compiler mismatch.
+#
 ifdef CONFIG_CC_STACKPROTECTOR_REGULAR
   stackp-flag := -fstack-protector
   ifeq ($(call cc-option, $(stackp-flag)),)
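
(Not part of the patch: a userspace illustration of the kind of "store data race" that the new --param=allow-store-data-races=0 option in the first Makefile hunk forbids GCC from introducing. The names shared/update/flag are invented for the example.)

#include <stdbool.h>

int shared;	/* assume another thread/CPU reads this concurrently */

/* Source semantics: write 'shared' only when 'flag' is true. */
void update(bool flag, int val)
{
	if (flag)
		shared = val;
}

/*
 * Without the parameter, GCC's optimizers could rewrite the body as roughly:
 *
 *	int tmp = shared;
 *	shared = val;
 *	if (!flag)
 *		shared = tmp;
 *
 * which is equivalent for single-threaded code but briefly stores 'val'
 * even when 'flag' is false -- a store another CPU can observe.
 * allow-store-data-races=0 tells GCC not to invent such stores.
 */
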
index 1f88db06b133c1f253cfc6ced41483cf672ce4a0..7a996aaa061e99fb5d5f3875da51890b0862b1c1 100644 (file)
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -26,6 +26,7 @@
 #include <linux/io.h>
 #include <linux/vmalloc.h>
 #include <linux/sizes.h>
+#include <linux/cma.h>
 
 #include <asm/memory.h>
 #include <asm/highmem.h>
index 25c350264a41012bba72d3d992674aea17d32fba..892d43e32f3b5995936a24d91b4229cb38e78afb 100644 (file)
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -631,7 +631,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
        pgdat = NODE_DATA(nid);
 
-       zone = pgdat->node_zones + ZONE_NORMAL;
+       zone = pgdat->node_zones +
+               zone_for_memory(nid, start, size, ZONE_NORMAL);
        ret = __add_pages(nid, zone, start_pfn, nr_pages);
 
        if (ret)
index ce569b6bf4d8c0f7ad7f87b76a9b1effd9e573de..72905c30082e65a025398a69561d96728ae2714b 100644 (file)
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -90,7 +90,6 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
        book3s_hv_rm_mmu.o \
        book3s_hv_ras.o \
        book3s_hv_builtin.o \
-       book3s_hv_cma.o \
        $(kvm-book3s_64-builtin-xics-objs-y)
 endif
 
index 68468d695f12ab864281f19a3cfd872a6976fc8d..a01744fc3483160092d45c26764daeeaf2bfb041 100644 (file)
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,8 +37,6 @@
 #include <asm/ppc-opcode.h>
 #include <asm/cputable.h>
 
-#include "book3s_hv_cma.h"
-
 /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
 #define MAX_LPID_970   63
 
@@ -64,10 +62,10 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
        }
 
        kvm->arch.hpt_cma_alloc = 0;
-       VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
        page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
        if (page) {
                hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+               memset((void *)hpt, 0, (1 << order));
                kvm->arch.hpt_cma_alloc = 1;
        }
 
index 7cde8a6652056c26f4e05343439c90ca43328188..6cf498a9bc987d62ba335c2e10aabe00dc3f4384 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
 #include <linux/init.h>
 #include <linux/memblock.h>
 #include <linux/sizes.h>
+#include <linux/cma.h>
 
 #include <asm/cputable.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 
-#include "book3s_hv_cma.h"
+#define KVM_CMA_CHUNK_ORDER    18
+
 /*
  * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
  * should be power of 2.
@@ -43,6 +45,8 @@ static unsigned long kvm_cma_resv_ratio = 5;
 unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */
 EXPORT_SYMBOL_GPL(kvm_rma_pages);
 
+static struct cma *kvm_cma;
+
 /* Work out RMLS (real mode limit selector) field value for a given RMA size.
    Assumes POWER7 or PPC970. */
 static inline int lpcr_rmls(unsigned long rma_size)
@@ -97,7 +101,7 @@ struct kvm_rma_info *kvm_alloc_rma()
        ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
        if (!ri)
                return NULL;
-       page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
+       page = cma_alloc(kvm_cma, kvm_rma_pages, get_order(kvm_rma_pages));
        if (!page)
                goto err_out;
        atomic_set(&ri->use_count, 1);
@@ -112,7 +116,7 @@ EXPORT_SYMBOL_GPL(kvm_alloc_rma);
 void kvm_release_rma(struct kvm_rma_info *ri)
 {
        if (atomic_dec_and_test(&ri->use_count)) {
-               kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
+               cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages);
                kfree(ri);
        }
 }
@@ -131,16 +135,18 @@ struct page *kvm_alloc_hpt(unsigned long nr_pages)
 {
        unsigned long align_pages = HPT_ALIGN_PAGES;
 
+       VM_BUG_ON(get_order(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+
        /* Old CPUs require HPT aligned on a multiple of its size */
        if (!cpu_has_feature(CPU_FTR_ARCH_206))
                align_pages = nr_pages;
-       return kvm_alloc_cma(nr_pages, align_pages);
+       return cma_alloc(kvm_cma, nr_pages, get_order(align_pages));
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
 
 void kvm_release_hpt(struct page *page, unsigned long nr_pages)
 {
-       kvm_release_cma(page, nr_pages);
+       cma_release(kvm_cma, page, nr_pages);
 }
 EXPORT_SYMBOL_GPL(kvm_release_hpt);
 
@@ -179,7 +185,8 @@ void __init kvm_cma_reserve(void)
                        align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
 
                align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
-               kvm_cma_declare_contiguous(selected_size, align_size);
+               cma_declare_contiguous(0, selected_size, 0, align_size,
+                       KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma);
        }
 }
 
diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c
deleted file mode 100644 (file)
index d9d3d85..0000000
--- a/arch/powerpc/kvm/book3s_hv_cma.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Contiguous Memory Allocator for ppc KVM hash pagetable  based on CMA
- * for DMA mapping framework
- *
- * Copyright IBM Corporation, 2013
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License or (at your optional) any later version of the license.
- *
- */
-#define pr_fmt(fmt) "kvm_cma: " fmt
-
-#ifdef CONFIG_CMA_DEBUG
-#ifndef DEBUG
-#  define DEBUG
-#endif
-#endif
-
-#include <linux/memblock.h>
-#include <linux/mutex.h>
-#include <linux/sizes.h>
-#include <linux/slab.h>
-
-#include "book3s_hv_cma.h"
-
-struct kvm_cma {
-       unsigned long   base_pfn;
-       unsigned long   count;
-       unsigned long   *bitmap;
-};
-
-static DEFINE_MUTEX(kvm_cma_mutex);
-static struct kvm_cma kvm_cma_area;
-
-/**
- * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling
- *                               for kvm hash pagetable
- * @size:  Size of the reserved memory.
- * @alignment:  Alignment for the contiguous memory area
- *
- * This function reserves memory for kvm cma area. It should be
- * called by arch code when early allocator (memblock or bootmem)
- * is still activate.
- */
-long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment)
-{
-       long base_pfn;
-       phys_addr_t addr;
-       struct kvm_cma *cma = &kvm_cma_area;
-
-       pr_debug("%s(size %lx)\n", __func__, (unsigned long)size);
-
-       if (!size)
-               return -EINVAL;
-       /*
-        * Sanitise input arguments.
-        * We should be pageblock aligned for CMA.
-        */
-       alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order));
-       size = ALIGN(size, alignment);
-       /*
-        * Reserve memory
-        * Use __memblock_alloc_base() since
-        * memblock_alloc_base() panic()s.
-        */
-       addr = __memblock_alloc_base(size, alignment, 0);
-       if (!addr) {
-               base_pfn = -ENOMEM;
-               goto err;
-       } else
-               base_pfn = PFN_DOWN(addr);
-
-       /*
-        * Each reserved area must be initialised later, when more kernel
-        * subsystems (like slab allocator) are available.
-        */
-       cma->base_pfn = base_pfn;
-       cma->count    = size >> PAGE_SHIFT;
-       pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M);
-       return 0;
-err:
-       pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
-       return base_pfn;
-}
-
-/**
- * kvm_alloc_cma() - allocate pages from contiguous area
- * @nr_pages: Requested number of pages.
- * @align_pages: Requested alignment in number of pages
- *
- * This function allocates memory buffer for hash pagetable.
- */
-struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
-{
-       int ret;
-       struct page *page = NULL;
-       struct kvm_cma *cma = &kvm_cma_area;
-       unsigned long chunk_count, nr_chunk;
-       unsigned long mask, pfn, pageno, start = 0;
-
-
-       if (!cma || !cma->count)
-               return NULL;
-
-       pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__,
-                (void *)cma, nr_pages, align_pages);
-
-       if (!nr_pages)
-               return NULL;
-       /*
-        * align mask with chunk size. The bit tracks pages in chunk size
-        */
-       VM_BUG_ON(!is_power_of_2(align_pages));
-       mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1;
-       BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER);
-
-       chunk_count = cma->count >>  (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
-       nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
-
-       mutex_lock(&kvm_cma_mutex);
-       for (;;) {
-               pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count,
-                                                   start, nr_chunk, mask);
-               if (pageno >= chunk_count)
-                       break;
-
-               pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT));
-               ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA);
-               if (ret == 0) {
-                       bitmap_set(cma->bitmap, pageno, nr_chunk);
-                       page = pfn_to_page(pfn);
-                       memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT);
-                       break;
-               } else if (ret != -EBUSY) {
-                       break;
-               }
-               pr_debug("%s(): memory range at %p is busy, retrying\n",
-                        __func__, pfn_to_page(pfn));
-               /* try again with a bit different memory target */
-               start = pageno + mask + 1;
-       }
-       mutex_unlock(&kvm_cma_mutex);
-       pr_debug("%s(): returned %p\n", __func__, page);
-       return page;
-}
-
-/**
- * kvm_release_cma() - release allocated pages for hash pagetable
- * @pages: Allocated pages.
- * @nr_pages: Number of allocated pages.
- *
- * This function releases memory allocated by kvm_alloc_cma().
- * It returns false when provided pages do not belong to contiguous area and
- * true otherwise.
- */
-bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
-{
-       unsigned long pfn;
-       unsigned long nr_chunk;
-       struct kvm_cma *cma = &kvm_cma_area;
-
-       if (!cma || !pages)
-               return false;
-
-       pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages);
-
-       pfn = page_to_pfn(pages);
-
-       if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
-               return false;
-
-       VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count);
-       nr_chunk = nr_pages >>  (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
-
-       mutex_lock(&kvm_cma_mutex);
-       bitmap_clear(cma->bitmap,
-                    (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT),
-                    nr_chunk);
-       free_contig_range(pfn, nr_pages);
-       mutex_unlock(&kvm_cma_mutex);
-
-       return true;
-}
-
-static int __init kvm_cma_activate_area(unsigned long base_pfn,
-                                       unsigned long count)
-{
-       unsigned long pfn = base_pfn;
-       unsigned i = count >> pageblock_order;
-       struct zone *zone;
-
-       WARN_ON_ONCE(!pfn_valid(pfn));
-       zone = page_zone(pfn_to_page(pfn));
-       do {
-               unsigned j;
-               base_pfn = pfn;
-               for (j = pageblock_nr_pages; j; --j, pfn++) {
-                       WARN_ON_ONCE(!pfn_valid(pfn));
-                       /*
-                        * alloc_contig_range requires the pfn range
-                        * specified to be in the same zone. Make this
-                        * simple by forcing the entire CMA resv range
-                        * to be in the same zone.
-                        */
-                       if (page_zone(pfn_to_page(pfn)) != zone)
-                               return -EINVAL;
-               }
-               init_cma_reserved_pageblock(pfn_to_page(base_pfn));
-       } while (--i);
-       return 0;
-}
-
-static int __init kvm_cma_init_reserved_areas(void)
-{
-       int bitmap_size, ret;
-       unsigned long chunk_count;
-       struct kvm_cma *cma = &kvm_cma_area;
-
-       pr_debug("%s()\n", __func__);
-       if (!cma->count)
-               return 0;
-       chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
-       bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long);
-       cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
-       if (!cma->bitmap)
-               return -ENOMEM;
-
-       ret = kvm_cma_activate_area(cma->base_pfn, cma->count);
-       if (ret)
-               goto error;
-       return 0;
-
-error:
-       kfree(cma->bitmap);
-       return ret;
-}
-core_initcall(kvm_cma_init_reserved_areas);
diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h
deleted file mode 100644 (file)
index 655144f..0000000
--- a/arch/powerpc/kvm/book3s_hv_cma.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Contiguous Memory Allocator for ppc KVM hash pagetable  based on CMA
- * for DMA mapping framework
- *
- * Copyright IBM Corporation, 2013
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License or (at your optional) any later version of the license.
- *
- */
-
-#ifndef __POWERPC_KVM_CMA_ALLOC_H__
-#define __POWERPC_KVM_CMA_ALLOC_H__
-/*
- * Both RMA and Hash page allocation will be multiple of 256K.
- */
-#define KVM_CMA_CHUNK_ORDER    18
-
-extern struct page *kvm_alloc_cma(unsigned long nr_pages,
-                                 unsigned long align_pages);
-extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages);
-extern long kvm_cma_declare_contiguous(phys_addr_t size,
-                                      phys_addr_t alignment) __init;
-#endif
index 2c8e90f5789e4a72feb2e5a019a63a9e1dded518..e0f7a189c48ea440dc2e05ac659eb46895490590 100644 (file)
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -128,7 +128,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
                return -EINVAL;
 
        /* this should work for most non-highmem platforms */
-       zone = pgdata->node_zones;
+       zone = pgdata->node_zones +
+               zone_for_memory(nid, start, size, 0);
 
        return __add_pages(nid, zone, start_pfn, nr_pages);
 }
index f59771a3f127749c9f642e8881f2f5ef477fc10f..5c5e794058be1b234c601a11ef099df400e3df6a 100644 (file)
--- a/arch/score/include/uapi/asm/ptrace.h
+++ b/arch/score/include/uapi/asm/ptrace.h
@@ -4,17 +4,6 @@
 #define PTRACE_GETREGS         12
 #define PTRACE_SETREGS         13
 
-#define PC             32
-#define CONDITION      33
-#define ECR            34
-#define EMA            35
-#define CEH            36
-#define CEL            37
-#define COUNTER                38
-#define LDCR           39
-#define STCR           40
-#define PSR            41
-
 #define SINGLESTEP16_INSN      0x7006
 #define SINGLESTEP32_INSN      0x840C8000
 #define BREAKPOINT16_INSN      0x7002          /* work on SPG300 */
index cfd5b90a862820c5a55cf69e2f9612ed1ba765b5..78bc97b1d0270d8e5aabe0e22d49d58041e68abd 100644 (file)
--- a/arch/sh/drivers/dma/Kconfig
+++ b/arch/sh/drivers/dma/Kconfig
@@ -12,9 +12,8 @@ config SH_DMA_IRQ_MULTI
        default y if CPU_SUBTYPE_SH7750  || CPU_SUBTYPE_SH7751  || \
                     CPU_SUBTYPE_SH7750S || CPU_SUBTYPE_SH7750R || \
                     CPU_SUBTYPE_SH7751R || CPU_SUBTYPE_SH7091  || \
-                    CPU_SUBTYPE_SH7763  || CPU_SUBTYPE_SH7764  || \
-                    CPU_SUBTYPE_SH7780  || CPU_SUBTYPE_SH7785  || \
-                    CPU_SUBTYPE_SH7760
+                    CPU_SUBTYPE_SH7763  || CPU_SUBTYPE_SH7780  || \
+                    CPU_SUBTYPE_SH7785  || CPU_SUBTYPE_SH7760
 
 config SH_DMA_API
        depends on SH_DMA
index 4d48f1436a63b72a34f8201ad8cb88416bc1a37d..c727e6ddf69e180f4c01147e371fb722733af7b2 100644 (file)
--- a/arch/sh/include/asm/io_noioport.h
+++ b/arch/sh/include/asm/io_noioport.h
@@ -34,6 +34,17 @@ static inline void outl(unsigned int x, unsigned long port)
        BUG();
 }
 
+static inline void __iomem *ioport_map(unsigned long port, unsigned int size)
+{
+       BUG();
+       return NULL;
+}
+
+static inline void ioport_unmap(void __iomem *addr)
+{
+       BUG();
+}
+
 #define inb_p(addr)    inb(addr)
 #define inw_p(addr)    inw(addr)
 #define inl_p(addr)    inl(addr)
index 02788b6a03b7c9654711b9010ec6cedc1a5edc65..9cd81e54056ac477f1a8091dafd5b736605f7578 100644 (file)
--- a/arch/sh/include/cpu-sh4/cpu/dma-register.h
+++ b/arch/sh/include/cpu-sh4/cpu/dma-register.h
@@ -32,7 +32,6 @@
 #define CHCR_TS_HIGH_SHIFT     (20 - 2)        /* 2 bits for shifted low TS */
 #elif defined(CONFIG_CPU_SUBTYPE_SH7757) || \
        defined(CONFIG_CPU_SUBTYPE_SH7763) || \
-       defined(CONFIG_CPU_SUBTYPE_SH7764) || \
        defined(CONFIG_CPU_SUBTYPE_SH7780) || \
        defined(CONFIG_CPU_SUBTYPE_SH7785)
 #define CHCR_TS_LOW_MASK       0x00000018
index 89afb650ce2524b94fa292fbea1bd59b17f6fb22..8ceccceae84419d3a3733128d13e7321a4908266 100644 (file)
--- a/arch/sh/include/cpu-sh4a/cpu/dma.h
+++ b/arch/sh/include/cpu-sh4a/cpu/dma.h
@@ -14,8 +14,7 @@
 #define DMTE4_IRQ      evt2irq(0xb80)
 #define DMAE0_IRQ      evt2irq(0xbc0)  /* DMA Error IRQ*/
 #define SH_DMAC_BASE0  0xFE008020
-#elif defined(CONFIG_CPU_SUBTYPE_SH7763) || \
-       defined(CONFIG_CPU_SUBTYPE_SH7764)
+#elif defined(CONFIG_CPU_SUBTYPE_SH7763)
 #define DMTE0_IRQ      evt2irq(0x640)
 #define DMTE4_IRQ      evt2irq(0x780)
 #define DMAE0_IRQ      evt2irq(0x6c0)
index f579dd528198919914d9433984b02df7084d90f0..c187b9579c212bdd45c5ead12410775f0598bd48 100644 (file)
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
@@ -307,7 +307,7 @@ static struct clk_lookup lookups[] = {
        CLKDEV_ICK_ID("fck", "sh-tmu.0", &mstp_clks[HWBLK_TMU0]),
        CLKDEV_ICK_ID("fck", "sh-tmu.1", &mstp_clks[HWBLK_TMU1]),
 
-       CLKDEV_ICK_ID("fck", "sh-cmt-16.0", &mstp_clks[HWBLK_CMT]),
+       CLKDEV_ICK_ID("fck", "sh-cmt-32.0", &mstp_clks[HWBLK_CMT]),
        CLKDEV_DEV_ID("sh-wdt.0", &mstp_clks[HWBLK_RWDT]),
        CLKDEV_DEV_ID("sh-dma-engine.1", &mstp_clks[HWBLK_DMAC1]),
 
@@ -332,6 +332,8 @@ static struct clk_lookup lookups[] = {
        CLKDEV_CON_ID("tsif0", &mstp_clks[HWBLK_TSIF]),
        CLKDEV_DEV_ID("renesas_usbhs.1", &mstp_clks[HWBLK_USB1]),
        CLKDEV_DEV_ID("renesas_usbhs.0", &mstp_clks[HWBLK_USB0]),
+       CLKDEV_CON_ID("usb1", &mstp_clks[HWBLK_USB1]),
+       CLKDEV_CON_ID("usb0", &mstp_clks[HWBLK_USB0]),
        CLKDEV_CON_ID("2dg0", &mstp_clks[HWBLK_2DG]),
        CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[HWBLK_SDHI0]),
        CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[HWBLK_SDHI1]),
index 552c8fcf9416413f3b8ebf606371d88cb85eebeb..d6d0a986c6e937680186a32bbfd602c5ff40d10e 100644 (file)
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -80,10 +80,8 @@ static int __init rtc_generic_init(void)
                return -ENODEV;
 
        pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
-       if (IS_ERR(pdev))
-               return PTR_ERR(pdev);
 
-       return 0;
+       return PTR_ERR_OR_ZERO(pdev);
 }
 module_init(rtc_generic_init);
 
index 74c03ecc48718c6e41abb7b59f50f715eb992681..ecfc6b0c1da1061d781b49d9a6e9cb2713d09bfa 100644 (file)
--- a/arch/sh/mm/asids-debugfs.c
+++ b/arch/sh/mm/asids-debugfs.c
@@ -67,10 +67,8 @@ static int __init asids_debugfs_init(void)
                                           NULL, &asids_debugfs_fops);
        if (!asids_dentry)
                return -ENOMEM;
-       if (IS_ERR(asids_dentry))
-               return PTR_ERR(asids_dentry);
 
-       return 0;
+       return PTR_ERR_OR_ZERO(asids_dentry);
 }
 module_init(asids_debugfs_init);
 
index 2d089fe2cba910153e8d9bf1db927fd18ca9a83d..2790b6a64157f79663fe5232afe9f857e6d81cb7 100644 (file)
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -495,8 +495,9 @@ int arch_add_memory(int nid, u64 start, u64 size)
        pgdat = NODE_DATA(nid);
 
        /* We only have ZONE_NORMAL, so this is easy.. */
-       ret = __add_pages(nid, pgdat->node_zones + ZONE_NORMAL,
-                               start_pfn, nr_pages);
+       ret = __add_pages(nid, pgdat->node_zones +
+                       zone_for_memory(nid, start, size, ZONE_NORMAL),
+                       start_pfn, nr_pages);
        if (unlikely(ret))
                printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
 
index 4918d91bc3a660942a6aec8585be2bb41425d3df..d19b13e3a59fc967e175a5d51e829f98aac763dd 100644 (file)
--- a/arch/tile/kernel/module.c
+++ b/arch/tile/kernel/module.c
@@ -58,7 +58,7 @@ void *module_alloc(unsigned long size)
        area->nr_pages = npages;
        area->pages = pages;
 
-       if (map_vm_area(area, prot_rwx, &pages)) {
+       if (map_vm_area(area, prot_rwx, pages)) {
                vunmap(area->addr);
                goto error;
        }
index 1dbade870f90dd8b27140e10275df6d0610506be..a241946815131904498ae6ecd95d87c4e28bf2f9 100644 (file)
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1218,7 +1218,8 @@ good_area:
        /*
         * If for any reason at all we couldn't handle the fault,
         * make sure we exit gracefully rather than endlessly redo
-        * the fault:
+        * the fault.  Since we never set FAULT_FLAG_RETRY_NOWAIT, if
+        * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked.
         */
        fault = handle_mm_fault(mm, vma, address, flags);
 
index e39504878aecd22f6688073bd3fca80f39bfd9c1..7d05565ba7813047cfc4f4d96d339f9af0e5c3d3 100644 (file)
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -825,7 +825,8 @@ void __init mem_init(void)
 int arch_add_memory(int nid, u64 start, u64 size)
 {
        struct pglist_data *pgdata = NODE_DATA(nid);
-       struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
+       struct zone *zone = pgdata->node_zones +
+               zone_for_memory(nid, start, size, ZONE_HIGHMEM);
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
 
index df1a9927ad29ef9aa727851e775badb0f43a5416..5621c47d7a1a0e7274c7fd49b6aa7602d952bd74 100644 (file)
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -691,7 +691,8 @@ static void  update_end_of_memory_vars(u64 start, u64 size)
 int arch_add_memory(int nid, u64 start, u64 size)
 {
        struct pglist_data *pgdat = NODE_DATA(nid);
-       struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
+       struct zone *zone = pgdat->node_zones +
+               zone_for_memory(nid, start, size, ZONE_NORMAL);
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        int ret;
index e65d400efd449a9030f5ccbef1a27d68587aa797..e1b92788c225875efb978f21f8cd25f817f68a52 100644 (file)
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -16,6 +16,7 @@ menuconfig ATA
        depends on BLOCK
        depends on !(M32R || M68K || S390) || BROKEN
        select SCSI
+       select GLOB
        ---help---
          If you want to use an ATA hard disk, ATA tape drive, ATA CD-ROM or
          any other ATA device under Linux, say Y and make sure that you know
index 677c0c1b03bd658322cad2faf5becd86ce1db3cd..dbdc5d32343f53f7ed96a94cf2d6d69258a54237 100644 (file)
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -59,6 +59,7 @@
 #include <linux/async.h>
 #include <linux/log2.h>
 #include <linux/slab.h>
+#include <linux/glob.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_host.h>
@@ -4250,73 +4251,6 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
        { }
 };
 
-/**
- *     glob_match - match a text string against a glob-style pattern
- *     @text: the string to be examined
- *     @pattern: the glob-style pattern to be matched against
- *
- *     Either/both of text and pattern can be empty strings.
- *
- *     Match text against a glob-style pattern, with wildcards and simple sets:
- *
- *             ?       matches any single character.
- *             *       matches any run of characters.
- *             [xyz]   matches a single character from the set: x, y, or z.
- *             [a-d]   matches a single character from the range: a, b, c, or d.
- *             [a-d0-9] matches a single character from either range.
- *
- *     The special characters ?, [, -, or *, can be matched using a set, eg. [*]
- *     Behaviour with malformed patterns is undefined, though generally reasonable.
- *
- *     Sample patterns:  "SD1?",  "SD1[0-5]",  "*R0",  "SD*1?[012]*xx"
- *
- *     This function uses one level of recursion per '*' in pattern.
- *     Since it calls _nothing_ else, and has _no_ explicit local variables,
- *     this will not cause stack problems for any reasonable use here.
- *
- *     RETURNS:
- *     0 on match, 1 otherwise.
- */
-static int glob_match (const char *text, const char *pattern)
-{
-       do {
-               /* Match single character or a '?' wildcard */
-               if (*text == *pattern || *pattern == '?') {
-                       if (!*pattern++)
-                               return 0;  /* End of both strings: match */
-               } else {
-                       /* Match single char against a '[' bracketed ']' pattern set */
-                       if (!*text || *pattern != '[')
-                               break;  /* Not a pattern set */
-                       while (*++pattern && *pattern != ']' && *text != *pattern) {
-                               if (*pattern == '-' && *(pattern - 1) != '[')
-                                       if (*text > *(pattern - 1) && *text < *(pattern + 1)) {
-                                               ++pattern;
-                                               break;
-                                       }
-                       }
-                       if (!*pattern || *pattern == ']')
-                               return 1;  /* No match */
-                       while (*pattern && *pattern++ != ']');
-               }
-       } while (*++text && *pattern);
-
-       /* Match any run of chars against a '*' wildcard */
-       if (*pattern == '*') {
-               if (!*++pattern)
-                       return 0;  /* Match: avoid recursion at end of pattern */
-               /* Loop to handle additional pattern chars after the wildcard */
-               while (*text) {
-                       if (glob_match(text, pattern) == 0)
-                               return 0;  /* Remainder matched */
-                       ++text;  /* Absorb (match) this char and try again */
-               }
-       }
-       if (!*text && !*pattern)
-               return 0;  /* End of both strings: match */
-       return 1;  /* No match */
-}
-
 static unsigned long ata_dev_blacklisted(const struct ata_device *dev)
 {
        unsigned char model_num[ATA_ID_PROD_LEN + 1];
@@ -4327,10 +4261,10 @@ static unsigned long ata_dev_blacklisted(const struct ata_device *dev)
        ata_id_c_string(dev->id, model_rev, ATA_ID_FW_REV, sizeof(model_rev));
 
        while (ad->model_num) {
-               if (!glob_match(model_num, ad->model_num)) {
+               if (glob_match(model_num, ad->model_num)) {
                        if (ad->model_rev == NULL)
                                return ad->horkage;
-                       if (!glob_match(model_rev, ad->model_rev))
+                       if (glob_match(model_rev, ad->model_rev))
                                return ad->horkage;
                }
                ad++;
index 88500fed3c7a41f13c3cea282d7241c13d2f940c..4e7f0ff83ae7b162c43355b6727019c7edfbe897 100644 (file)
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -289,16 +289,6 @@ config CMA_ALIGNMENT
 
          If unsure, leave the default value "8".
 
-config CMA_AREAS
-       int "Maximum count of the CMA device-private areas"
-       default 7
-       help
-         CMA allows to create CMA areas for particular devices. This parameter
-         sets the maximum number of such device private CMA areas in the
-         system.
-
-         If unsure, leave the default value "7".
-
 endif
 
 endmenu
index 6467c919c50993ebfc32e14b2ac2860c0b3beae8..6606abdf880c816ce407c486fbabcc27d602cef9 100644 (file)
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
 
 #include <linux/memblock.h>
 #include <linux/err.h>
-#include <linux/mm.h>
-#include <linux/mutex.h>
-#include <linux/page-isolation.h>
 #include <linux/sizes.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/mm_types.h>
 #include <linux/dma-contiguous.h>
-
-struct cma {
-       unsigned long   base_pfn;
-       unsigned long   count;
-       unsigned long   *bitmap;
-       struct mutex    lock;
-};
-
-struct cma *dma_contiguous_default_area;
+#include <linux/cma.h>
 
 #ifdef CONFIG_CMA_SIZE_MBYTES
 #define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
@@ -48,6 +34,8 @@ struct cma *dma_contiguous_default_area;
 #define CMA_SIZE_MBYTES 0
 #endif
 
+struct cma *dma_contiguous_default_area;
+
 /*
  * Default global CMA area size can be defined in kernel's .config.
  * This is useful mainly for distro maintainers to create a kernel
@@ -154,65 +142,6 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
        }
 }
 
-static DEFINE_MUTEX(cma_mutex);
-
-static int __init cma_activate_area(struct cma *cma)
-{
-       int bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long);
-       unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
-       unsigned i = cma->count >> pageblock_order;
-       struct zone *zone;
-
-       cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
-
-       if (!cma->bitmap)
-               return -ENOMEM;
-
-       WARN_ON_ONCE(!pfn_valid(pfn));
-       zone = page_zone(pfn_to_page(pfn));
-
-       do {
-               unsigned j;
-               base_pfn = pfn;
-               for (j = pageblock_nr_pages; j; --j, pfn++) {
-                       WARN_ON_ONCE(!pfn_valid(pfn));
-                       /*
-                        * alloc_contig_range requires the pfn range
-                        * specified to be in the same zone. Make this
-                        * simple by forcing the entire CMA resv range
-                        * to be in the same zone.
-                        */
-                       if (page_zone(pfn_to_page(pfn)) != zone)
-                               goto err;
-               }
-               init_cma_reserved_pageblock(pfn_to_page(base_pfn));
-       } while (--i);
-
-       mutex_init(&cma->lock);
-       return 0;
-
-err:
-       kfree(cma->bitmap);
-       return -EINVAL;
-}
-
-static struct cma cma_areas[MAX_CMA_AREAS];
-static unsigned cma_area_count;
-
-static int __init cma_init_reserved_areas(void)
-{
-       int i;
-
-       for (i = 0; i < cma_area_count; i++) {
-               int ret = cma_activate_area(&cma_areas[i]);
-               if (ret)
-                       return ret;
-       }
-
-       return 0;
-}
-core_initcall(cma_init_reserved_areas);
-
 /**
  * dma_contiguous_reserve_area() - reserve custom contiguous area
  * @size: Size of the reserved area (in bytes),
@@ -234,72 +163,17 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
                                       phys_addr_t limit, struct cma **res_cma,
                                       bool fixed)
 {
-       struct cma *cma = &cma_areas[cma_area_count];
-       phys_addr_t alignment;
-       int ret = 0;
-
-       pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__,
-                (unsigned long)size, (unsigned long)base,
-                (unsigned long)limit);
-
-       /* Sanity checks */
-       if (cma_area_count == ARRAY_SIZE(cma_areas)) {
-               pr_err("Not enough slots for CMA reserved regions!\n");
-               return -ENOSPC;
-       }
-
-       if (!size)
-               return -EINVAL;
-
-       /* Sanitise input arguments */
-       alignment = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
-       base = ALIGN(base, alignment);
-       size = ALIGN(size, alignment);
-       limit &= ~(alignment - 1);
-
-       /* Reserve memory */
-       if (base && fixed) {
-               if (memblock_is_region_reserved(base, size) ||
-                   memblock_reserve(base, size) < 0) {
-                       ret = -EBUSY;
-                       goto err;
-               }
-       } else {
-               phys_addr_t addr = memblock_alloc_range(size, alignment, base,
-                                                       limit);
-               if (!addr) {
-                       ret = -ENOMEM;
-                       goto err;
-               } else {
-                       base = addr;
-               }
-       }
-
-       /*
-        * Each reserved area must be initialised later, when more kernel
-        * subsystems (like slab allocator) are available.
-        */
-       cma->base_pfn = PFN_DOWN(base);
-       cma->count = size >> PAGE_SHIFT;
-       *res_cma = cma;
-       cma_area_count++;
+       int ret;
 
-       pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
-               (unsigned long)base);
+       ret = cma_declare_contiguous(base, size, limit, 0, 0, fixed, res_cma);
+       if (ret)
+               return ret;
 
        /* Architecture specific contiguous memory fixup. */
-       dma_contiguous_early_fixup(base, size);
-       return 0;
-err:
-       pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
-       return ret;
-}
+       dma_contiguous_early_fixup(cma_get_base(*res_cma),
+                               cma_get_size(*res_cma));
 
-static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count)
-{
-       mutex_lock(&cma->lock);
-       bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count);
-       mutex_unlock(&cma->lock);
+       return 0;
 }
 
 /**
@@ -316,62 +190,10 @@ static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count)
 struct page *dma_alloc_from_contiguous(struct device *dev, int count,
                                       unsigned int align)
 {
-       unsigned long mask, pfn, pageno, start = 0;
-       struct cma *cma = dev_get_cma_area(dev);
-       struct page *page = NULL;
-       int ret;
-
-       if (!cma || !cma->count)
-               return NULL;
-
        if (align > CONFIG_CMA_ALIGNMENT)
                align = CONFIG_CMA_ALIGNMENT;
 
-       pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
-                count, align);
-
-       if (!count)
-               return NULL;
-
-       mask = (1 << align) - 1;
-
-
-       for (;;) {
-               mutex_lock(&cma->lock);
-               pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
-                                                   start, count, mask);
-               if (pageno >= cma->count) {
-                       mutex_unlock(&cma->lock);
-                       break;
-               }
-               bitmap_set(cma->bitmap, pageno, count);
-               /*
-                * It's safe to drop the lock here. We've marked this region for
-                * our exclusive use. If the migration fails we will take the
-                * lock again and unmark it.
-                */
-               mutex_unlock(&cma->lock);
-
-               pfn = cma->base_pfn + pageno;
-               mutex_lock(&cma_mutex);
-               ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
-               mutex_unlock(&cma_mutex);
-               if (ret == 0) {
-                       page = pfn_to_page(pfn);
-                       break;
-               } else if (ret != -EBUSY) {
-                       clear_cma_bitmap(cma, pfn, count);
-                       break;
-               }
-               clear_cma_bitmap(cma, pfn, count);
-               pr_debug("%s(): memory range at %p is busy, retrying\n",
-                        __func__, pfn_to_page(pfn));
-               /* try again with a bit different memory target */
-               start = pageno + mask + 1;
-       }
-
-       pr_debug("%s(): returned %p\n", __func__, page);
-       return page;
+       return cma_alloc(dev_get_cma_area(dev), count, align);
 }
 
 /**
@@ -387,23 +209,5 @@ struct page *dma_alloc_from_contiguous(struct device *dev, int count,
 bool dma_release_from_contiguous(struct device *dev, struct page *pages,
                                 int count)
 {
-       struct cma *cma = dev_get_cma_area(dev);
-       unsigned long pfn;
-
-       if (!cma || !pages)
-               return false;
-
-       pr_debug("%s(page %p)\n", __func__, (void *)pages);
-
-       pfn = page_to_pfn(pages);
-
-       if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
-               return false;
-
-       VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
-
-       free_contig_range(pfn, count);
-       clear_cma_bitmap(cma, pfn, count);
-
-       return true;
+       return cma_release(dev_get_cma_area(dev), pages, count);
 }
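
(Not part of the patch: a rough sketch of the consolidated CMA API that dma-contiguous.c and the powerpc KVM code now share, using only the signatures visible in this diff -- cma_declare_contiguous(), cma_alloc(), cma_release(). The my_cma/my_cma_reserve names and the 16 MiB size are invented for illustration.)

#include <linux/cma.h>
#include <linux/init.h>
#include <linux/mm_types.h>
#include <linux/sizes.h>

static struct cma *my_cma;	/* hypothetical area, analogous to kvm_cma above */

/* Early boot: reserve 16 MiB; base=0 and limit=0 let the allocator place it. */
static int __init my_cma_reserve(void)
{
	return cma_declare_contiguous(0, SZ_16M, 0, 0, 0, false, &my_cma);
}

/* Runtime: hand out and take back physically contiguous pages. */
static struct page *my_cma_get(int count, unsigned int align)
{
	return cma_alloc(my_cma, count, align);
}

static bool my_cma_put(struct page *pages, int count)
{
	return cma_release(my_cma, pages, count);
}
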
index 89f752dd8465acdd1f9a7696c34f8dfc572c31e3..a2e13e250bba2f54eea93e72e9692340d3298d66 100644 (file)
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -284,7 +284,7 @@ static int memory_subsys_online(struct device *dev)
         * attribute and need to set the online_type.
         */
        if (mem->online_type < 0)
-               mem->online_type = ONLINE_KEEP;
+               mem->online_type = MMOP_ONLINE_KEEP;
 
        ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
 
@@ -315,23 +315,23 @@ store_mem_state(struct device *dev,
        if (ret)
                return ret;
 
-       if (!strncmp(buf, "online_kernel", min_t(int, count, 13)))
-               online_type = ONLINE_KERNEL;
-       else if (!strncmp(buf, "online_movable", min_t(int, count, 14)))
-               online_type = ONLINE_MOVABLE;
-       else if (!strncmp(buf, "online", min_t(int, count, 6)))
-               online_type = ONLINE_KEEP;
-       else if (!strncmp(buf, "offline", min_t(int, count, 7)))
-               online_type = -1;
+       if (sysfs_streq(buf, "online_kernel"))
+               online_type = MMOP_ONLINE_KERNEL;
+       else if (sysfs_streq(buf, "online_movable"))
+               online_type = MMOP_ONLINE_MOVABLE;
+       else if (sysfs_streq(buf, "online"))
+               online_type = MMOP_ONLINE_KEEP;
+       else if (sysfs_streq(buf, "offline"))
+               online_type = MMOP_OFFLINE;
        else {
                ret = -EINVAL;
                goto err;
        }
 
        switch (online_type) {
-       case ONLINE_KERNEL:
-       case ONLINE_MOVABLE:
-       case ONLINE_KEEP:
+       case MMOP_ONLINE_KERNEL:
+       case MMOP_ONLINE_MOVABLE:
+       case MMOP_ONLINE_KEEP:
                /*
                 * mem->online_type is not protected so there can be a
                 * race here.  However, when racing online, the first
@@ -342,7 +342,7 @@ store_mem_state(struct device *dev,
                mem->online_type = online_type;
                ret = device_online(&mem->dev);
                break;
-       case -1:
+       case MMOP_OFFLINE:
                ret = device_offline(&mem->dev);
                break;
        default:
@@ -406,7 +406,9 @@ memory_probe_store(struct device *dev, struct device_attribute *attr,
        int i, ret;
        unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;
 
-       phys_addr = simple_strtoull(buf, NULL, 0);
+       ret = kstrtoull(buf, 0, &phys_addr);
+       if (ret)
+               return ret;
 
        if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
                return -EINVAL;
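The two conversions above are worth spelling out: sysfs_streq() treats a single trailing newline as part of the string terminator and compares the whole token, so the old length-limited strncmp(), which also accepted anything that merely begins with "online", is no longer needed; and kstrtoull(), unlike simple_strtoull(), returns an error on malformed input instead of silently stopping at the first bad character. A hedged sketch showing both helpers together; the function is illustrative only, not part of this patch.

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>

/* Accept either the keyword "online" (with or without the trailing
 * newline that sysfs store callbacks usually receive) or a physical
 * address in any base understood by kstrtoull(). */
static int example_parse(const char *buf, unsigned long long *phys_addr)
{
        if (sysfs_streq(buf, "online"))
                return 1;

        return kstrtoull(buf, 0, phys_addr);
}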
index 8f7ed9933a7c6939d7703badeeacb97fcfc803aa..c6d3ae05f1ca4d98667490d7db0b6375f61b9c26 100644 (file)
@@ -126,7 +126,7 @@ static ssize_t node_read_meminfo(struct device *dev,
                       nid, K(node_page_state(nid, NR_FILE_PAGES)),
                       nid, K(node_page_state(nid, NR_FILE_MAPPED)),
                       nid, K(node_page_state(nid, NR_ANON_PAGES)),
-                      nid, K(node_page_state(nid, NR_SHMEM)),
+                      nid, K(i.sharedram),
                       nid, node_page_state(nid, NR_KERNEL_STACK) *
                                THREAD_SIZE / 1024,
                       nid, K(node_page_state(nid, NR_PAGETABLE)),
index 36e54be402df30b68c579e09c588193f5f015e00..dfa4024c448a6222d8d12ffb2f05e1976652fca0 100644 (file)
@@ -183,19 +183,32 @@ static ssize_t comp_algorithm_store(struct device *dev,
 static int zram_test_flag(struct zram_meta *meta, u32 index,
                        enum zram_pageflags flag)
 {
-       return meta->table[index].flags & BIT(flag);
+       return meta->table[index].value & BIT(flag);
 }
 
 static void zram_set_flag(struct zram_meta *meta, u32 index,
                        enum zram_pageflags flag)
 {
-       meta->table[index].flags |= BIT(flag);
+       meta->table[index].value |= BIT(flag);
 }
 
 static void zram_clear_flag(struct zram_meta *meta, u32 index,
                        enum zram_pageflags flag)
 {
-       meta->table[index].flags &= ~BIT(flag);
+       meta->table[index].value &= ~BIT(flag);
+}
+
+static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
+{
+       return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
+}
+
+static void zram_set_obj_size(struct zram_meta *meta,
+                                       u32 index, size_t size)
+{
+       unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
+
+       meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
 }
 
 static inline int is_partial_io(struct bio_vec *bvec)
@@ -255,7 +268,6 @@ static struct zram_meta *zram_meta_alloc(u64 disksize)
                goto free_table;
        }
 
-       rwlock_init(&meta->tb_lock);
        return meta;
 
 free_table:
@@ -304,7 +316,12 @@ static void handle_zero_page(struct bio_vec *bvec)
        flush_dcache_page(page);
 }
 
-/* NOTE: caller should hold meta->tb_lock with write-side */
+
+/*
+ * To protect concurrent access to the same index entry, the
+ * caller should hold this table entry's bit_spinlock to
+ * indicate that the entry is being accessed.
+ */
 static void zram_free_page(struct zram *zram, size_t index)
 {
        struct zram_meta *meta = zram->meta;
@@ -324,11 +341,12 @@ static void zram_free_page(struct zram *zram, size_t index)
 
        zs_free(meta->mem_pool, handle);
 
-       atomic64_sub(meta->table[index].size, &zram->stats.compr_data_size);
+       atomic64_sub(zram_get_obj_size(meta, index),
+                       &zram->stats.compr_data_size);
        atomic64_dec(&zram->stats.pages_stored);
 
        meta->table[index].handle = 0;
-       meta->table[index].size = 0;
+       zram_set_obj_size(meta, index, 0);
 }
 
 static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
@@ -337,14 +355,14 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
        unsigned char *cmem;
        struct zram_meta *meta = zram->meta;
        unsigned long handle;
-       u16 size;
+       size_t size;
 
-       read_lock(&meta->tb_lock);
+       bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
        handle = meta->table[index].handle;
-       size = meta->table[index].size;
+       size = zram_get_obj_size(meta, index);
 
        if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
-               read_unlock(&meta->tb_lock);
+               bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
                clear_page(mem);
                return 0;
        }
@@ -355,7 +373,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
        else
                ret = zcomp_decompress(zram->comp, cmem, size, mem);
        zs_unmap_object(meta->mem_pool, handle);
-       read_unlock(&meta->tb_lock);
+       bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
 
        /* Should NEVER happen. Return bio error if it does. */
        if (unlikely(ret)) {
@@ -376,14 +394,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
        struct zram_meta *meta = zram->meta;
        page = bvec->bv_page;
 
-       read_lock(&meta->tb_lock);
+       bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
        if (unlikely(!meta->table[index].handle) ||
                        zram_test_flag(meta, index, ZRAM_ZERO)) {
-               read_unlock(&meta->tb_lock);
+               bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
                handle_zero_page(bvec);
                return 0;
        }
-       read_unlock(&meta->tb_lock);
+       bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
 
        if (is_partial_io(bvec))
                /* Use  a temporary buffer to decompress the page */
@@ -461,10 +479,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
        if (page_zero_filled(uncmem)) {
                kunmap_atomic(user_mem);
                /* Free memory associated with this sector now. */
-               write_lock(&zram->meta->tb_lock);
+               bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
                zram_free_page(zram, index);
                zram_set_flag(meta, index, ZRAM_ZERO);
-               write_unlock(&zram->meta->tb_lock);
+               bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
 
                atomic64_inc(&zram->stats.zero_pages);
                ret = 0;
@@ -514,12 +532,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
         * Free memory associated with this sector
         * before overwriting unused sectors.
         */
-       write_lock(&zram->meta->tb_lock);
+       bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
        zram_free_page(zram, index);
 
        meta->table[index].handle = handle;
-       meta->table[index].size = clen;
-       write_unlock(&zram->meta->tb_lock);
+       zram_set_obj_size(meta, index, clen);
+       bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
 
        /* Update stats */
        atomic64_add(clen, &zram->stats.compr_data_size);
@@ -560,6 +578,7 @@ static void zram_bio_discard(struct zram *zram, u32 index,
                             int offset, struct bio *bio)
 {
        size_t n = bio->bi_iter.bi_size;
+       struct zram_meta *meta = zram->meta;
 
        /*
         * zram manages data in physical block size units. Because logical block
@@ -580,13 +599,9 @@ static void zram_bio_discard(struct zram *zram, u32 index,
        }
 
        while (n >= PAGE_SIZE) {
-               /*
-                * Discard request can be large so the lock hold times could be
-                * lengthy.  So take the lock once per page.
-                */
-               write_lock(&zram->meta->tb_lock);
+               bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
                zram_free_page(zram, index);
-               write_unlock(&zram->meta->tb_lock);
+               bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
                index++;
                n -= PAGE_SIZE;
        }
@@ -821,9 +836,9 @@ static void zram_slot_free_notify(struct block_device *bdev,
        zram = bdev->bd_disk->private_data;
        meta = zram->meta;
 
-       write_lock(&meta->tb_lock);
+       bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
        zram_free_page(zram, index);
-       write_unlock(&meta->tb_lock);
+       bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
        atomic64_inc(&zram->stats.notify_free);
 }
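The rule documented above zram_free_page() (hold the entry's bit_spinlock while touching it) replaces the single meta->tb_lock rwlock with a lock bit embedded in each table entry. A minimal sketch of the pattern, written as if inside zram_drv.c; the helper name is made up.

static void example_with_entry_locked(struct zram *zram, u32 index)
{
        struct zram_meta *meta = zram->meta;

        /* ZRAM_ACCESS is a bit number inside table[index].value, so the
         * lock needs no extra field and serializes only this entry. */
        bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
        /* handle, size bits and flag bits of this entry are stable here */
        bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
}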
 
index 7f21c145e317f49e785eafeb1aea78097109e158..5b0afde729cd885286f48d31991f7bc98f44328b 100644 (file)
@@ -43,7 +43,6 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
 /*-- End of configurable params */
 
 #define SECTOR_SHIFT           9
-#define SECTOR_SIZE            (1 << SECTOR_SHIFT)
 #define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
 #define SECTORS_PER_PAGE       (1 << SECTORS_PER_PAGE_SHIFT)
 #define ZRAM_LOGICAL_BLOCK_SHIFT 12
@@ -51,10 +50,24 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
 #define ZRAM_SECTOR_PER_LOGICAL_BLOCK  \
        (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT))
 
-/* Flags for zram pages (table[page_no].flags) */
+
+/*
+ * zram is mainly used for memory efficiency, so we want to keep the
+ * per-entry memory footprint small and squeeze the object size and
+ * the page flags into a single field.
+ *
+ * The lower ZRAM_FLAG_SHIFT bits of table.value hold the object size
+ * (excluding header); the higher bits hold zram_pageflags.
+ */
+#define ZRAM_FLAG_SHIFT 24
+
+/* Flags for zram pages (table[page_no].value) */
 enum zram_pageflags {
        /* Page consists entirely of zeros */
-       ZRAM_ZERO,
+       ZRAM_ZERO = ZRAM_FLAG_SHIFT + 1,
+       ZRAM_ACCESS,    /* page is now accessed */
 
        __NR_ZRAM_PAGEFLAGS,
 };
@@ -62,11 +75,10 @@ enum zram_pageflags {
 /*-- Data structures */
 
 /* Allocated for each disk page */
-struct table {
+struct zram_table_entry {
        unsigned long handle;
-       u16 size;       /* object size (excluding header) */
-       u8 flags;
-} __aligned(4);
+       unsigned long value;
+};
 
 struct zram_stats {
        atomic64_t compr_data_size;     /* compressed size of pages stored */
@@ -81,8 +93,7 @@ struct zram_stats {
 };
 
 struct zram_meta {
-       rwlock_t tb_lock;       /* protect table */
-       struct table *table;
+       struct zram_table_entry *table;
        struct zs_pool *mem_pool;
 };
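To make the packing concrete: with ZRAM_FLAG_SHIFT == 24 the low 24 bits of table.value carry the compressed object size, and ZRAM_ZERO and ZRAM_ACCESS land on bits 25 and 26, mirroring the zram_get_obj_size()/zram_set_obj_size() accessors added to zram_drv.c earlier in this patch. A small illustrative sketch under those assumptions; the helper names are hypothetical.

#include <linux/bitops.h>       /* for BIT() */

static inline size_t example_obj_size(unsigned long value)
{
        /* the size occupies bits 0..23 */
        return value & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static inline int example_is_zero_page(unsigned long value)
{
        /* ZRAM_ZERO == 25, so this tests bit 25 */
        return !!(value & BIT(ZRAM_ZERO));
}

For example, value == (BIT(ZRAM_ZERO) | 1234) decodes to an object size of 1234 bytes with the zero-page flag set.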
 
index 17cf96c45f2b07eaeaa31f7e7bdba63b3c98f7b7..79f18e6d9c4f346e5f04430a67cf7b1cb7dbd97c 100644 (file)
@@ -286,7 +286,11 @@ int __meminit firmware_map_add_hotplug(u64 start, u64 end, const char *type)
 {
        struct firmware_map_entry *entry;
 
-       entry = firmware_map_find_entry_bootmem(start, end, type);
+       entry = firmware_map_find_entry(start, end - 1, type);
+       if (entry)
+               return 0;
+
+       entry = firmware_map_find_entry_bootmem(start, end - 1, type);
        if (!entry) {
                entry = kzalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC);
                if (!entry)
index 7e4bae760e2743532c39bd0d49cae176e2c93b2f..c3b80fd65d6254e89caf3381529f673764286115 100644 (file)
@@ -125,7 +125,7 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item)
                parent = &entry->head;
        }
        if (parent) {
-               hlist_add_after_rcu(parent, &item->head);
+               hlist_add_behind_rcu(&item->head, parent);
        } else {
                hlist_add_head_rcu(&item->head, h_list);
        }
index ae208f61219804cbac02bc850582c1ad7cfc5f84..cccef87963e050afb99181d63d3ed5dcf401d5a1 100644 (file)
@@ -688,7 +688,7 @@ static int atk_debugfs_gitm_get(void *p, u64 *val)
 DEFINE_SIMPLE_ATTRIBUTE(atk_debugfs_gitm,
                        atk_debugfs_gitm_get,
                        NULL,
-                       "0x%08llx\n")
+                       "0x%08llx\n");
 
 static int atk_acpi_print(char *buf, size_t sz, union acpi_object *obj)
 {
index 0bf1e4edf04d04a838c9d4cdc8f96aa17dfaa896..6590558d1d31c600b23c8d50f48b4e3b1c461326 100644 (file)
@@ -42,7 +42,6 @@ DEFINE_MUTEX(lguest_lock);
 static __init int map_switcher(void)
 {
        int i, err;
-       struct page **pagep;
 
        /*
         * Map the Switcher in to high memory.
@@ -110,11 +109,9 @@ static __init int map_switcher(void)
         * This code actually sets up the pages we've allocated to appear at
         * switcher_addr.  map_vm_area() takes the vma we allocated above, the
         * kind of pages we're mapping (kernel pages), and a pointer to our
-        * array of struct pages.  It increments that pointer, but we don't
-        * care.
+        * array of struct pages.
         */
-       pagep = lg_switcher_pages;
-       err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep);
+       err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, lg_switcher_pages);
        if (err) {
                printk("lguest: map_vm_area failed: %i\n", err);
                goto free_vma;
index 681a9e81ff512aa874406d6e549485815c29a55d..e8ba7470700af1abaaf33826c7bffc8409666b9e 100644 (file)
@@ -1948,7 +1948,7 @@ static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi,
 
        /* add filter to the list */
        if (parent)
-               hlist_add_after(&parent->fdir_node, &input->fdir_node);
+               hlist_add_behind(&input->fdir_node, &parent->fdir_node);
        else
                hlist_add_head(&input->fdir_node,
                               &pf->fdir_filter_list);
index 94a1c07efeb0b8b6b1915084bdb7d52c0a5aaacb..e4100b5737b67a3cb862428e120e67d51bd1b7fd 100644 (file)
@@ -2517,7 +2517,7 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
 
        /* add filter to the list */
        if (parent)
-               hlist_add_after(&parent->fdir_node, &input->fdir_node);
+               hlist_add_behind(&input->fdir_node, &parent->fdir_node);
        else
                hlist_add_head(&input->fdir_node,
                               &adapter->fdir_filter_list);
index 02b0379ae5501ec563cff1647ac817b9b61440d8..4f34dc0095b579fef086508ab27d634d9f410660 100644 (file)
@@ -585,7 +585,6 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
 
        for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
                int ret;
-               struct page **page_array_ptr;
 
                page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE];
 
@@ -598,8 +597,7 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
                }
                tmp_area.addr = page_addr;
                tmp_area.size = PAGE_SIZE + PAGE_SIZE /* guard page? */;
-               page_array_ptr = page;
-               ret = map_vm_area(&tmp_area, PAGE_KERNEL, &page_array_ptr);
+               ret = map_vm_area(&tmp_area, PAGE_KERNEL, page);
                if (ret) {
                        pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n",
                               proc->pid, page_addr);
index 5dde79418297acf6ddc0a2ec2b8e41ead89b5bd6..8ef1deb59d4a3d3abe6f70862e43a307604d4133 100644 (file)
@@ -351,7 +351,7 @@ cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
                                            cfs_hash_dhead_t, dh_head);
 
        if (dh->dh_tail != NULL) /* not empty */
-               hlist_add_after(dh->dh_tail, hnode);
+               hlist_add_behind(hnode, dh->dh_tail);
        else /* empty list */
                hlist_add_head(hnode, &dh->dh_head);
        dh->dh_tail = hnode;
@@ -406,7 +406,7 @@ cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
                                                cfs_hash_dhead_dep_t, dd_head);
 
        if (dh->dd_tail != NULL) /* not empty */
-               hlist_add_after(dh->dd_tail, hnode);
+               hlist_add_behind(hnode, dh->dd_tail);
        else /* empty list */
                hlist_add_head(hnode, &dh->dd_head);
        dh->dd_tail = hnode;
index 454b65898e2c6eaa9434b1df84a0958926a0e06d..42bad18c66c938be9cf3c0de786be18c77692454 100644 (file)
@@ -355,7 +355,7 @@ static struct sysrq_key_op sysrq_term_op = {
 
 static void moom_callback(struct work_struct *ignored)
 {
-       out_of_memory(node_zonelist(first_online_node, GFP_KERNEL), GFP_KERNEL,
+       out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL), GFP_KERNEL,
                      0, NULL, true);
 }
 
index a31b83c5cbd9ad80e7b5967b48d0c8d1181cbd00..b39d487ccfb0e314096f47515d16ea165d215cc5 100644 (file)
@@ -67,7 +67,7 @@ static int fscache_max_active_sysctl(struct ctl_table *table, int write,
        return ret;
 }
 
-struct ctl_table fscache_sysctls[] = {
+static struct ctl_table fscache_sysctls[] = {
        {
                .procname       = "object_max_active",
                .data           = &fscache_object_max_active,
@@ -87,7 +87,7 @@ struct ctl_table fscache_sysctls[] = {
        {}
 };
 
-struct ctl_table fscache_sysctls_root[] = {
+static struct ctl_table fscache_sysctls_root[] = {
        {
                .procname       = "fscache",
                .mode           = 0555,
index 48140315f62770d622c628f906dede95a6c6bda0..380d86e1ab450b2ed02c9011ec9843ba0e2d2f6c 100644 (file)
@@ -1019,11 +1019,11 @@ static int __logfs_is_valid_block(struct inode *inode, u64 bix, u64 ofs)
 /**
  * logfs_is_valid_block - check whether this block is still valid
  *
- * @sb - superblock
- * @ofs        - block physical offset
- * @ino        - block inode number
- * @bix        - block index
- * @level - block level
+ * @sb:                superblock
+ * @ofs:       block physical offset
+ * @ino:       block inode number
+ * @bix:       block index
+ * @gc_level:  block level
  *
  * Returns 0 if the block is invalid, 1 if it is valid and 2 if it will
  * become invalid once the journal is written.
@@ -2226,10 +2226,9 @@ void btree_write_block(struct logfs_block *block)
  *
  * @inode:             parent inode (ifile or directory)
  * @buf:               object to write (inode or dentry)
- * @n:                 object size
- * @_pos:              object number (file position in blocks/objects)
+ * @count:             object size
+ * @bix:               block index
  * @flags:             write flags
- * @lock:              0 if write lock is already taken, 1 otherwise
  * @shadow_tree:       shadow below this inode
  *
  * FIXME: All caller of this put a 200-300 byte variable on the stack,
index 182bc41cd88711d593c4d997171c6ad483a87577..2a1447c946e7c212071eac5243a5538a499cd0fa 100644 (file)
@@ -798,7 +798,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
        list_splice(&head, n->list.prev);
 
        if (shadows)
-               hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash);
+               hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
        else
                hlist_add_head_rcu(&mnt->mnt_hash,
                                m_hash(&parent->mnt, mnt->mnt_mountpoint));
index ee9cb3795c2b14c53179d10f485e33ed613d0c16..30d3addfad7583c722b281675c4e14af76baadd3 100644 (file)
@@ -70,8 +70,15 @@ static int fanotify_get_response(struct fsnotify_group *group,
        wait_event(group->fanotify_data.access_waitq, event->response ||
                                atomic_read(&group->fanotify_data.bypass_perm));
 
-       if (!event->response) /* bypass_perm set */
+       if (!event->response) { /* bypass_perm set */
+               /*
+                * Event was canceled because group is being destroyed. Remove
+                * it from group's event list because we are responsible for
+                * freeing the permission event.
+                */
+               fsnotify_remove_event(group, &event->fae.fse);
                return 0;
+       }
 
        /* userspace responded, convert to something usable */
        switch (event->response) {
@@ -210,7 +217,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
                return -ENOMEM;
 
        fsn_event = &event->fse;
-       ret = fsnotify_add_notify_event(group, fsn_event, fanotify_merge);
+       ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
        if (ret) {
                /* Permission events shouldn't be merged */
                BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS);
index 3fdc8a3e113464b71dde78cd43ad6357e037df36..b13992a41bd94312eeecd59b4e0841a58d1d85c0 100644 (file)
@@ -66,7 +66,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 
        /* held the notification_mutex the whole time, so this is the
         * same event we peeked above */
-       return fsnotify_remove_notify_event(group);
+       return fsnotify_remove_first_event(group);
 }
 
 static int create_fd(struct fsnotify_group *group,
@@ -359,6 +359,11 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
        struct fanotify_perm_event_info *event, *next;
 
+       /*
+        * There may still be new events arriving in the notification queue
+        * but since userspace cannot use the fanotify fd anymore, no event
+        * can enter or leave access_list at this point.
+        */
        spin_lock(&group->fanotify_data.access_lock);
 
        atomic_inc(&group->fanotify_data.bypass_perm);
@@ -373,6 +378,13 @@ static int fanotify_release(struct inode *ignored, struct file *file)
        }
        spin_unlock(&group->fanotify_data.access_lock);
 
+       /*
+        * Since bypass_perm is set, newly queued events will not wait for
+        * access response. Wake up the already sleeping ones now.
+        * synchronize_srcu() in fsnotify_destroy_group() will wait for all
+        * processes sleeping in fanotify_handle_event() waiting for access
+        * response and thus also for all permission events to be freed.
+        */
        wake_up(&group->fanotify_data.access_waitq);
 #endif
 
index 74825be65b7bbec7eec8df37dda142479c00eb3d..9ce062218de9cf2559c02b92a7a201224d1d67fc 100644 (file)
@@ -232,7 +232,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 
        BUG_ON(last == NULL);
        /* mark should be the last entry.  last is the current last entry */
-       hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list);
+       hlist_add_behind_rcu(&mark->i.i_list, &last->i.i_list);
 out:
        fsnotify_recalc_inode_mask_locked(inode);
        spin_unlock(&inode->i_lock);
index 43ab1e1a07a20acaca5a4741487ac5121e00bbd4..0f88bc0b4e6cfd31fbcd030c5256384fa844b265 100644 (file)
@@ -108,7 +108,7 @@ int inotify_handle_event(struct fsnotify_group *group,
        if (len)
                strcpy(event->name, file_name);
 
-       ret = fsnotify_add_notify_event(group, fsn_event, inotify_merge);
+       ret = fsnotify_add_event(group, fsn_event, inotify_merge);
        if (ret) {
                /* Our event wasn't used in the end. Free it. */
                fsnotify_destroy_event(group, fsn_event);
index cc423a30a0c804321d4fb6c62558a1dc4b786e73..daf76652fe58c0534f355fd59f11ffbddcc43097 100644 (file)
@@ -149,7 +149,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
        if (fsnotify_notify_queue_is_empty(group))
                return NULL;
 
-       event = fsnotify_peek_notify_event(group);
+       event = fsnotify_peek_first_event(group);
 
        pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
@@ -159,7 +159,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 
        /* held the notification_mutex the whole time, so this is the
         * same event we peeked above */
-       fsnotify_remove_notify_event(group);
+       fsnotify_remove_first_event(group);
 
        return event;
 }
index 1e58402171a56cd9d078ab62307951689a5630df..a95d8e037aebe24ba36421861d3abaad6f7dfc89 100644 (file)
@@ -73,7 +73,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
        /* Overflow events are per-group and we don't want to free them */
        if (!event || event->mask == FS_Q_OVERFLOW)
                return;
-
+       /* If the event is still queued, we have a problem... */
+       WARN_ON(!list_empty(&event->list));
        group->ops->free_event(event);
 }
 
@@ -83,10 +84,10 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
  * added to the queue, 1 if the event was merged with some other queued event,
  * 2 if the queue of events has overflown.
  */
-int fsnotify_add_notify_event(struct fsnotify_group *group,
-                             struct fsnotify_event *event,
-                             int (*merge)(struct list_head *,
-                                          struct fsnotify_event *))
+int fsnotify_add_event(struct fsnotify_group *group,
+                      struct fsnotify_event *event,
+                      int (*merge)(struct list_head *,
+                                   struct fsnotify_event *))
 {
        int ret = 0;
        struct list_head *list = &group->notification_list;
@@ -124,11 +125,26 @@ queue:
        return ret;
 }
 
+/*
+ * Remove @event from group's notification queue. It is the responsibility of
+ * the caller to destroy the event.
+ */
+void fsnotify_remove_event(struct fsnotify_group *group,
+                          struct fsnotify_event *event)
+{
+       mutex_lock(&group->notification_mutex);
+       if (!list_empty(&event->list)) {
+               list_del_init(&event->list);
+               group->q_len--;
+       }
+       mutex_unlock(&group->notification_mutex);
+}
+
 /*
  * Remove and return the first event from the notification list.  It is the
  * responsibility of the caller to destroy the obtained event
  */
-struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group)
+struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group)
 {
        struct fsnotify_event *event;
 
@@ -140,7 +156,7 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
                                 struct fsnotify_event, list);
        /*
         * We need to init list head for the case of overflow event so that
-        * check in fsnotify_add_notify_events() works
+        * check in fsnotify_add_event() works
         */
        list_del_init(&event->list);
        group->q_len--;
@@ -149,9 +165,10 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
 }
 
 /*
- * This will not remove the event, that must be done with fsnotify_remove_notify_event()
+ * This will not remove the event, that must be done with
+ * fsnotify_remove_first_event()
  */
-struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
+struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group)
 {
        BUG_ON(!mutex_is_locked(&group->notification_mutex));
 
@@ -169,7 +186,7 @@ void fsnotify_flush_notify(struct fsnotify_group *group)
 
        mutex_lock(&group->notification_mutex);
        while (!fsnotify_notify_queue_is_empty(group)) {
-               event = fsnotify_remove_notify_event(group);
+               event = fsnotify_remove_first_event(group);
                fsnotify_destroy_event(group, event);
        }
        mutex_unlock(&group->notification_mutex);
index 68ca5a8704b5a385e38ae696835dd831d3202691..ac851e8376b1931d88adcf4ff5eaa8bd2445a635 100644 (file)
@@ -191,7 +191,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
 
        BUG_ON(last == NULL);
        /* mark should be the last entry.  last is the current last entry */
-       hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list);
+       hlist_add_behind_rcu(&mark->m.m_list, &last->m.m_list);
 out:
        fsnotify_recalc_vfsmount_mask_locked(mnt);
        spin_unlock(&mnt->mnt_root->d_lock);
index 5c9e2c81cb11db029ece7873766041ada8c65024..f5ec1ce7a53284969d600b4fb4027fc9e387d40d 100644 (file)
@@ -74,8 +74,6 @@ static int ntfs_file_open(struct inode *vi, struct file *filp)
  * ntfs_attr_extend_initialized - extend the initialized size of an attribute
  * @ni:                        ntfs inode of the attribute to extend
  * @new_init_size:     requested new initialized size in bytes
- * @cached_page:       store any allocated but unused page here
- * @lru_pvec:          lru-buffering pagevec of the caller
  *
  * Extend the initialized size of an attribute described by the ntfs inode @ni
  * to @new_init_size bytes.  This involves zeroing any non-sparse space between
@@ -395,7 +393,6 @@ static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
  * @nr_pages:  number of page cache pages to obtain
  * @pages:     array of pages in which to return the obtained page cache pages
  * @cached_page: allocated but as yet unused page
- * @lru_pvec:  lru-buffering pagevec of caller
  *
  * Obtain @nr_pages locked page cache pages from the mapping @mapping and
  * starting at index @index.
index 9d8fcf2f3b947f0fe2a5feda7c0a849f154f349c..a93bf98922565ab120d85f995f0cb565deb1e144 100644 (file)
@@ -4961,6 +4961,15 @@ leftright:
 
                el = path_leaf_el(path);
                split_index = ocfs2_search_extent_list(el, cpos);
+               if (split_index == -1) {
+                       ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+                                       "Owner %llu has an extent at cpos %u "
+                                       "which can no longer be found.\n",
+                                       (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+                                       cpos);
+                       ret = -EROFS;
+                       goto out;
+               }
                goto leftright;
        }
 out:
@@ -5135,7 +5144,7 @@ int ocfs2_change_extent_flag(handle_t *handle,
        el = path_leaf_el(left_path);
 
        index = ocfs2_search_extent_list(el, cpos);
-       if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+       if (index == -1) {
                ocfs2_error(sb,
                            "Owner %llu has an extent at cpos %u which can no "
                            "longer be found.\n",
@@ -5491,7 +5500,7 @@ int ocfs2_remove_extent(handle_t *handle,
 
        el = path_leaf_el(path);
        index = ocfs2_search_extent_list(el, cpos);
-       if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+       if (index == -1) {
                ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
                            "Owner %llu has an extent at cpos %u which can no "
                            "longer be found.\n",
@@ -5557,7 +5566,7 @@ int ocfs2_remove_extent(handle_t *handle,
 
                el = path_leaf_el(path);
                index = ocfs2_search_extent_list(el, cpos);
-               if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+               if (index == -1) {
                        ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
                                    "Owner %llu: split at cpos %u lost record.",
                                    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
index 39efc5057a36d3106391012925e2a8a56758e24e..3fcf205ee900acb87eaa5f74f7c28949ee95bec4 100644 (file)
@@ -1923,12 +1923,11 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
                                goto bail;
                        }
 
-                       if (total_backoff >
-                           msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) {
+                       if (total_backoff > DLM_JOIN_TIMEOUT_MSECS) {
                                status = -ERESTARTSYS;
                                mlog(ML_NOTICE, "Timed out joining dlm domain "
                                     "%s after %u msecs\n", dlm->name,
-                                    jiffies_to_msecs(total_backoff));
+                                    total_backoff);
                                goto bail;
                        }
 
index 82abf0cc9a12e2fbc8531f2542830d07ed100293..3ec906ef5d9a622ff4b130f12907d503bd89c3e0 100644 (file)
@@ -2405,6 +2405,10 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
        if (res->state & DLM_LOCK_RES_MIGRATING)
                return 0;
 
+       /* delay migration when the lockres is in RECOVERING state */
+       if (res->state & DLM_LOCK_RES_RECOVERING)
+               return 0;
+
        if (res->owner != dlm->node_num)
                return 0;
 
index 599eb4c4c8beedaec8c18649b47cc2c1fe487680..6219aaadeb08dadeb44ebdacb2b2559edfc52d7c 100644 (file)
@@ -98,7 +98,7 @@ static int __ocfs2_move_extent(handle_t *handle,
        el = path_leaf_el(path);
 
        index = ocfs2_search_extent_list(el, cpos);
-       if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+       if (index == -1) {
                ocfs2_error(inode->i_sb,
                            "Inode %llu has an extent at cpos %u which can no "
                            "longer be found.\n",
index 636aab69ead559f718a9ebef4e6ca5e8dfad933e..d81f6e2a97f5d4c70d6324cf0a7483a874cd7b9a 100644 (file)
@@ -3109,7 +3109,7 @@ static int ocfs2_clear_ext_refcount(handle_t *handle,
        el = path_leaf_el(path);
 
        index = ocfs2_search_extent_list(el, cpos);
-       if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+       if (index == -1) {
                ocfs2_error(sb,
                            "Inode %llu has an extent at cpos %u which can no "
                            "longer be found.\n",
index 1424c151cccce0170819ce4e0f36dad7d97461b8..a88b2a4fcc85171210cd17f8e4f9a791d5aa1d2b 100644 (file)
@@ -382,7 +382,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
 
        trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);
 
-       si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks,
+       si->si_bh = kcalloc(si->si_blocks, sizeof(struct buffer_head *),
                            GFP_KERNEL);
        if (!si->si_bh) {
                status = -ENOMEM;
index 7445af0b1aa341adb9008d05494890f11c4a4568..aa1eee06420f677b9f5a3594dd66b19567e46295 100644 (file)
@@ -168,7 +168,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                K(global_page_state(NR_WRITEBACK)),
                K(global_page_state(NR_ANON_PAGES)),
                K(global_page_state(NR_FILE_MAPPED)),
-               K(global_page_state(NR_SHMEM)),
+               K(i.sharedram),
                K(global_page_state(NR_SLAB_RECLAIMABLE) +
                                global_page_state(NR_SLAB_UNRECLAIMABLE)),
                K(global_page_state(NR_SLAB_RECLAIMABLE)),
index cfa63ee92c96c9e8b715fc73864ca81053c2b237..dfc791c42d6491c6d1a4537e6ee5d7c9bd21a24b 100644 (file)
@@ -925,15 +925,30 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
                                struct mm_walk *walk)
 {
        struct pagemapread *pm = walk->private;
-       unsigned long addr;
+       unsigned long addr = start;
        int err = 0;
-       pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
 
-       for (addr = start; addr < end; addr += PAGE_SIZE) {
-               err = add_to_pagemap(addr, &pme, pm);
-               if (err)
-                       break;
+       while (addr < end) {
+               struct vm_area_struct *vma = find_vma(walk->mm, addr);
+               pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
+               unsigned long vm_end;
+
+               if (!vma) {
+                       vm_end = end;
+               } else {
+                       vm_end = min(end, vma->vm_end);
+                       if (vma->vm_flags & VM_SOFTDIRTY)
+                               pme.pme |= PM_STATUS2(pm->v2, __PM_SOFT_DIRTY);
+               }
+
+               for (; addr < vm_end; addr += PAGE_SIZE) {
+                       err = add_to_pagemap(addr, &pme, pm);
+                       if (err)
+                               goto out;
+               }
        }
+
+out:
        return err;
 }
 
index 62a0de6632e1aa3c8de599e19db3f8d7e2a383e2..43e7a7eddac03cf7ca659cd891ca219bf702aade 100644 (file)
@@ -44,7 +44,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
 
        pages = end_index - start_index + 1;
 
-       page = kmalloc(sizeof(void *) * pages, GFP_KERNEL);
+       page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL);
        if (page == NULL)
                return res;
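kmalloc_array(n, size, flags) is the overflow-checked form of kmalloc(n * size, flags): if the multiplication would overflow it returns NULL instead of allocating a too-short buffer. A one-function illustrative sketch; the helper is hypothetical.

#include <linux/slab.h>

/* Allocate an array of n page pointers, failing cleanly (NULL) if
 * n * sizeof(void *) would overflow. */
static void **example_alloc_page_array(size_t n)
{
        return kmalloc_array(n, sizeof(void *), GFP_KERNEL);
}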
 
index 031c8d67fd5178bb5afca2b04c71637254b46873..5056babe00df93249465c22b8b6dc6d0ebc1723d 100644 (file)
@@ -27,6 +27,8 @@
  * the filesystem.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/fs.h>
 #include <linux/vfs.h>
 #include <linux/slab.h>
@@ -448,8 +450,7 @@ static int __init init_squashfs_fs(void)
                return err;
        }
 
-       printk(KERN_INFO "squashfs: version 4.0 (2009/01/31) "
-               "Phillip Lougher\n");
+       pr_info("version 4.0 (2009/01/31) Phillip Lougher\n");
 
        return 0;
 }
index 7ad634501e48cd7d39b1e8b621f0affce26765d7..e1c8d080c4271d655f75a771ec71ab6ee81c66e3 100644 (file)
  * lib/bitmap.c provides these functions:
  */
 
-extern int __bitmap_empty(const unsigned long *bitmap, int bits);
-extern int __bitmap_full(const unsigned long *bitmap, int bits);
+extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits);
+extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits);
 extern int __bitmap_equal(const unsigned long *bitmap1,
-                       const unsigned long *bitmap2, int bits);
+                         const unsigned long *bitmap2, unsigned int nbits);
 extern void __bitmap_complement(unsigned long *dst, const unsigned long *src,
-                       int bits);
+                       unsigned int nbits);
 extern void __bitmap_shift_right(unsigned long *dst,
                         const unsigned long *src, int shift, int bits);
 extern void __bitmap_shift_left(unsigned long *dst,
                         const unsigned long *src, int shift, int bits);
 extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
-                       const unsigned long *bitmap2, int bits);
+                       const unsigned long *bitmap2, unsigned int nbits);
 extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
-                       const unsigned long *bitmap2, int bits);
+                       const unsigned long *bitmap2, unsigned int nbits);
 extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
-                       const unsigned long *bitmap2, int bits);
+                       const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
-                       const unsigned long *bitmap2, int bits);
+                       const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_intersects(const unsigned long *bitmap1,
-                       const unsigned long *bitmap2, int bits);
+                       const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_subset(const unsigned long *bitmap1,
-                       const unsigned long *bitmap2, int bits);
-extern int __bitmap_weight(const unsigned long *bitmap, int bits);
+                       const unsigned long *bitmap2, unsigned int nbits);
+extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
 
-extern void bitmap_set(unsigned long *map, int i, int len);
-extern void bitmap_clear(unsigned long *map, int start, int nr);
+extern void bitmap_set(unsigned long *map, unsigned int start, int len);
+extern void bitmap_clear(unsigned long *map, unsigned int start, int len);
 extern unsigned long bitmap_find_next_zero_area(unsigned long *map,
                                         unsigned long size,
                                         unsigned long start,
@@ -140,9 +140,9 @@ extern void bitmap_onto(unsigned long *dst, const unsigned long *orig,
                const unsigned long *relmap, int bits);
 extern void bitmap_fold(unsigned long *dst, const unsigned long *orig,
                int sz, int bits);
-extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order);
-extern void bitmap_release_region(unsigned long *bitmap, int pos, int order);
-extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order);
+extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order);
+extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order);
+extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order);
 extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits);
 extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits);
 
@@ -188,15 +188,15 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
 }
 
 static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
-                       const unsigned long *src2, int nbits)
+                       const unsigned long *src2, unsigned int nbits)
 {
        if (small_const_nbits(nbits))
-               return (*dst = *src1 & *src2) != 0;
+               return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
        return __bitmap_and(dst, src1, src2, nbits);
 }
 
 static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
-                       const unsigned long *src2, int nbits)
+                       const unsigned long *src2, unsigned int nbits)
 {
        if (small_const_nbits(nbits))
                *dst = *src1 | *src2;
@@ -205,7 +205,7 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
 }
 
 static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
-                       const unsigned long *src2, int nbits)
+                       const unsigned long *src2, unsigned int nbits)
 {
        if (small_const_nbits(nbits))
                *dst = *src1 ^ *src2;
@@ -214,24 +214,24 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
 }
 
 static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1,
-                       const unsigned long *src2, int nbits)
+                       const unsigned long *src2, unsigned int nbits)
 {
        if (small_const_nbits(nbits))
-               return (*dst = *src1 & ~(*src2)) != 0;
+               return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
        return __bitmap_andnot(dst, src1, src2, nbits);
 }
 
 static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
-                       int nbits)
+                       unsigned int nbits)
 {
        if (small_const_nbits(nbits))
-               *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits);
+               *dst = ~(*src);
        else
                __bitmap_complement(dst, src, nbits);
 }
 
 static inline int bitmap_equal(const unsigned long *src1,
-                       const unsigned long *src2, int nbits)
+                       const unsigned long *src2, unsigned int nbits)
 {
        if (small_const_nbits(nbits))
                return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
@@ -240,7 +240,7 @@ static inline int bitmap_equal(const unsigned long *src1,
 }
 
 static inline int bitmap_intersects(const unsigned long *src1,
-                       const unsigned long *src2, int nbits)
+                       const unsigned long *src2, unsigned int nbits)
 {
        if (small_const_nbits(nbits))
                return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
@@ -249,7 +249,7 @@ static inline int bitmap_intersects(const unsigned long *src1,
 }
 
 static inline int bitmap_subset(const unsigned long *src1,
-                       const unsigned long *src2, int nbits)
+                       const unsigned long *src2, unsigned int nbits)
 {
        if (small_const_nbits(nbits))
                return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits));
@@ -257,7 +257,7 @@ static inline int bitmap_subset(const unsigned long *src1,
                return __bitmap_subset(src1, src2, nbits);
 }
 
-static inline int bitmap_empty(const unsigned long *src, int nbits)
+static inline int bitmap_empty(const unsigned long *src, unsigned nbits)
 {
        if (small_const_nbits(nbits))
                return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
@@ -265,7 +265,7 @@ static inline int bitmap_empty(const unsigned long *src, int nbits)
                return __bitmap_empty(src, nbits);
 }
 
-static inline int bitmap_full(const unsigned long *src, int nbits)
+static inline int bitmap_full(const unsigned long *src, unsigned int nbits)
 {
        if (small_const_nbits(nbits))
                return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
@@ -273,7 +273,7 @@ static inline int bitmap_full(const unsigned long *src, int nbits)
                return __bitmap_full(src, nbits);
 }
 
-static inline int bitmap_weight(const unsigned long *src, int nbits)
+static inline int bitmap_weight(const unsigned long *src, unsigned int nbits)
 {
        if (small_const_nbits(nbits))
                return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
@@ -284,7 +284,7 @@ static inline void bitmap_shift_right(unsigned long *dst,
                        const unsigned long *src, int n, int nbits)
 {
        if (small_const_nbits(nbits))
-               *dst = *src >> n;
+               *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> n;
        else
                __bitmap_shift_right(dst, src, n, nbits);
 }
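The masking added to the constant-nbits fast paths matters for the boolean return value: bits stored above nbits in the last word no longer leak into the result. A worked example under that assumption; the function is illustrative only.

#include <linux/bitmap.h>

/* With nbits == 4 only bits 0..3 belong to the bitmap.  A stray bit 8
 * shared by both inputs used to make bitmap_and() return non-zero;
 * with the BITMAP_LAST_WORD_MASK(4) == 0xf mask it now returns 0. */
static int example_and(void)
{
        DECLARE_BITMAP(a, 4) = { 0x100 };       /* garbage above nbits */
        DECLARE_BITMAP(b, 4) = { 0x100 };
        DECLARE_BITMAP(dst, 4);

        return bitmap_and(dst, a, b, 4);        /* 0 after this change */
}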
index 0846e6b931cefc9ea9baedfe9bfc93300e4c474b..89f67c1c316064614fca10a47a3be5319bbe874d 100644 (file)
@@ -2,7 +2,7 @@
 #define _LINUX_BYTEORDER_GENERIC_H
 
 /*
- * linux/byteorder_generic.h
+ * linux/byteorder/generic.h
  * Generic Byte-reordering support
  *
  * The "... p" macros, like le64_to_cpup, can be used with pointers
diff --git a/include/linux/cma.h b/include/linux/cma.h
new file mode 100644 (file)
index 0000000..371b930
--- /dev/null
@@ -0,0 +1,27 @@
+#ifndef __CMA_H__
+#define __CMA_H__
+
+/*
+ * There is always at least the global CMA area and a few optional
+ * areas configured in the kernel .config.
+ */
+#ifdef CONFIG_CMA_AREAS
+#define MAX_CMA_AREAS  (1 + CONFIG_CMA_AREAS)
+
+#else
+#define MAX_CMA_AREAS  (0)
+
+#endif
+
+struct cma;
+
+extern phys_addr_t cma_get_base(struct cma *cma);
+extern unsigned long cma_get_size(struct cma *cma);
+
+extern int __init cma_declare_contiguous(phys_addr_t size,
+                       phys_addr_t base, phys_addr_t limit,
+                       phys_addr_t alignment, unsigned int order_per_bit,
+                       bool fixed, struct cma **res_cma);
+extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align);
+extern bool cma_release(struct cma *cma, struct page *pages, int count);
+#endif
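The prototypes above form the new allocator-agnostic CMA interface that dma-contiguous now delegates to. A hedged sketch of how a caller might use it, following the parameter order shown in this header; the area size, and the reading of base == 0 and limit == 0 as "no placement constraint", are assumptions on my part rather than something this header spells out.

#include <linux/cma.h>
#include <linux/init.h>

static struct cma *example_cma;

/* Reserve a 16MB area at early boot... */
static int __init example_cma_reserve(void)
{
        return cma_declare_contiguous(16UL << 20 /* size */, 0 /* base */,
                                      0 /* limit */, 0 /* alignment */,
                                      0 /* order_per_bit */, false /* fixed */,
                                      &example_cma);
}

/* ...and carve page ranges out of it at runtime. */
static struct page *example_cma_get(int nr_pages)
{
        return cma_alloc(example_cma, nr_pages, 0 /* page-aligned */);
}

static void example_cma_put(struct page *pages, int nr_pages)
{
        cma_release(example_cma, pages, nr_pages);
}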
index 772eab5d524abb7e6e2bff2fa60cb8fadaad7054..569bbd039896f330923d53b4a6231ba1c73e70cc 100644 (file)
 
 #ifdef __KERNEL__
 
+#include <linux/device.h>
+
 struct cma;
 struct page;
-struct device;
 
 #ifdef CONFIG_DMA_CMA
 
-/*
- * There is always at least global CMA area and a few optional device
- * private areas configured in kernel .config.
- */
-#define MAX_CMA_AREAS  (1 + CONFIG_CMA_AREAS)
-
 extern struct cma *dma_contiguous_default_area;
 
 static inline struct cma *dev_get_cma_area(struct device *dev)
@@ -123,8 +118,6 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
 
 #else
 
-#define MAX_CMA_AREAS  (0)
-
 static inline struct cma *dev_get_cma_area(struct device *dev)
 {
        return NULL;
index 2daccaf4b547cc293a5315d7a112de1851959c89..1ab6c6913040523cb4a097b3a0bef71ce4fe4dab 100644 (file)
@@ -2688,7 +2688,7 @@ static const struct file_operations __fops = {                            \
        .read    = simple_attr_read,                                    \
        .write   = simple_attr_write,                                   \
        .llseek  = generic_file_llseek,                                 \
-};
+}
 
 static inline __printf(1, 2)
 void __simple_attr_check_format(const char *fmt, ...)
index fc7718c6bd3ebae5fe6dd3a6c9ee616285977204..ca060d7c4fa63e3395c4754ed8e8f5ab32ceff30 100644 (file)
@@ -322,16 +322,18 @@ extern int fsnotify_fasync(int fd, struct file *file, int on);
 extern void fsnotify_destroy_event(struct fsnotify_group *group,
                                   struct fsnotify_event *event);
 /* attach the event to the group notification queue */
-extern int fsnotify_add_notify_event(struct fsnotify_group *group,
-                                    struct fsnotify_event *event,
-                                    int (*merge)(struct list_head *,
-                                                 struct fsnotify_event *));
+extern int fsnotify_add_event(struct fsnotify_group *group,
+                             struct fsnotify_event *event,
+                             int (*merge)(struct list_head *,
+                                          struct fsnotify_event *));
+/* Remove passed event from the group's notification queue */
+extern void fsnotify_remove_event(struct fsnotify_group *group, struct fsnotify_event *event);
 /* true if the group notification queue is empty */
 extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
 /* return, but do not dequeue the first event on the notification queue */
-extern struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group);
+extern struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group);
 /* return AND dequeue the first event on the notification queue */
-extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group);
+extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group);
 
 /* functions used to manipulate the marks attached to inodes */
 
index 6eb1fb37de9a452534e1a1f42cdc2f10410bf600..5e7219dc0fae44968c452d9e8e6b642435cf74b6 100644 (file)
@@ -360,7 +360,7 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask);
 void *alloc_pages_exact(size_t size, gfp_t gfp_mask);
 void free_pages_exact(void *virt, size_t size);
 /* This is different from alloc_pages_exact_node !!! */
-void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
+void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
 
 #define __get_free_page(gfp_mask) \
                __get_free_pages((gfp_mask), 0)
diff --git a/include/linux/glob.h b/include/linux/glob.h
new file mode 100644 (file)
index 0000000..861d834
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _LINUX_GLOB_H
+#define _LINUX_GLOB_H
+
+#include <linux/types.h>       /* For bool */
+#include <linux/compiler.h>    /* For __pure */
+
+bool __pure glob_match(char const *pat, char const *str);
+
+#endif /* _LINUX_GLOB_H */
index 7fb31da45d03bbb40f1840159fc646736c187fb3..9286a46b7d69b539f027bcc890b3be976d20f228 100644 (file)
@@ -93,7 +93,7 @@ static inline int kmap_atomic_idx_push(void)
 
 #ifdef CONFIG_DEBUG_HIGHMEM
        WARN_ON_ONCE(in_irq() && !irqs_disabled());
-       BUG_ON(idx > KM_TYPE_NR);
+       BUG_ON(idx >= KM_TYPE_NR);
 #endif
        return idx;
 }
index b826239bdce0b26a614ae0802782860348dd6fc6..63579cb8d3dcfb5b7d36b80cc19d784e1522e3c9 100644 (file)
@@ -93,10 +93,6 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
 #endif /* CONFIG_DEBUG_VM */
 
 extern unsigned long transparent_hugepage_flags;
-extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-                         pmd_t *dst_pmd, pmd_t *src_pmd,
-                         struct vm_area_struct *vma,
-                         unsigned long addr, unsigned long end);
 extern int split_huge_page_to_list(struct page *page, struct list_head *list);
 static inline int split_huge_page(struct page *page)
 {
index a23c096b30807c20db8d967dbb130f4842fa44e2..6e6d338641fe7efc91641df82c494645526428a7 100644 (file)
@@ -87,7 +87,6 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
 #endif
 
 extern unsigned long hugepages_treat_as_movable;
-extern const unsigned long hugetlb_zero, hugetlb_infinity;
 extern int sysctl_hugetlb_shm_group;
 extern struct list_head huge_boot_pages;
 
index a9e2268ecccb0c0f96a02add9b7f122421f223b6..3dc22abbc68a212023d90d54556a94d8026bcfcb 100644 (file)
@@ -493,11 +493,6 @@ static inline char *hex_byte_pack_upper(char *buf, u8 byte)
        return buf;
 }
 
-static inline char * __deprecated pack_hex_byte(char *buf, u8 byte)
-{
-       return hex_byte_pack(buf, byte);
-}
-
 extern int hex_to_bin(char ch);
 extern int __must_check hex2bin(u8 *dst, const char *src, size_t count);
 
index a370ce57cf1d98e3e707a6a40c77b9318f4fc9ac..61e5b723ae73dbb1a424665c6e8a5d0fd8f9b2a7 100644 (file)
@@ -44,7 +44,7 @@ struct klist_node {
 
 extern void klist_add_tail(struct klist_node *n, struct klist *k);
 extern void klist_add_head(struct klist_node *n, struct klist *k);
-extern void klist_add_after(struct klist_node *n, struct klist_node *pos);
+extern void klist_add_behind(struct klist_node *n, struct klist_node *pos);
 extern void klist_add_before(struct klist_node *n, struct klist_node *pos);
 
 extern void klist_del(struct klist_node *n);
index ef959417106222d475fbc01c419cf42db05dcca6..cbbb96fcead9208da224866d849f3b3dfcf0337e 100644 (file)
@@ -654,15 +654,15 @@ static inline void hlist_add_before(struct hlist_node *n,
        *(n->pprev) = n;
 }
 
-static inline void hlist_add_after(struct hlist_node *n,
-                                       struct hlist_node *next)
+static inline void hlist_add_behind(struct hlist_node *n,
+                                   struct hlist_node *prev)
 {
-       next->next = n->next;
-       n->next = next;
-       next->pprev = &n->next;
+       n->next = prev->next;
+       prev->next = n;
+       n->pprev = &prev->next;
 
-       if(next->next)
-               next->next->pprev  = &next->next;
+       if (n->next)
+               n->next->pprev  = &n->next;
 }
 
 /* after that we'll appear to be on some hlist and hlist_del will work */
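Besides the rename, the argument order is swapped: the node being inserted now comes first, matching hlist_add_before(), so call sites read as "add n behind prev". A short illustrative sketch; the function and node names are made up.

#include <linux/list.h>

/* Build the list head -> a -> b.  With the old API the second call
 * was hlist_add_after(a, b); now the new node is the first argument. */
static void example_build(struct hlist_head *head,
                          struct hlist_node *a, struct hlist_node *b)
{
        hlist_add_head(a, head);
        hlist_add_behind(b, a);
}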
index b660e05b63d4fbc90e211d569f6d0229a2041fcc..e8cc45307f8f0d0f9897f0eb36d96b3c9fa7655c 100644 (file)
@@ -249,7 +249,7 @@ phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
 /*
  * Set the allocation direction to bottom-up or top-down.
  */
-static inline void memblock_set_bottom_up(bool enable)
+static inline void __init memblock_set_bottom_up(bool enable)
 {
        memblock.bottom_up = enable;
 }
@@ -264,7 +264,7 @@ static inline bool memblock_bottom_up(void)
        return memblock.bottom_up;
 }
 #else
-static inline void memblock_set_bottom_up(bool enable) {}
+static inline void __init memblock_set_bottom_up(bool enable) {}
 static inline bool memblock_bottom_up(void) { return false; }
 #endif
 
index 010d125bffbf5f41658878fe0dc6a25f062d1660..d9524c49d767b21a44c5513c2a31a3e47312d729 100644 (file)
@@ -26,11 +26,12 @@ enum {
        MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
 };
 
-/* Types for control the zone type of onlined memory */
+/* Types for controlling the zone type of onlined and offlined memory */
 enum {
-       ONLINE_KEEP,
-       ONLINE_KERNEL,
-       ONLINE_MOVABLE,
+       MMOP_OFFLINE = -1,
+       MMOP_ONLINE_KEEP,
+       MMOP_ONLINE_KERNEL,
+       MMOP_ONLINE_MOVABLE,
 };
 
 /*
@@ -258,6 +259,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {}
 extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
                void *arg, int (*func)(struct memory_block *, void *));
 extern int add_memory(int nid, u64 start, u64 size);
+extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default);
 extern int arch_add_memory(int nid, u64 start, u64 size);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern bool is_memblock_offlined(struct memory_block *mem);
index edd82a105220621a240f98c3097f8e24c145a38c..2f348d02f640fb049ed9c7e17b2742ba7e10f403 100644 (file)
@@ -20,11 +20,13 @@ extern void dump_page_badflags(struct page *page, const char *reason,
        } while (0)
 #define VM_WARN_ON(cond) WARN_ON(cond)
 #define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond)
+#define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format)
 #else
 #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
 #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
+#define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
 #endif
 
 #ifdef CONFIG_DEBUG_VIRTUAL
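VM_WARN_ONCE() behaves like WARN_ONCE() when CONFIG_DEBUG_VM is set and degrades to a compile-time expression check otherwise; an illustrative caller (the condition itself is made up):

    #include <linux/mm.h>
    #include <linux/mmdebug.h>

    static void check_free_page(struct page *page)
    {
            /* Complain once if a page being freed is still mapped somewhere. */
            VM_WARN_ONCE(page_mapcount(page) != 0,
                         "freeing mapped page %p (mapcount=%d)\n",
                         page, page_mapcount(page));
    }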
index deca87452528b2888823a4daf4cb148d43a52544..27288692241eebe928d9ec6020dc3c42d13f3755 100644 (file)
@@ -170,6 +170,8 @@ extern int __mmu_notifier_register(struct mmu_notifier *mn,
                                   struct mm_struct *mm);
 extern void mmu_notifier_unregister(struct mmu_notifier *mn,
                                    struct mm_struct *mm);
+extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
+                                              struct mm_struct *mm);
 extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
 extern void __mmu_notifier_release(struct mm_struct *mm);
 extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
@@ -288,6 +290,10 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
        set_pte_at(___mm, ___address, __ptep, ___pte);                  \
 })
 
+extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
+                                  void (*func)(struct rcu_head *rcu));
+extern void mmu_notifier_synchronize(void);
+
 #else /* CONFIG_MMU_NOTIFIER */
 
 static inline void mmu_notifier_release(struct mm_struct *mm)
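mmu_notifier_unregister_no_release() together with mmu_notifier_call_srcu() lets a notifier user detach without triggering ->release() and defer the kfree() past the notifier SRCU grace period; a sketch with hypothetical names:

    #include <linux/kernel.h>
    #include <linux/mmu_notifier.h>
    #include <linux/slab.h>

    struct my_notifier {
            struct mmu_notifier mn;
            struct rcu_head rcu;
    };

    static void my_notifier_free(struct rcu_head *rcu)
    {
            kfree(container_of(rcu, struct my_notifier, rcu));
    }

    static void my_notifier_teardown(struct my_notifier *n, struct mm_struct *mm)
    {
            /* Detach quietly, then free once all SRCU readers are done. */
            mmu_notifier_unregister_no_release(&n->mn, mm);
            mmu_notifier_call_srcu(&n->rcu, my_notifier_free);
    }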
index 6cbd1b6c3d2071e27bdce852b8bdb152f7fd0710..318df70518509249bb67a8e9811bf20fdf5fc519 100644 (file)
@@ -143,6 +143,7 @@ enum zone_stat_item {
        NR_SHMEM,               /* shmem pages (included tmpfs/GEM pages) */
        NR_DIRTIED,             /* page dirtyings since bootup */
        NR_WRITTEN,             /* page writings since bootup */
+       NR_PAGES_SCANNED,       /* pages scanned since last reclaim */
 #ifdef CONFIG_NUMA
        NUMA_HIT,               /* allocated in intended node */
        NUMA_MISS,              /* allocated in non intended node */
@@ -324,18 +325,11 @@ enum zone_type {
 #ifndef __GENERATING_BOUNDS_H
 
 struct zone {
-       /* Fields commonly accessed by the page allocator */
+       /* Read-mostly fields */
 
        /* zone watermarks, access with *_wmark_pages(zone) macros */
        unsigned long watermark[NR_WMARK];
 
-       /*
-        * When free pages are below this point, additional steps are taken
-        * when reading the number of free pages to avoid per-cpu counter
-        * drift allowing watermarks to be breached
-        */
-       unsigned long percpu_drift_mark;
-
        /*
         * We don't know if the memory that we're going to allocate will be freeable
         * or/and it will be released eventually, so to avoid totally wasting several
@@ -344,41 +338,26 @@ struct zone {
         * on the higher zones). This array is recalculated at runtime if the
         * sysctl_lowmem_reserve_ratio sysctl changes.
         */
-       unsigned long           lowmem_reserve[MAX_NR_ZONES];
-
-       /*
-        * This is a per-zone reserve of pages that should not be
-        * considered dirtyable memory.
-        */
-       unsigned long           dirty_balance_reserve;
+       long lowmem_reserve[MAX_NR_ZONES];
 
 #ifdef CONFIG_NUMA
        int node;
+#endif
+
        /*
-        * zone reclaim becomes active if more unmapped pages exist.
+        * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
+        * this zone's LRU.  Maintained by the pageout code.
         */
-       unsigned long           min_unmapped_pages;
-       unsigned long           min_slab_pages;
-#endif
+       unsigned int inactive_ratio;
+
+       struct pglist_data      *zone_pgdat;
        struct per_cpu_pageset __percpu *pageset;
+
        /*
-        * free areas of different sizes
+        * This is a per-zone reserve of pages that should not be
+        * considered dirtyable memory.
         */
-       spinlock_t              lock;
-#if defined CONFIG_COMPACTION || defined CONFIG_CMA
-       /* Set to true when the PG_migrate_skip bits should be cleared */
-       bool                    compact_blockskip_flush;
-
-       /* pfn where compaction free scanner should start */
-       unsigned long           compact_cached_free_pfn;
-       /* pfn where async and sync compaction migration scanner should start */
-       unsigned long           compact_cached_migrate_pfn[2];
-#endif
-#ifdef CONFIG_MEMORY_HOTPLUG
-       /* see spanned/present_pages for more description */
-       seqlock_t               span_seqlock;
-#endif
-       struct free_area        free_area[MAX_ORDER];
+       unsigned long           dirty_balance_reserve;
 
 #ifndef CONFIG_SPARSEMEM
        /*
@@ -388,74 +367,14 @@ struct zone {
        unsigned long           *pageblock_flags;
 #endif /* CONFIG_SPARSEMEM */
 
-#ifdef CONFIG_COMPACTION
-       /*
-        * On compaction failure, 1<<compact_defer_shift compactions
-        * are skipped before trying again. The number attempted since
-        * last failure is tracked with compact_considered.
-        */
-       unsigned int            compact_considered;
-       unsigned int            compact_defer_shift;
-       int                     compact_order_failed;
-#endif
-
-       ZONE_PADDING(_pad1_)
-
-       /* Fields commonly accessed by the page reclaim scanner */
-       spinlock_t              lru_lock;
-       struct lruvec           lruvec;
-
-       /* Evictions & activations on the inactive file list */
-       atomic_long_t           inactive_age;
-
-       unsigned long           pages_scanned;     /* since last reclaim */
-       unsigned long           flags;             /* zone flags, see below */
-
-       /* Zone statistics */
-       atomic_long_t           vm_stat[NR_VM_ZONE_STAT_ITEMS];
-
-       /*
-        * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
-        * this zone's LRU.  Maintained by the pageout code.
-        */
-       unsigned int inactive_ratio;
-
-
-       ZONE_PADDING(_pad2_)
-       /* Rarely used or read-mostly fields */
-
+#ifdef CONFIG_NUMA
        /*
-        * wait_table           -- the array holding the hash table
-        * wait_table_hash_nr_entries   -- the size of the hash table array
-        * wait_table_bits      -- wait_table_size == (1 << wait_table_bits)
-        *
-        * The purpose of all these is to keep track of the people
-        * waiting for a page to become available and make them
-        * runnable again when possible. The trouble is that this
-        * consumes a lot of space, especially when so few things
-        * wait on pages at a given time. So instead of using
-        * per-page waitqueues, we use a waitqueue hash table.
-        *
-        * The bucket discipline is to sleep on the same queue when
-        * colliding and wake all in that wait queue when removing.
-        * When something wakes, it must check to be sure its page is
-        * truly available, a la thundering herd. The cost of a
-        * collision is great, but given the expected load of the
-        * table, they should be so rare as to be outweighed by the
-        * benefits from the saved space.
-        *
-        * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
-        * primary users of these fields, and in mm/page_alloc.c
-        * free_area_init_core() performs the initialization of them.
+        * zone reclaim becomes active if more unmapped pages exist.
         */
-       wait_queue_head_t       * wait_table;
-       unsigned long           wait_table_hash_nr_entries;
-       unsigned long           wait_table_bits;
+       unsigned long           min_unmapped_pages;
+       unsigned long           min_slab_pages;
+#endif /* CONFIG_NUMA */
 
-       /*
-        * Discontig memory support fields.
-        */
-       struct pglist_data      *zone_pgdat;
        /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
        unsigned long           zone_start_pfn;
 
@@ -500,9 +419,11 @@ struct zone {
         * adjust_managed_page_count() should be used instead of directly
         * touching zone->managed_pages and totalram_pages.
         */
+       unsigned long           managed_pages;
        unsigned long           spanned_pages;
        unsigned long           present_pages;
-       unsigned long           managed_pages;
+
+       const char              *name;
 
        /*
         * Number of MIGRATE_RESEVE page block. To maintain for just
@@ -510,10 +431,94 @@ struct zone {
         */
        int                     nr_migrate_reserve_block;
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+       /* see spanned/present_pages for more description */
+       seqlock_t               span_seqlock;
+#endif
+
        /*
-        * rarely used fields:
+        * wait_table           -- the array holding the hash table
+        * wait_table_hash_nr_entries   -- the size of the hash table array
+        * wait_table_bits      -- wait_table_size == (1 << wait_table_bits)
+        *
+        * The purpose of all these is to keep track of the people
+        * waiting for a page to become available and make them
+        * runnable again when possible. The trouble is that this
+        * consumes a lot of space, especially when so few things
+        * wait on pages at a given time. So instead of using
+        * per-page waitqueues, we use a waitqueue hash table.
+        *
+        * The bucket discipline is to sleep on the same queue when
+        * colliding and wake all in that wait queue when removing.
+        * When something wakes, it must check to be sure its page is
+        * truly available, a la thundering herd. The cost of a
+        * collision is great, but given the expected load of the
+        * table, they should be so rare as to be outweighed by the
+        * benefits from the saved space.
+        *
+        * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
+        * primary users of these fields, and in mm/page_alloc.c
+        * free_area_init_core() performs the initialization of them.
         */
-       const char              *name;
+       wait_queue_head_t       *wait_table;
+       unsigned long           wait_table_hash_nr_entries;
+       unsigned long           wait_table_bits;
+
+       ZONE_PADDING(_pad1_)
+
+       /* Write-intensive fields used from the page allocator */
+       spinlock_t              lock;
+
+       /* free areas of different sizes */
+       struct free_area        free_area[MAX_ORDER];
+
+       /* zone flags, see below */
+       unsigned long           flags;
+
+       ZONE_PADDING(_pad2_)
+
+       /* Write-intensive fields used by page reclaim */
+
+       /* Fields commonly accessed by the page reclaim scanner */
+       spinlock_t              lru_lock;
+       struct lruvec           lruvec;
+
+       /* Evictions & activations on the inactive file list */
+       atomic_long_t           inactive_age;
+
+       /*
+        * When free pages are below this point, additional steps are taken
+        * when reading the number of free pages to avoid per-cpu counter
+        * drift allowing watermarks to be breached
+        */
+       unsigned long percpu_drift_mark;
+
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+       /* pfn where compaction free scanner should start */
+       unsigned long           compact_cached_free_pfn;
+       /* pfn where async and sync compaction migration scanner should start */
+       unsigned long           compact_cached_migrate_pfn[2];
+#endif
+
+#ifdef CONFIG_COMPACTION
+       /*
+        * On compaction failure, 1<<compact_defer_shift compactions
+        * are skipped before trying again. The number attempted since
+        * last failure is tracked with compact_considered.
+        */
+       unsigned int            compact_considered;
+       unsigned int            compact_defer_shift;
+       int                     compact_order_failed;
+#endif
+
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+       /* Set to true when the PG_migrate_skip bits should be cleared */
+       bool                    compact_blockskip_flush;
+#endif
+
+       ZONE_PADDING(_pad3_)
+       /* Zone statistics */
+       atomic_long_t           vm_stat[NR_VM_ZONE_STAT_ITEMS];
 } ____cacheline_internodealigned_in_smp;
 
 typedef enum {
@@ -529,6 +534,7 @@ typedef enum {
        ZONE_WRITEBACK,                 /* reclaim scanning has recently found
                                         * many pages under writeback
                                         */
+       ZONE_FAIR_DEPLETED,             /* fair zone policy batch depleted */
 } zone_flags_t;
 
 static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
@@ -566,6 +572,11 @@ static inline int zone_is_reclaim_locked(const struct zone *zone)
        return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
 }
 
+static inline int zone_is_fair_depleted(const struct zone *zone)
+{
+       return test_bit(ZONE_FAIR_DEPLETED, &zone->flags);
+}
+
 static inline int zone_is_oom_locked(const struct zone *zone)
 {
        return test_bit(ZONE_OOM_LOCKED, &zone->flags);
@@ -872,6 +883,8 @@ static inline int zone_movable_is_highmem(void)
 {
 #if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
        return movable_zone == ZONE_HIGHMEM;
+#elif defined(CONFIG_HIGHMEM)
+       return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
 #else
        return 0;
 #endif
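zone_is_fair_depleted() is a test_bit() wrapper like the other zone flag helpers; a trivial illustrative check (not the real allocator fast path):

    #include <linux/mmzone.h>

    /* Skip zones whose fair-policy allocation batch has been used up. */
    static bool zone_ok_for_fair_policy(struct zone *zone)
    {
            return !zone_is_fair_depleted(zone);
    }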
index 58b9a02c38d29b118d98d7bbd22ac82dde9ec342..83a6aeda899d564843a8a6ea9a4bf090d3da1fe1 100644 (file)
@@ -430,7 +430,15 @@ static inline int num_node_state(enum node_states state)
        for_each_node_mask((__node), node_states[__state])
 
 #define first_online_node      first_node(node_states[N_ONLINE])
-#define next_online_node(nid)  next_node((nid), node_states[N_ONLINE])
+#define first_memory_node      first_node(node_states[N_MEMORY])
+static inline int next_online_node(int nid)
+{
+       return next_node(nid, node_states[N_ONLINE]);
+}
+static inline int next_memory_node(int nid)
+{
+       return next_node(nid, node_states[N_MEMORY]);
+}
 
 extern int nr_node_ids;
 extern int nr_online_nodes;
@@ -471,6 +479,7 @@ static inline int num_node_state(enum node_states state)
        for ( (node) = 0; (node) == 0; (node) = 1)
 
 #define first_online_node      0
+#define first_memory_node      0
 #define next_online_node(nid)  (MAX_NUMNODES)
 #define nr_node_ids            1
 #define nr_online_nodes                1
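first_memory_node and next_memory_node mirror the online-node helpers for nodes that actually have memory; an illustrative round-robin helper built on top of them:

    #include <linux/nodemask.h>

    /* Next node with memory after 'nid', wrapping back to the first one. */
    static int next_memory_node_wrap(int nid)
    {
            int next = next_memory_node(nid);

            if (next >= MAX_NUMNODES)
                    next = first_memory_node;
            return next;
    }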
index 4cd62677feb9eaf8f9a4647231663a8d7a42f986..647395a1a5508f7f138e80ad24afea4fd09638d3 100644 (file)
@@ -55,8 +55,8 @@ extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
                             struct mem_cgroup *memcg, nodemask_t *nodemask,
                             const char *message);
 
-extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
-extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
+extern bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_flags);
+extern void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_flags);
 
 extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
                               int order, const nodemask_t *nodemask);
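The renamed pair follows the usual trylock/unlock convention and returns bool; the expected call pattern, with the actual OOM-kill logic elided, looks roughly like this:

    #include <linux/oom.h>

    static bool try_oom_kill(struct zonelist *zonelist, gfp_t gfp_mask)
    {
            if (!oom_zonelist_trylock(zonelist, gfp_mask))
                    return false;   /* an OOM kill is already in progress */

            /* ... pick and kill a victim here ... */

            oom_zonelist_unlock(zonelist, gfp_mask);
            return true;
    }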
index 8304959ad33641b892f05fb216466b691b871caa..e1f5fcd79792c6b121106618f087efa7130692b5 100644 (file)
@@ -171,13 +171,12 @@ static inline int __TestClearPage##uname(struct page *page)               \
 #define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname)            \
        __SETPAGEFLAG(uname, lname)  __CLEARPAGEFLAG(uname, lname)
 
-#define PAGEFLAG_FALSE(uname)                                          \
-static inline int Page##uname(const struct page *page)                 \
-                       { return 0; }
-
 #define TESTSCFLAG(uname, lname)                                       \
        TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname)
 
+#define TESTPAGEFLAG_FALSE(uname)                                      \
+static inline int Page##uname(const struct page *page) { return 0; }
+
 #define SETPAGEFLAG_NOOP(uname)                                                \
 static inline void SetPage##uname(struct page *page) {  }
 
@@ -187,12 +186,21 @@ static inline void ClearPage##uname(struct page *page) {  }
 #define __CLEARPAGEFLAG_NOOP(uname)                                    \
 static inline void __ClearPage##uname(struct page *page) {  }
 
+#define TESTSETFLAG_FALSE(uname)                                       \
+static inline int TestSetPage##uname(struct page *page) { return 0; }
+
 #define TESTCLEARFLAG_FALSE(uname)                                     \
 static inline int TestClearPage##uname(struct page *page) { return 0; }
 
 #define __TESTCLEARFLAG_FALSE(uname)                                   \
 static inline int __TestClearPage##uname(struct page *page) { return 0; }
 
+#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname)                        \
+       SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname)
+
+#define TESTSCFLAG_FALSE(uname)                                                \
+       TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)
+
 struct page;   /* forward declaration */
 
 TESTPAGEFLAG(Locked, locked)
@@ -248,7 +256,6 @@ PAGEFLAG_FALSE(HighMem)
 PAGEFLAG(SwapCache, swapcache)
 #else
 PAGEFLAG_FALSE(SwapCache)
-       SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache)
 #endif
 
 PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
@@ -258,8 +265,8 @@ PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
 PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
        TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked)
 #else
-PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked)
-       TESTCLEARFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
+PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked)
+       TESTSCFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
 #endif
 
 #ifdef CONFIG_ARCH_USES_PG_UNCACHED
index e1474ae18c8847cba4a2f17c396c6e7b59167fed..3df8c7db7a4ec64e65b0ece15ad03a5fd58ccb30 100644 (file)
@@ -484,6 +484,9 @@ static inline int lock_page_killable(struct page *page)
 /*
  * lock_page_or_retry - Lock the page, unless this would block and the
  * caller indicated that it can handle a retry.
+ *
+ * Return value and mmap_sem implications depend on flags; see
+ * __lock_page_or_retry().
  */
 static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
                                     unsigned int flags)
index 319ff7e53efbc1f0bd551125bd70951c2a8c83ea..0990997a5304bb13f798bd79cd96097eb17681e5 100644 (file)
@@ -31,7 +31,7 @@ static inline const char *printk_skip_level(const char *buffer)
 }
 
 /* printk's without a loglevel use this.. */
-#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
+#define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
 
 /* We show everything that is MORE important than this.. */
 #define CONSOLE_LOGLEVEL_SILENT  0 /* Mum's the word */
index 8183b46fbaa2d6da9817ead257735396a6ec7b0c..372ad5e0dcb88df4af003686c7057af3e900d281 100644 (file)
@@ -432,9 +432,9 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
 }
 
 /**
- * hlist_add_after_rcu
- * @prev: the existing element to add the new element after.
+ * hlist_add_behind_rcu
  * @n: the new element to add to the hash list.
+ * @prev: the existing element to add the new element after.
  *
  * Description:
  * Adds the specified element to the specified hlist
@@ -449,8 +449,8 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
  * hlist_for_each_entry_rcu(), used to prevent memory-consistency
  * problems on Alpha CPUs.
  */
-static inline void hlist_add_after_rcu(struct hlist_node *prev,
-                                      struct hlist_node *n)
+static inline void hlist_add_behind_rcu(struct hlist_node *n,
+                                       struct hlist_node *prev)
 {
        n->next = prev->next;
        n->pprev = &prev->next;
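As with the non-RCU helper, hlist_add_behind_rcu() now takes the new element first; a hedged sketch of a writer inserting into a sorted hlist read under RCU (lock and types are illustrative):

    #include <linux/rculist.h>
    #include <linux/spinlock.h>

    struct ritem {
            int key;
            struct hlist_node node;
    };

    static DEFINE_SPINLOCK(rlist_lock);   /* serialises writers only */

    static void ritem_insert_sorted(struct hlist_head *head, struct ritem *new)
    {
            struct ritem *pos;
            struct hlist_node *prev = NULL;

            spin_lock(&rlist_lock);
            hlist_for_each_entry(pos, head, node) {
                    if (pos->key > new->key)
                            break;
                    prev = &pos->node;
            }
            if (prev)
                    hlist_add_behind_rcu(&new->node, prev);
            else
                    hlist_add_head_rcu(&new->node, head);
            spin_unlock(&rlist_lock);
    }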
index 4bdbee80eede2bb45e2615d3a875be4bb65659b3..1eb64043c076fe97db25443feb7ddb5bfe32e38c 100644 (file)
@@ -311,7 +311,6 @@ extern void lru_add_page_tail(struct page *page, struct page *page_tail,
                         struct lruvec *lruvec, struct list_head *head);
 extern void activate_page(struct page *);
 extern void mark_page_accessed(struct page *);
-extern void init_page_accessed(struct page *page);
 extern void lru_add_drain(void);
 extern void lru_add_drain_cpu(int cpu);
 extern void lru_add_drain_all(void);
index 4b8a89189a296143a5d17eac9e5bec5dafa42afe..b87696fdf06ab14d14ff01f5cfd7b2650f1e6c96 100644 (file)
@@ -113,7 +113,7 @@ extern struct vm_struct *remove_vm_area(const void *addr);
 extern struct vm_struct *find_vm_area(const void *addr);
 
 extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
-                       struct page ***pages);
+                       struct page **pages);
 #ifdef CONFIG_MMU
 extern int map_kernel_range_noflush(unsigned long start, unsigned long size,
                                    pgprot_t prot, struct page **pages);
index 13af0d450bf6e088fc2b603fc0be2c35d8cbf7f6..f9d41a6e361f42f79a20bfd0c5b2f4579d61dcac 100644 (file)
@@ -11,7 +11,7 @@ struct zbud_ops {
 
 struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops);
 void zbud_destroy_pool(struct zbud_pool *pool);
-int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp,
+int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
        unsigned long *handle);
 void zbud_free(struct zbud_pool *pool, unsigned long handle);
 int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries);
index 9c5a6b4de0a30d716b2ec72470813be3a2dd82a8..197abb2a54c5d713dd1a909da735bee60ab28d12 100644 (file)
@@ -493,64 +493,6 @@ extern int deflateInit2 (z_streamp strm,
    method). msg is set to null if there is no error message.  deflateInit2 does
    not perform any compression: this will be done by deflate().
 */
-                            
-#if 0
-extern int zlib_deflateSetDictionary (z_streamp strm,
-                                                    const Byte *dictionary,
-                                                    uInt  dictLength);
-#endif
-/*
-     Initializes the compression dictionary from the given byte sequence
-   without producing any compressed output. This function must be called
-   immediately after deflateInit, deflateInit2 or deflateReset, before any
-   call of deflate. The compressor and decompressor must use exactly the same
-   dictionary (see inflateSetDictionary).
-
-     The dictionary should consist of strings (byte sequences) that are likely
-   to be encountered later in the data to be compressed, with the most commonly
-   used strings preferably put towards the end of the dictionary. Using a
-   dictionary is most useful when the data to be compressed is short and can be
-   predicted with good accuracy; the data can then be compressed better than
-   with the default empty dictionary.
-
-     Depending on the size of the compression data structures selected by
-   deflateInit or deflateInit2, a part of the dictionary may in effect be
-   discarded, for example if the dictionary is larger than the window size in
-   deflate or deflate2. Thus the strings most likely to be useful should be
-   put at the end of the dictionary, not at the front.
-
-     Upon return of this function, strm->adler is set to the Adler32 value
-   of the dictionary; the decompressor may later use this value to determine
-   which dictionary has been used by the compressor. (The Adler32 value
-   applies to the whole dictionary even if only a subset of the dictionary is
-   actually used by the compressor.)
-
-     deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
-   parameter is invalid (such as NULL dictionary) or the stream state is
-   inconsistent (for example if deflate has already been called for this stream
-   or if the compression method is bsort). deflateSetDictionary does not
-   perform any compression: this will be done by deflate().
-*/
-
-#if 0
-extern int zlib_deflateCopy (z_streamp dest, z_streamp source);
-#endif
-
-/*
-     Sets the destination stream as a complete copy of the source stream.
-
-     This function can be useful when several compression strategies will be
-   tried, for example when there are several ways of pre-processing the input
-   data with a filter. The streams that will be discarded should then be freed
-   by calling deflateEnd.  Note that deflateCopy duplicates the internal
-   compression state which can be quite large, so this strategy is slow and
-   can consume lots of memory.
-
-     deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
-   enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
-   (such as zalloc being NULL). msg is left unchanged in both source and
-   destination.
-*/
 
 extern int zlib_deflateReset (z_streamp strm);
 /*
@@ -568,27 +510,6 @@ static inline unsigned long deflateBound(unsigned long s)
        return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11;
 }
 
-#if 0
-extern int zlib_deflateParams (z_streamp strm, int level, int strategy);
-#endif
-/*
-     Dynamically update the compression level and compression strategy.  The
-   interpretation of level and strategy is as in deflateInit2.  This can be
-   used to switch between compression and straight copy of the input data, or
-   to switch to a different kind of input data requiring a different
-   strategy. If the compression level is changed, the input available so far
-   is compressed with the old level (and may be flushed); the new level will
-   take effect only at the next call of deflate().
-
-     Before the call of deflateParams, the stream state must be set as for
-   a call of deflate(), since the currently available input may have to
-   be compressed and flushed. In particular, strm->avail_out must be non-zero.
-
-     deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source
-   stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR
-   if strm->avail_out was zero.
-*/
-
 /*   
 extern int inflateInit2 (z_streamp strm, int  windowBits);
 
@@ -631,45 +552,6 @@ extern int inflateInit2 (z_streamp strm, int  windowBits);
    and avail_out are unchanged.)
 */
 
-extern int zlib_inflateSetDictionary (z_streamp strm,
-                                                    const Byte *dictionary,
-                                                    uInt  dictLength);
-/*
-     Initializes the decompression dictionary from the given uncompressed byte
-   sequence. This function must be called immediately after a call of inflate,
-   if that call returned Z_NEED_DICT. The dictionary chosen by the compressor
-   can be determined from the adler32 value returned by that call of inflate.
-   The compressor and decompressor must use exactly the same dictionary (see
-   deflateSetDictionary).  For raw inflate, this function can be called
-   immediately after inflateInit2() or inflateReset() and before any call of
-   inflate() to set the dictionary.  The application must insure that the
-   dictionary that was used for compression is provided.
-
-     inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
-   parameter is invalid (such as NULL dictionary) or the stream state is
-   inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
-   expected one (incorrect adler32 value). inflateSetDictionary does not
-   perform any decompression: this will be done by subsequent calls of
-   inflate().
-*/
-
-#if 0
-extern int zlib_inflateSync (z_streamp strm);
-#endif
-/* 
-    Skips invalid compressed data until a full flush point (see above the
-  description of deflate with Z_FULL_FLUSH) can be found, or until all
-  available input is skipped. No output is provided.
-
-    inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR
-  if no more input was provided, Z_DATA_ERROR if no flush point has been found,
-  or Z_STREAM_ERROR if the stream structure was inconsistent. In the success
-  case, the application may save the current current value of total_in which
-  indicates where valid compressed data was found. In the error case, the
-  application may repeatedly call inflateSync, providing more input each time,
-  until success or end of the input data.
-*/
-
 extern int zlib_inflateReset (z_streamp strm);
 /*
      This function is equivalent to inflateEnd followed by inflateInit,
diff --git a/include/linux/zpool.h b/include/linux/zpool.h
new file mode 100644 (file)
index 0000000..f14bd75
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * zpool memory storage api
+ *
+ * Copyright (C) 2014 Dan Streetman
+ *
+ * This is a common frontend for the zbud and zsmalloc memory
+ * storage pool implementations.  Typically, this is used to
+ * store compressed memory.
+ */
+
+#ifndef _ZPOOL_H_
+#define _ZPOOL_H_
+
+struct zpool;
+
+struct zpool_ops {
+       int (*evict)(struct zpool *pool, unsigned long handle);
+};
+
+/*
+ * Control how a handle is mapped.  It will be ignored if the
+ * implementation does not support it.  Its use is optional.
+ * Note that this does not refer to memory protection, it
+ * refers to how the memory will be copied in/out if copying
+ * is necessary during mapping; read-write is the safest as
+ * it copies the existing memory in on map, and copies the
+ * changed memory back out on unmap.  Write-only does not copy
+ * in the memory and should only be used for initialization.
+ * If in doubt, use ZPOOL_MM_DEFAULT which is read-write.
+ */
+enum zpool_mapmode {
+       ZPOOL_MM_RW, /* normal read-write mapping */
+       ZPOOL_MM_RO, /* read-only (no copy-out at unmap time) */
+       ZPOOL_MM_WO, /* write-only (no copy-in at map time) */
+
+       ZPOOL_MM_DEFAULT = ZPOOL_MM_RW
+};
+
+struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops);
+
+char *zpool_get_type(struct zpool *pool);
+
+void zpool_destroy_pool(struct zpool *pool);
+
+int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp,
+                       unsigned long *handle);
+
+void zpool_free(struct zpool *pool, unsigned long handle);
+
+int zpool_shrink(struct zpool *pool, unsigned int pages,
+                       unsigned int *reclaimed);
+
+void *zpool_map_handle(struct zpool *pool, unsigned long handle,
+                       enum zpool_mapmode mm);
+
+void zpool_unmap_handle(struct zpool *pool, unsigned long handle);
+
+u64 zpool_get_total_size(struct zpool *pool);
+
+
+/**
+ * struct zpool_driver - driver implementation for zpool
+ * @type:      name of the driver.
+ * @list:      entry in the list of zpool drivers.
+ * @create:    create a new pool.
+ * @destroy:   destroy a pool.
+ * @malloc:    allocate mem from a pool.
+ * @free:      free mem from a pool.
+ * @shrink:    shrink the pool.
+ * @map:       map a handle.
+ * @unmap:     unmap a handle.
+ * @total_size:        get total size of a pool.
+ *
+ * This is created by a zpool implementation and registered
+ * with zpool.
+ */
+struct zpool_driver {
+       char *type;
+       struct module *owner;
+       atomic_t refcount;
+       struct list_head list;
+
+       void *(*create)(gfp_t gfp, struct zpool_ops *ops);
+       void (*destroy)(void *pool);
+
+       int (*malloc)(void *pool, size_t size, gfp_t gfp,
+                               unsigned long *handle);
+       void (*free)(void *pool, unsigned long handle);
+
+       int (*shrink)(void *pool, unsigned int pages,
+                               unsigned int *reclaimed);
+
+       void *(*map)(void *pool, unsigned long handle,
+                               enum zpool_mapmode mm);
+       void (*unmap)(void *pool, unsigned long handle);
+
+       u64 (*total_size)(void *pool);
+};
+
+void zpool_register_driver(struct zpool_driver *driver);
+
+int zpool_unregister_driver(struct zpool_driver *driver);
+
+int zpool_evict(void *pool, unsigned long handle);
+
+#endif
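A hedged walk-through of the zpool API from a caller's point of view; the "zbud" type string assumes zbud is built in, and error handling is trimmed:

    #include <linux/errno.h>
    #include <linux/gfp.h>
    #include <linux/string.h>
    #include <linux/zpool.h>

    static int zpool_demo(void)
    {
            const char data[] = "compressed payload";   /* illustrative */
            struct zpool *pool;
            unsigned long handle;
            void *dst;
            int ret;

            pool = zpool_create_pool("zbud", GFP_KERNEL, NULL);
            if (!pool)
                    return -ENOMEM;

            ret = zpool_malloc(pool, sizeof(data), GFP_KERNEL, &handle);
            if (ret)
                    goto out;

            /* Write-only map: no copy-in, fine for initialising the object. */
            dst = zpool_map_handle(pool, handle, ZPOOL_MM_WO);
            memcpy(dst, data, sizeof(data));
            zpool_unmap_handle(pool, handle);

            zpool_free(pool, handle);
    out:
            zpool_destroy_pool(pool);
            return ret;
    }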
index 4e4f2f8b1ac222a3673ddb316d30b21f7527c7bf..dd2b5467d905816e72c22026f109e971bbef8724 100644 (file)
@@ -17,6 +17,7 @@
        {MR_MEMORY_HOTPLUG,     "memory_hotplug"},              \
        {MR_SYSCALL,            "syscall_or_cpuset"},           \
        {MR_MEMPOLICY_MBIND,    "mempolicy_mbind"},             \
+       {MR_NUMA_MISPLACED,     "numa_misplaced"},              \
        {MR_CMA,                "cma"}
 
 TRACE_EVENT(mm_migrate_pages,
index 1c9fabde69e4bb4aa5f13bc23a3b0d374549bbb6..ce0803b8d05f340f42116c1f129d08a645344e23 100644 (file)
@@ -28,12 +28,10 @@ TRACE_EVENT(mm_lru_insertion,
 
        TP_PROTO(
                struct page *page,
-               unsigned long pfn,
-               int lru,
-               unsigned long flags
+               int lru
        ),
 
-       TP_ARGS(page, pfn, lru, flags),
+       TP_ARGS(page, lru),
 
        TP_STRUCT__entry(
                __field(struct page *,  page    )
@@ -44,9 +42,9 @@ TRACE_EVENT(mm_lru_insertion,
 
        TP_fast_assign(
                __entry->page   = page;
-               __entry->pfn    = pfn;
+               __entry->pfn    = page_to_pfn(page);
                __entry->lru    = lru;
-               __entry->flags  = flags;
+               __entry->flags  = trace_pagemap_flags(page);
        ),
 
        /* Flag format is based on page-types.c formatting for pagemap */
@@ -64,9 +62,9 @@ TRACE_EVENT(mm_lru_insertion,
 
 TRACE_EVENT(mm_lru_activate,
 
-       TP_PROTO(struct page *page, unsigned long pfn),
+       TP_PROTO(struct page *page),
 
-       TP_ARGS(page, pfn),
+       TP_ARGS(page),
 
        TP_STRUCT__entry(
                __field(struct page *,  page    )
@@ -75,7 +73,7 @@ TRACE_EVENT(mm_lru_activate,
 
        TP_fast_assign(
                __entry->page   = page;
-               __entry->pfn    = pfn;
+               __entry->pfn    = page_to_pfn(page);
        ),
 
        /* Flag format is based on page-types.c formatting for pagemap */
index 41066e49e8809464651d8335de4a77165a864033..a291b7ef473893891481f188b81f7dfce1550942 100644 (file)
@@ -807,15 +807,53 @@ config LOG_BUF_SHIFT
        range 12 21
        default 17
        help
-         Select kernel log buffer size as a power of 2.
+         Select the minimal kernel log buffer size as a power of 2.
+         The final size is affected by the LOG_CPU_MAX_BUF_SHIFT config
+         parameter, see below. A larger size may also be forced by the
+         "log_buf_len" boot parameter.
+
          Examples:
-                    17 => 128 KB
+                    17 => 128 KB
                     16 => 64 KB
-                    15 => 32 KB
-                    14 => 16 KB
+                    15 => 32 KB
+                    14 => 16 KB
                     13 =>  8 KB
                     12 =>  4 KB
 
+config LOG_CPU_MAX_BUF_SHIFT
+       int "CPU kernel log buffer size contribution (13 => 8 KB, 17 => 128KB)"
+       range 0 21
+       default 12 if !BASE_SMALL
+       default 0 if BASE_SMALL
+       help
+         This option allows increasing the default ring buffer size
+         according to the number of CPUs. The value defines the contribution
+         of each CPU as a power of 2. The used space is typically only a few
+         lines; however, it might be much more when problems are reported,
+         e.g. backtraces.
+
+         The increased size means that a new buffer has to be allocated and
+         the original static one is unused. It makes sense only on systems
+         with many CPUs. Therefore this value is used only when the sum of
+         contributions is greater than half of the default kernel ring
+         buffer as defined by LOG_BUF_SHIFT. The default values are set
+         so that more than 64 CPUs are needed to trigger the allocation.
+
+         Also, this option is ignored when the "log_buf_len" kernel parameter
+         is used, as it forces an exact (power of two) size of the ring buffer.
+
+         The number of possible CPUs is used for this computation, ignoring
+         hotplugging, making the computation optimal for the worst-case
+         scenario while allowing a simple algorithm to be used from bootup.
+
+         Example shift values and their meaning:
+                    17 => 128 KB for each CPU
+                    16 =>  64 KB for each CPU
+                    15 =>  32 KB for each CPU
+                    14 =>  16 KB for each CPU
+                    13 =>   8 KB for each CPU
+                    12 =>   4 KB for each CPU
+
 #
 # Architectures with an unreliable sched_clock() should select this:
 #
index 8e9bc9c3dbb7ef49c360bf2775f57e0b74414cc6..c447cd9848d1bbce5f8e013c225b7fe9a26645fd 100644 (file)
@@ -106,7 +106,7 @@ static inline struct audit_entry *audit_init_entry(u32 field_count)
        if (unlikely(!entry))
                return NULL;
 
-       fields = kzalloc(sizeof(*fields) * field_count, GFP_KERNEL);
+       fields = kcalloc(field_count, sizeof(*fields), GFP_KERNEL);
        if (unlikely(!fields)) {
                kfree(entry);
                return NULL;
@@ -160,7 +160,7 @@ static __u32 *classes[AUDIT_SYSCALL_CLASSES];
 
 int __init audit_register_class(int class, unsigned *list)
 {
-       __u32 *p = kzalloc(AUDIT_BITMASK_SIZE * sizeof(__u32), GFP_KERNEL);
+       __u32 *p = kcalloc(AUDIT_BITMASK_SIZE, sizeof(__u32), GFP_KERNEL);
        if (!p)
                return -ENOMEM;
        while (*list != ~0U) {
index e5c4668f1799d15ce1d527b1e357d795f3d748ea..88c6b3e425834e89943bb1a291a7a350333e3593 100644 (file)
@@ -455,6 +455,7 @@ static void exit_mm(struct task_struct * tsk)
        task_unlock(tsk);
        mm_update_next_owner(mm);
        mmput(mm);
+       clear_thread_flag(TIF_MEMDIE);
 }
 
 /*
index 13e839dbca07ea72fb06d0b5ecbb97379c4a7f57..de1a6bb6861db5e92f0884e2b733cb5b6988679e 100644 (file)
@@ -45,6 +45,7 @@
 #include <linux/poll.h>
 #include <linux/irq_work.h>
 #include <linux/utsname.h>
+#include <linux/ctype.h>
 
 #include <asm/uaccess.h>
 
@@ -56,7 +57,7 @@
 
 int console_printk[4] = {
        CONSOLE_LOGLEVEL_DEFAULT,       /* console_loglevel */
-       DEFAULT_MESSAGE_LOGLEVEL,       /* default_message_loglevel */
+       MESSAGE_LOGLEVEL_DEFAULT,       /* default_message_loglevel */
        CONSOLE_LOGLEVEL_MIN,           /* minimum_console_loglevel */
        CONSOLE_LOGLEVEL_DEFAULT,       /* default_console_loglevel */
 };
@@ -113,9 +114,9 @@ static int __down_trylock_console_sem(unsigned long ip)
  * This is used for debugging the mess that is the VT code by
  * keeping track if we have the console semaphore held. It's
  * definitely not the perfect debug tool (we don't know if _WE_
- * hold it are racing, but it helps tracking those weird code
- * path in the console code where we end up in places I want
- * locked without the console sempahore held
+ * hold it and are racing, but it helps tracking those weird code
+ * paths in the console code where we end up in places I want
+ * locked without the console semaphore held).
  */
 static int console_locked, console_suspended;
 
@@ -146,8 +147,8 @@ static int console_may_schedule;
  * the overall length of the record.
  *
  * The heads to the first and last entry in the buffer, as well as the
- * sequence numbers of these both entries are maintained when messages
- * are stored..
+ * sequence numbers of these entries are maintained when messages are
+ * stored.
  *
  * If the heads indicate available messages, the length in the header
  * tells the start next message. A length == 0 for the next message
@@ -257,7 +258,7 @@ static u64 clear_seq;
 static u32 clear_idx;
 
 #define PREFIX_MAX             32
-#define LOG_LINE_MAX           1024 - PREFIX_MAX
+#define LOG_LINE_MAX           (1024 - PREFIX_MAX)
 
 /* record buffer */
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
@@ -266,6 +267,7 @@ static u32 clear_idx;
 #define LOG_ALIGN __alignof__(struct printk_log)
 #endif
 #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
+#define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT)
 static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
 static char *log_buf = __log_buf;
 static u32 log_buf_len = __LOG_BUF_LEN;
@@ -344,7 +346,7 @@ static int log_make_free_space(u32 msg_size)
        while (log_first_seq < log_next_seq) {
                if (logbuf_has_space(msg_size, false))
                        return 0;
-               /* drop old messages until we have enough continuous space */
+               /* drop old messages until we have enough contiguous space */
                log_first_idx = log_next(log_first_idx);
                log_first_seq++;
        }
@@ -453,11 +455,7 @@ static int log_store(int facility, int level,
        return msg->text_len;
 }
 
-#ifdef CONFIG_SECURITY_DMESG_RESTRICT
-int dmesg_restrict = 1;
-#else
-int dmesg_restrict;
-#endif
+int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT);
 
 static int syslog_action_restricted(int type)
 {
@@ -828,34 +826,74 @@ void log_buf_kexec_setup(void)
 /* requested log_buf_len from kernel cmdline */
 static unsigned long __initdata new_log_buf_len;
 
-/* save requested log_buf_len since it's too early to process it */
-static int __init log_buf_len_setup(char *str)
+/* we practice scaling the ring buffer by powers of 2 */
+static void __init log_buf_len_update(unsigned size)
 {
-       unsigned size = memparse(str, &str);
-
        if (size)
                size = roundup_pow_of_two(size);
        if (size > log_buf_len)
                new_log_buf_len = size;
+}
+
+/* save requested log_buf_len since it's too early to process it */
+static int __init log_buf_len_setup(char *str)
+{
+       unsigned size = memparse(str, &str);
+
+       log_buf_len_update(size);
 
        return 0;
 }
 early_param("log_buf_len", log_buf_len_setup);
 
+static void __init log_buf_add_cpu(void)
+{
+       unsigned int cpu_extra;
+
+       /*
+        * archs should set up cpu_possible_bits properly with
+        * set_cpu_possible() after setup_arch() but just in
+        * case lets ensure this is valid.
+        */
+       if (num_possible_cpus() == 1)
+               return;
+
+       cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN;
+
+       /* by default this only continues past here for systems with > 64 CPUs */
+       if (cpu_extra <= __LOG_BUF_LEN / 2)
+               return;
+
+       pr_info("log_buf_len individual max cpu contribution: %d bytes\n",
+               __LOG_CPU_MAX_BUF_LEN);
+       pr_info("log_buf_len total cpu_extra contributions: %d bytes\n",
+               cpu_extra);
+       pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN);
+
+       log_buf_len_update(cpu_extra + __LOG_BUF_LEN);
+}
+
 void __init setup_log_buf(int early)
 {
        unsigned long flags;
        char *new_log_buf;
        int free;
 
+       if (log_buf != __log_buf)
+               return;
+
+       if (!early && !new_log_buf_len)
+               log_buf_add_cpu();
+
        if (!new_log_buf_len)
                return;
 
        if (early) {
                new_log_buf =
-                       memblock_virt_alloc(new_log_buf_len, PAGE_SIZE);
+                       memblock_virt_alloc(new_log_buf_len, LOG_ALIGN);
        } else {
-               new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len, 0);
+               new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len,
+                                                         LOG_ALIGN);
        }
 
        if (unlikely(!new_log_buf)) {
@@ -872,7 +910,7 @@ void __init setup_log_buf(int early)
        memcpy(log_buf, __log_buf, __LOG_BUF_LEN);
        raw_spin_unlock_irqrestore(&logbuf_lock, flags);
 
-       pr_info("log_buf_len: %d\n", log_buf_len);
+       pr_info("log_buf_len: %d bytes\n", log_buf_len);
        pr_info("early log buf free: %d(%d%%)\n",
                free, (free * 100) / __LOG_BUF_LEN);
 }
@@ -881,7 +919,7 @@ static bool __read_mostly ignore_loglevel;
 
 static int __init ignore_loglevel_setup(char *str)
 {
-       ignore_loglevel = 1;
+       ignore_loglevel = true;
        pr_info("debug: ignoring loglevel setting.\n");
 
        return 0;
@@ -947,11 +985,7 @@ static inline void boot_delay_msec(int level)
 }
 #endif
 
-#if defined(CONFIG_PRINTK_TIME)
-static bool printk_time = 1;
-#else
-static bool printk_time;
-#endif
+static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME);
 module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
 
 static size_t print_time(u64 ts, char *buf)
@@ -1310,7 +1344,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
                         * for pending data, not the size; return the count of
                         * records, not the length.
                         */
-                       error = log_next_idx - syslog_idx;
+                       error = log_next_seq - syslog_seq;
                } else {
                        u64 seq = syslog_seq;
                        u32 idx = syslog_idx;
@@ -1416,10 +1450,9 @@ static int have_callable_console(void)
 /*
  * Can we actually use the console at this time on this cpu?
  *
- * Console drivers may assume that per-cpu resources have
- * been allocated. So unless they're explicitly marked as
- * being able to cope (CON_ANYTIME) don't call them until
- * this CPU is officially up.
+ * Console drivers may assume that per-cpu resources have been allocated. So
+ * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
+ * call them until this CPU is officially up.
  */
 static inline int can_use_console(unsigned int cpu)
 {
@@ -1432,8 +1465,10 @@ static inline int can_use_console(unsigned int cpu)
  * console_lock held, and 'console_locked' set) if it
  * is successful, false otherwise.
  */
-static int console_trylock_for_printk(unsigned int cpu)
+static int console_trylock_for_printk(void)
 {
+       unsigned int cpu = smp_processor_id();
+
        if (!console_trylock())
                return 0;
        /*
@@ -1476,7 +1511,7 @@ static struct cont {
        struct task_struct *owner;      /* task of first print*/
        u64 ts_nsec;                    /* time of first print */
        u8 level;                       /* log level of first message */
-       u8 facility;                    /* log level of first message */
+       u8 facility;                    /* log facility of first message */
        enum log_flags flags;           /* prefix, newline flags */
        bool flushed:1;                 /* buffer sealed and committed */
 } cont;
@@ -1608,7 +1643,8 @@ asmlinkage int vprintk_emit(int facility, int level,
                 */
                if (!oops_in_progress && !lockdep_recursing(current)) {
                        recursion_bug = 1;
-                       goto out_restore_irqs;
+                       local_irq_restore(flags);
+                       return 0;
                }
                zap_locks();
        }
@@ -1716,21 +1752,30 @@ asmlinkage int vprintk_emit(int facility, int level,
 
        logbuf_cpu = UINT_MAX;
        raw_spin_unlock(&logbuf_lock);
+       lockdep_on();
+       local_irq_restore(flags);
 
        /* If called from the scheduler, we can not call up(). */
        if (!in_sched) {
+               lockdep_off();
+               /*
+                * Disable preemption to avoid being preempted while holding
+                * console_sem which would prevent anyone from printing to
+                * console
+                */
+               preempt_disable();
+
                /*
                 * Try to acquire and then immediately release the console
                 * semaphore.  The release will print out buffers and wake up
                 * /dev/kmsg and syslog() users.
                 */
-               if (console_trylock_for_printk(this_cpu))
+               if (console_trylock_for_printk())
                        console_unlock();
+               preempt_enable();
+               lockdep_on();
        }
 
-       lockdep_on();
-out_restore_irqs:
-       local_irq_restore(flags);
        return printed_len;
 }
 EXPORT_SYMBOL(vprintk_emit);
@@ -1802,7 +1847,7 @@ EXPORT_SYMBOL(printk);
 
 #define LOG_LINE_MAX           0
 #define PREFIX_MAX             0
-#define LOG_LINE_MAX 0
+
 static u64 syslog_seq;
 static u32 syslog_idx;
 static u64 console_seq;
@@ -1881,11 +1926,12 @@ static int __add_preferred_console(char *name, int idx, char *options,
        return 0;
 }
 /*
- * Set up a list of consoles.  Called from init/main.c
+ * Set up a console.  Called via do_early_param() in init/main.c
+ * for each "console=" parameter in the boot command line.
  */
 static int __init console_setup(char *str)
 {
-       char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */
+       char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */
        char *s, *options, *brl_options = NULL;
        int idx;
 
@@ -1902,7 +1948,8 @@ static int __init console_setup(char *str)
                strncpy(buf, str, sizeof(buf) - 1);
        }
        buf[sizeof(buf) - 1] = 0;
-       if ((options = strchr(str, ',')) != NULL)
+       options = strchr(str, ',');
+       if (options)
                *(options++) = 0;
 #ifdef __sparc__
        if (!strcmp(str, "ttya"))
@@ -1911,7 +1958,7 @@ static int __init console_setup(char *str)
                strcpy(buf, "ttyS1");
 #endif
        for (s = buf; *s; s++)
-               if ((*s >= '0' && *s <= '9') || *s == ',')
+               if (isdigit(*s) || *s == ',')
                        break;
        idx = simple_strtoul(s, NULL, 10);
        *s = 0;
@@ -1950,7 +1997,6 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha
             i++, c++)
                if (strcmp(c->name, name) == 0 && c->index == idx) {
                        strlcpy(c->name, name_new, sizeof(c->name));
-                       c->name[sizeof(c->name) - 1] = 0;
                        c->options = options;
                        c->index = idx_new;
                        return i;
@@ -1959,12 +2005,12 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha
        return -1;
 }
 
-bool console_suspend_enabled = 1;
+bool console_suspend_enabled = true;
 EXPORT_SYMBOL(console_suspend_enabled);
 
 static int __init console_suspend_disable(char *str)
 {
-       console_suspend_enabled = 0;
+       console_suspend_enabled = false;
        return 1;
 }
 __setup("no_console_suspend", console_suspend_disable);
@@ -2045,8 +2091,8 @@ EXPORT_SYMBOL(console_lock);
 /**
  * console_trylock - try to lock the console system for exclusive use.
  *
- * Tried to acquire a lock which guarantees that the caller has
- * exclusive access to the console system and the console_drivers list.
+ * Try to acquire a lock which guarantees that the caller has exclusive
+ * access to the console system and the console_drivers list.
  *
  * returns 1 on success, and 0 on failure to acquire the lock.
  */
@@ -2618,14 +2664,13 @@ EXPORT_SYMBOL(__printk_ratelimit);
 bool printk_timed_ratelimit(unsigned long *caller_jiffies,
                        unsigned int interval_msecs)
 {
-       if (*caller_jiffies == 0
-                       || !time_in_range(jiffies, *caller_jiffies,
-                                       *caller_jiffies
-                                       + msecs_to_jiffies(interval_msecs))) {
-               *caller_jiffies = jiffies;
-               return true;
-       }
-       return false;
+       unsigned long elapsed = jiffies - *caller_jiffies;
+
+       if (*caller_jiffies && elapsed <= msecs_to_jiffies(interval_msecs))
+               return false;
+
+       *caller_jiffies = jiffies;
+       return true;
 }
 EXPORT_SYMBOL(printk_timed_ratelimit);
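The rewritten printk_timed_ratelimit() keeps the same contract: the caller hands in a zero-initialised timestamp and at most one call per interval returns true. An illustrative caller:

    #include <linux/printk.h>

    static void report_slow_path(void)
    {
            static unsigned long last_warned;   /* must start out as 0 */

            if (printk_timed_ratelimit(&last_warned, 5000 /* ms */))
                    pr_warn("slow path hit; further reports are rate-limited\n");
    }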
 
index 487653b5844f92c722ae781210064824310452a7..aff8aa14f54795faa42ae9482da75e3ea9610a82 100644 (file)
@@ -670,7 +670,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
                        if (cond_func(cpu, info)) {
                                ret = smp_call_function_single(cpu, func,
                                                                info, wait);
-                               WARN_ON_ONCE(!ret);
+                               WARN_ON_ONCE(ret);
                        }
                preempt_enable();
        }
index 75b22e22a72c1abd4865bf7c0313af5514970939..75875a741b5e7f9cb26fcd837609638b1ffafda8 100644 (file)
@@ -1240,8 +1240,7 @@ static struct ctl_table vm_table[] = {
                .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
                .proc_handler   = hugetlb_sysctl_handler,
-               .extra1         = (void *)&hugetlb_zero,
-               .extra2         = (void *)&hugetlb_infinity,
+               .extra1         = &zero,
        },
 #ifdef CONFIG_NUMA
        {
@@ -1250,8 +1249,7 @@ static struct ctl_table vm_table[] = {
                .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
                .proc_handler   = &hugetlb_mempolicy_sysctl_handler,
-               .extra1         = (void *)&hugetlb_zero,
-               .extra2         = (void *)&hugetlb_infinity,
+               .extra1         = &zero,
        },
 #endif
         {
@@ -1274,8 +1272,7 @@ static struct ctl_table vm_table[] = {
                .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
                .proc_handler   = hugetlb_overcommit_handler,
-               .extra1         = (void *)&hugetlb_zero,
-               .extra2         = (void *)&hugetlb_infinity,
+               .extra1         = &zero,
        },
 #endif
        {
index c3319bd1b0408c1f5822748a4d0b1567c799760d..51b29e9d2ba65a700c15cb71923bce1359ebfa41 100644 (file)
@@ -260,9 +260,11 @@ static void watchdog_overflow_callback(struct perf_event *event,
                        return;
 
                if (hardlockup_panic)
-                       panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+                       panic("Watchdog detected hard LOCKUP on cpu %d",
+                             this_cpu);
                else
-                       WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+                       WARN(1, "Watchdog detected hard LOCKUP on cpu %d",
+                            this_cpu);
 
                __this_cpu_write(hard_watchdog_warn, true);
                return;
@@ -345,7 +347,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
                        }
                }
 
-               printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
+               pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
                        smp_processor_id(), duration,
                        current->comm, task_pid_nr(current));
                print_modules();
@@ -484,7 +486,7 @@ static int watchdog_nmi_enable(unsigned int cpu)
        if (PTR_ERR(event) == -EOPNOTSUPP)
                pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
        else if (PTR_ERR(event) == -ENOENT)
-               pr_warning("disabled (cpu%i): hardware events not enabled\n",
+               pr_warn("disabled (cpu%i): hardware events not enabled\n",
                         cpu);
        else
                pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
index a8a775730c09c27c4da8188cf84cc5e6c7608fe0..df872659ddd3d699a3f204b50720c59ff0f29092 100644 (file)
@@ -396,6 +396,39 @@ config CPU_RMAP
 config DQL
        bool
 
+config GLOB
+       bool
+#      This actually supports modular compilation, but the module overhead
+#      is ridiculous for the amount of code involved.  Until an out-of-tree
+#      driver asks for it, we'll just link it directly into the kernel
+#      when required.  Since we're ignoring out-of-tree users, there's also
+#      no need to bother prompting for a manual decision:
+#      prompt "glob_match() function"
+       help
+         This option provides a glob_match function for performing
+         simple text pattern matching.  It originated in the ATA code
+         to blacklist particular drive models, but other device drivers
+         may need similar functionality.
+
+         All drivers in the Linux kernel tree that require this function
+         should automatically select this option.  Say N unless you
+         are compiling an out-of-tree driver which tells you that it
+         depends on this.
+
+config GLOB_SELFTEST
+       bool "glob self-test on init"
+       default n
+       depends on GLOB
+       help
+         This option enables a simple self-test of the glob_match
+         function on startup.  It is primarily useful for people
+         working on the code to ensure they haven't introduced any
+         regressions.
+
+         It only adds a little bit of code and slows kernel boot (or
+         module load) by a small amount, so you're welcome to play with
+         it, but you probably don't need it.
+
 #
 # Netlink attribute parsing support is select'ed if needed
 #
index cfe7df8f62ccb3b46d7b976fdd2acdefd9007540..cb45f59685e69530caf6d5b11f14c2e37d01772c 100644 (file)
@@ -15,7 +15,7 @@ config PRINTK_TIME
          The behavior is also controlled by the kernel command line
          parameter printk.time=1. See Documentation/kernel-parameters.txt
 
-config DEFAULT_MESSAGE_LOGLEVEL
+config MESSAGE_LOGLEVEL_DEFAULT
        int "Default message log level (1-7)"
        range 1 7
        default "4"
index 8427df95dade789ccffd7e7388b377ce00e84fda..d6b4bc496408e5ce44735606286cb923fbbefee3 100644 (file)
@@ -137,6 +137,8 @@ obj-$(CONFIG_CORDIC) += cordic.o
 
 obj-$(CONFIG_DQL) += dynamic_queue_limits.o
 
+obj-$(CONFIG_GLOB) += glob.o
+
 obj-$(CONFIG_MPILIB) += mpi/
 obj-$(CONFIG_SIGNATURE) += digsig.o
 
index 06f7e4fe8d2de4046a3139106058b9c831c9b789..1e031f2c9aba1ba4f536147ef5a2adbc6954addb 100644 (file)
@@ -40,9 +40,9 @@
  * for the best explanations of this ordering.
  */
 
-int __bitmap_empty(const unsigned long *bitmap, int bits)
+int __bitmap_empty(const unsigned long *bitmap, unsigned int bits)
 {
-       int k, lim = bits/BITS_PER_LONG;
+       unsigned int k, lim = bits/BITS_PER_LONG;
        for (k = 0; k < lim; ++k)
                if (bitmap[k])
                        return 0;
@@ -55,9 +55,9 @@ int __bitmap_empty(const unsigned long *bitmap, int bits)
 }
 EXPORT_SYMBOL(__bitmap_empty);
 
-int __bitmap_full(const unsigned long *bitmap, int bits)
+int __bitmap_full(const unsigned long *bitmap, unsigned int bits)
 {
-       int k, lim = bits/BITS_PER_LONG;
+       unsigned int k, lim = bits/BITS_PER_LONG;
        for (k = 0; k < lim; ++k)
                if (~bitmap[k])
                        return 0;
@@ -71,9 +71,9 @@ int __bitmap_full(const unsigned long *bitmap, int bits)
 EXPORT_SYMBOL(__bitmap_full);
 
 int __bitmap_equal(const unsigned long *bitmap1,
-               const unsigned long *bitmap2, int bits)
+               const unsigned long *bitmap2, unsigned int bits)
 {
-       int k, lim = bits/BITS_PER_LONG;
+       unsigned int k, lim = bits/BITS_PER_LONG;
        for (k = 0; k < lim; ++k)
                if (bitmap1[k] != bitmap2[k])
                        return 0;
@@ -86,14 +86,14 @@ int __bitmap_equal(const unsigned long *bitmap1,
 }
 EXPORT_SYMBOL(__bitmap_equal);
 
-void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits)
+void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits)
 {
-       int k, lim = bits/BITS_PER_LONG;
+       unsigned int k, lim = bits/BITS_PER_LONG;
        for (k = 0; k < lim; ++k)
                dst[k] = ~src[k];
 
        if (bits % BITS_PER_LONG)
-               dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits);
+               dst[k] = ~src[k];
 }
 EXPORT_SYMBOL(__bitmap_complement);
 
@@ -182,23 +182,26 @@ void __bitmap_shift_left(unsigned long *dst,
 EXPORT_SYMBOL(__bitmap_shift_left);
 
 int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
-                               const unsigned long *bitmap2, int bits)
+                               const unsigned long *bitmap2, unsigned int bits)
 {
-       int k;
-       int nr = BITS_TO_LONGS(bits);
+       unsigned int k;
+       unsigned int lim = bits/BITS_PER_LONG;
        unsigned long result = 0;
 
-       for (k = 0; k < nr; k++)
+       for (k = 0; k < lim; k++)
                result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+       if (bits % BITS_PER_LONG)
+               result |= (dst[k] = bitmap1[k] & bitmap2[k] &
+                          BITMAP_LAST_WORD_MASK(bits));
        return result != 0;
 }
 EXPORT_SYMBOL(__bitmap_and);
 
 void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
-                               const unsigned long *bitmap2, int bits)
+                               const unsigned long *bitmap2, unsigned int bits)
 {
-       int k;
-       int nr = BITS_TO_LONGS(bits);
+       unsigned int k;
+       unsigned int nr = BITS_TO_LONGS(bits);
 
        for (k = 0; k < nr; k++)
                dst[k] = bitmap1[k] | bitmap2[k];
@@ -206,10 +209,10 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
 EXPORT_SYMBOL(__bitmap_or);
 
 void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
-                               const unsigned long *bitmap2, int bits)
+                               const unsigned long *bitmap2, unsigned int bits)
 {
-       int k;
-       int nr = BITS_TO_LONGS(bits);
+       unsigned int k;
+       unsigned int nr = BITS_TO_LONGS(bits);
 
        for (k = 0; k < nr; k++)
                dst[k] = bitmap1[k] ^ bitmap2[k];
@@ -217,22 +220,25 @@ void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
 EXPORT_SYMBOL(__bitmap_xor);
 
 int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
-                               const unsigned long *bitmap2, int bits)
+                               const unsigned long *bitmap2, unsigned int bits)
 {
-       int k;
-       int nr = BITS_TO_LONGS(bits);
+       unsigned int k;
+       unsigned int lim = bits/BITS_PER_LONG;
        unsigned long result = 0;
 
-       for (k = 0; k < nr; k++)
+       for (k = 0; k < lim; k++)
                result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
+       if (bits % BITS_PER_LONG)
+               result |= (dst[k] = bitmap1[k] & ~bitmap2[k] &
+                          BITMAP_LAST_WORD_MASK(bits));
        return result != 0;
 }
 EXPORT_SYMBOL(__bitmap_andnot);
 
 int __bitmap_intersects(const unsigned long *bitmap1,
-                               const unsigned long *bitmap2, int bits)
+                       const unsigned long *bitmap2, unsigned int bits)
 {
-       int k, lim = bits/BITS_PER_LONG;
+       unsigned int k, lim = bits/BITS_PER_LONG;
        for (k = 0; k < lim; ++k)
                if (bitmap1[k] & bitmap2[k])
                        return 1;
@@ -245,9 +251,9 @@ int __bitmap_intersects(const unsigned long *bitmap1,
 EXPORT_SYMBOL(__bitmap_intersects);
 
 int __bitmap_subset(const unsigned long *bitmap1,
-                               const unsigned long *bitmap2, int bits)
+                   const unsigned long *bitmap2, unsigned int bits)
 {
-       int k, lim = bits/BITS_PER_LONG;
+       unsigned int k, lim = bits/BITS_PER_LONG;
        for (k = 0; k < lim; ++k)
                if (bitmap1[k] & ~bitmap2[k])
                        return 0;
@@ -259,9 +265,10 @@ int __bitmap_subset(const unsigned long *bitmap1,
 }
 EXPORT_SYMBOL(__bitmap_subset);
 
-int __bitmap_weight(const unsigned long *bitmap, int bits)
+int __bitmap_weight(const unsigned long *bitmap, unsigned int bits)
 {
-       int k, w = 0, lim = bits/BITS_PER_LONG;
+       unsigned int k, lim = bits/BITS_PER_LONG;
+       int w = 0;
 
        for (k = 0; k < lim; k++)
                w += hweight_long(bitmap[k]);
@@ -273,42 +280,42 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
 }
 EXPORT_SYMBOL(__bitmap_weight);
 
-void bitmap_set(unsigned long *map, int start, int nr)
+void bitmap_set(unsigned long *map, unsigned int start, int len)
 {
        unsigned long *p = map + BIT_WORD(start);
-       const int size = start + nr;
+       const unsigned int size = start + len;
        int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
        unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
 
-       while (nr - bits_to_set >= 0) {
+       while (len - bits_to_set >= 0) {
                *p |= mask_to_set;
-               nr -= bits_to_set;
+               len -= bits_to_set;
                bits_to_set = BITS_PER_LONG;
                mask_to_set = ~0UL;
                p++;
        }
-       if (nr) {
+       if (len) {
                mask_to_set &= BITMAP_LAST_WORD_MASK(size);
                *p |= mask_to_set;
        }
 }
 EXPORT_SYMBOL(bitmap_set);
 
-void bitmap_clear(unsigned long *map, int start, int nr)
+void bitmap_clear(unsigned long *map, unsigned int start, int len)
 {
        unsigned long *p = map + BIT_WORD(start);
-       const int size = start + nr;
+       const unsigned int size = start + len;
        int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
        unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
 
-       while (nr - bits_to_clear >= 0) {
+       while (len - bits_to_clear >= 0) {
                *p &= ~mask_to_clear;
-               nr -= bits_to_clear;
+               len -= bits_to_clear;
                bits_to_clear = BITS_PER_LONG;
                mask_to_clear = ~0UL;
                p++;
        }
-       if (nr) {
+       if (len) {
                mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
                *p &= ~mask_to_clear;
        }
@@ -664,13 +671,8 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
 
 int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
 {
-       char *nl  = strchr(bp, '\n');
-       int len;
-
-       if (nl)
-               len = nl - bp;
-       else
-               len = strlen(bp);
+       char *nl  = strchrnul(bp, '\n');
+       int len = nl - bp;
 
        return __bitmap_parselist(bp, len, 0, maskp, nmaskbits);
 }
@@ -716,7 +718,7 @@ EXPORT_SYMBOL(bitmap_parselist_user);
  *
  * If for example, just bits 4 through 7 are set in @buf, then @pos
  * values 4 through 7 will get mapped to 0 through 3, respectively,
- * and other @pos values will get mapped to 0.  When @pos value 7
+ * and other @pos values will get mapped to -1.  When @pos value 7
  * gets mapped to (returns) @ord value 3 in this example, that means
  * that bit 7 is the 3rd (starting with 0th) set bit in @buf.
  *
@@ -1046,7 +1048,7 @@ enum {
        REG_OP_RELEASE,         /* clear all bits in region */
 };
 
-static int __reg_op(unsigned long *bitmap, int pos, int order, int reg_op)
+static int __reg_op(unsigned long *bitmap, unsigned int pos, int order, int reg_op)
 {
        int nbits_reg;          /* number of bits in region */
        int index;              /* index first long of region in bitmap */
@@ -1112,11 +1114,11 @@ done:
  * Return the bit offset in bitmap of the allocated region,
  * or -errno on failure.
  */
-int bitmap_find_free_region(unsigned long *bitmap, int bits, int order)
+int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order)
 {
-       int pos, end;           /* scans bitmap by regions of size order */
+       unsigned int pos, end;          /* scans bitmap by regions of size order */
 
-       for (pos = 0 ; (end = pos + (1 << order)) <= bits; pos = end) {
+       for (pos = 0 ; (end = pos + (1U << order)) <= bits; pos = end) {
                if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
                        continue;
                __reg_op(bitmap, pos, order, REG_OP_ALLOC);
@@ -1137,7 +1139,7 @@ EXPORT_SYMBOL(bitmap_find_free_region);
  *
  * No return value.
  */
-void bitmap_release_region(unsigned long *bitmap, int pos, int order)
+void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order)
 {
        __reg_op(bitmap, pos, order, REG_OP_RELEASE);
 }
@@ -1154,12 +1156,11 @@ EXPORT_SYMBOL(bitmap_release_region);
  * Return 0 on success, or %-EBUSY if specified region wasn't
  * free (not all bits were zero).
  */
-int bitmap_allocate_region(unsigned long *bitmap, int pos, int order)
+int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order)
 {
        if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
                return -EBUSY;
-       __reg_op(bitmap, pos, order, REG_OP_ALLOC);
-       return 0;
+       return __reg_op(bitmap, pos, order, REG_OP_ALLOC);
 }
 EXPORT_SYMBOL(bitmap_allocate_region);
 
index d4932f745e9214aaf62d8ad54b0f1092dd555414..76a712e6e20e3d0480d9f338565079a9089560dd 100644 (file)
@@ -121,11 +121,7 @@ EXPORT_SYMBOL(get_options);
  *     @retptr: (output) Optional pointer to next char after parse completes
  *
  *     Parses a string into a number.  The number stored at @ptr is
- *     potentially suffixed with %K (for kilobytes, or 1024 bytes),
- *     %M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or
- *     1073741824).  If the number is suffixed with K, M, or G, then
- *     the return value is the number multiplied by one kilobyte, one
- *     megabyte, or one gigabyte, respectively.
+ *     potentially suffixed with K, M, G, T, P, E.
  */
 
 unsigned long long memparse(const char *ptr, char **retptr)
@@ -135,6 +131,15 @@ unsigned long long memparse(const char *ptr, char **retptr)
        unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
 
        switch (*endptr) {
+       case 'E':
+       case 'e':
+               ret <<= 10;
+       case 'P':
+       case 'p':
+               ret <<= 10;
+       case 'T':
+       case 't':
+               ret <<= 10;
        case 'G':
        case 'g':
                ret <<= 10;
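
With the new E/P/T cases falling through into the existing G/M/K ones, each recognized suffix simply accumulates another <<10, so 'T' ends up shifting by 40 and 'E' by 60. A standalone analogue using strtoull(3), for illustration only (memparse_sketch is a made-up name):

    #include <stdio.h>
    #include <stdlib.h>

    /* Parse a number with an optional K/M/G/T/P/E suffix (either case).
     * Each case deliberately falls through, mirroring the switch in
     * memparse() above: 'T' accumulates four <<10 shifts, i.e. <<40. */
    static unsigned long long memparse_sketch(const char *ptr, char **retptr)
    {
            char *endptr;
            unsigned long long ret = strtoull(ptr, &endptr, 0);

            switch (*endptr) {
            case 'E': case 'e': ret <<= 10; /* fall through */
            case 'P': case 'p': ret <<= 10; /* fall through */
            case 'T': case 't': ret <<= 10; /* fall through */
            case 'G': case 'g': ret <<= 10; /* fall through */
            case 'M': case 'm': ret <<= 10; /* fall through */
            case 'K': case 'k': ret <<= 10; endptr++; /* fall through */
            default:
                    break;
            }
            if (retptr)
                    *retptr = endptr;
            return ret;
    }

    int main(void)
    {
            printf("%llu\n", memparse_sketch("2T", NULL)); /* 2199023255552, i.e. 2 << 40 */
            return 0;
    }
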
diff --git a/lib/glob.c b/lib/glob.c
new file mode 100644 (file)
index 0000000..500fc80
--- /dev/null
@@ -0,0 +1,287 @@
+#include <linux/module.h>
+#include <linux/glob.h>
+
+/*
+ * The only reason this code can be compiled as a module is because the
+ * ATA code that depends on it can be as well.  In practice, they're
+ * both usually compiled in and the module overhead goes away.
+ */
+MODULE_DESCRIPTION("glob(7) matching");
+MODULE_LICENSE("Dual MIT/GPL");
+
+/**
+ * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
+ * @pat: Shell-style pattern to match, e.g. "*.[ch]".
+ * @str: String to match.  The pattern must match the entire string.
+ *
+ * Perform shell-style glob matching, returning true (1) if the match
+ * succeeds, or false (0) if it fails.  Equivalent to !fnmatch(@pat, @str, 0).
+ *
+ * Pattern metacharacters are ?, *, [ and \.
+ * (And, inside character classes, !, - and ].)
+ *
+ * This is a small and simple implementation intended for device blacklists
+ * where a string is matched against a number of patterns.  Thus, it
+ * does not preprocess the patterns.  It is non-recursive, and run-time
+ * is at most quadratic: strlen(@str)*strlen(@pat).
+ *
+ * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
+ * it takes 6 passes over the pattern before matching the string.
+ *
+ * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
+ * treat / or leading . specially; it isn't actually used for pathnames.
+ *
+ * Note that according to glob(7) (and unlike bash), character classes
+ * are complemented by a leading !; this does not support the regex-style
+ * [^a-z] syntax.
+ *
+ * An opening bracket without a matching close is matched literally.
+ */
+bool __pure glob_match(char const *pat, char const *str)
+{
+       /*
+        * Backtrack to previous * on mismatch and retry starting one
+        * character later in the string.  Because * matches all characters
+        * (no exception for /), it can be easily proved that there's
+        * never a need to backtrack multiple levels.
+        */
+       char const *back_pat = NULL, *back_str = back_str;
+
+       /*
+        * Loop over each token (character or class) in pat, matching
+        * it against the remaining unmatched tail of str.  Return false
+        * on mismatch, or true after matching the trailing nul bytes.
+        */
+       for (;;) {
+               unsigned char c = *str++;
+               unsigned char d = *pat++;
+
+               switch (d) {
+               case '?':       /* Wildcard: anything but nul */
+                       if (c == '\0')
+                               return false;
+                       break;
+               case '*':       /* Any-length wildcard */
+                       if (*pat == '\0')       /* Optimize trailing * case */
+                               return true;
+                       back_pat = pat;
+                       back_str = --str;       /* Allow zero-length match */
+                       break;
+               case '[': {     /* Character class */
+                       bool match = false, inverted = (*pat == '!');
+                       char const *class = pat + inverted;
+                       unsigned char a = *class++;
+
+                       /*
+                        * Iterate over each span in the character class.
+                        * A span is either a single character a, or a
+                        * range a-b.  The first span may begin with ']'.
+                        */
+                       do {
+                               unsigned char b = a;
+
+                               if (a == '\0')  /* Malformed */
+                                       goto literal;
+
+                               if (class[0] == '-' && class[1] != ']') {
+                                       b = class[1];
+
+                                       if (b == '\0')
+                                               goto literal;
+
+                                       class += 2;
+                                       /* Any special action if a > b? */
+                               }
+                               match |= (a <= c && c <= b);
+                       } while ((a = *class++) != ']');
+
+                       if (match == inverted)
+                               goto backtrack;
+                       pat = class;
+                       }
+                       break;
+               case '\\':
+                       d = *pat++;
+                       /*FALLTHROUGH*/
+               default:        /* Literal character */
+literal:
+                       if (c == d) {
+                               if (d == '\0')
+                                       return true;
+                               break;
+                       }
+backtrack:
+                       if (c == '\0' || !back_pat)
+                               return false;   /* No point continuing */
+                       /* Try again from last *, one character later in str. */
+                       pat = back_pat;
+                       str = ++back_str;
+                       break;
+               }
+       }
+}
+EXPORT_SYMBOL(glob_match);
+
+
+#ifdef CONFIG_GLOB_SELFTEST
+
+#include <linux/printk.h>
+#include <linux/moduleparam.h>
+
+/* Boot with "glob.verbose=1" to show successful tests, too */
+static bool verbose = false;
+module_param(verbose, bool, 0);
+
+struct glob_test {
+       char const *pat, *str;
+       bool expected;
+};
+
+static bool __pure __init test(char const *pat, char const *str, bool expected)
+{
+       bool match = glob_match(pat, str);
+       bool success = match == expected;
+
+       /* Can't get string literals into a particular section, so... */
+       static char const msg_error[] __initconst =
+               KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n";
+       static char const msg_ok[] __initconst =
+               KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n";
+       static char const mismatch[] __initconst = "mismatch";
+       char const *message;
+
+       if (!success)
+               message = msg_error;
+       else if (verbose)
+               message = msg_ok;
+       else
+               return success;
+
+       printk(message, pat, str, mismatch + 3*match);
+       return success;
+}
+
+/*
+ * The tests are all jammed together in one array to make it simpler
+ * to place that array in the .init.rodata section.  The obvious
+ * "array of structures containing char *" has no way to force the
+ * pointed-to strings to be in a particular section.
+ *
+ * Anyway, a test consists of:
+ * 1. Expected glob_match result: '1' or '0'.
+ * 2. Pattern to match: null-terminated string
+ * 3. String to match against: null-terminated string
+ *
+ * The list of tests is terminated with a final '\0' instead of
+ * a glob_match result character.
+ */
+static char const glob_tests[] __initconst =
+       /* Some basic tests */
+       "1" "a\0" "a\0"
+       "0" "a\0" "b\0"
+       "0" "a\0" "aa\0"
+       "0" "a\0" "\0"
+       "1" "\0" "\0"
+       "0" "\0" "a\0"
+       /* Simple character class tests */
+       "1" "[a]\0" "a\0"
+       "0" "[a]\0" "b\0"
+       "0" "[!a]\0" "a\0"
+       "1" "[!a]\0" "b\0"
+       "1" "[ab]\0" "a\0"
+       "1" "[ab]\0" "b\0"
+       "0" "[ab]\0" "c\0"
+       "1" "[!ab]\0" "c\0"
+       "1" "[a-c]\0" "b\0"
+       "0" "[a-c]\0" "d\0"
+       /* Corner cases in character class parsing */
+       "1" "[a-c-e-g]\0" "-\0"
+       "0" "[a-c-e-g]\0" "d\0"
+       "1" "[a-c-e-g]\0" "f\0"
+       "1" "[]a-ceg-ik[]\0" "a\0"
+       "1" "[]a-ceg-ik[]\0" "]\0"
+       "1" "[]a-ceg-ik[]\0" "[\0"
+       "1" "[]a-ceg-ik[]\0" "h\0"
+       "0" "[]a-ceg-ik[]\0" "f\0"
+       "0" "[!]a-ceg-ik[]\0" "h\0"
+       "0" "[!]a-ceg-ik[]\0" "]\0"
+       "1" "[!]a-ceg-ik[]\0" "f\0"
+       /* Simple wild cards */
+       "1" "?\0" "a\0"
+       "0" "?\0" "aa\0"
+       "0" "??\0" "a\0"
+       "1" "?x?\0" "axb\0"
+       "0" "?x?\0" "abx\0"
+       "0" "?x?\0" "xab\0"
+       /* Asterisk wild cards (backtracking) */
+       "0" "*??\0" "a\0"
+       "1" "*??\0" "ab\0"
+       "1" "*??\0" "abc\0"
+       "1" "*??\0" "abcd\0"
+       "0" "??*\0" "a\0"
+       "1" "??*\0" "ab\0"
+       "1" "??*\0" "abc\0"
+       "1" "??*\0" "abcd\0"
+       "0" "?*?\0" "a\0"
+       "1" "?*?\0" "ab\0"
+       "1" "?*?\0" "abc\0"
+       "1" "?*?\0" "abcd\0"
+       "1" "*b\0" "b\0"
+       "1" "*b\0" "ab\0"
+       "0" "*b\0" "ba\0"
+       "1" "*b\0" "bb\0"
+       "1" "*b\0" "abb\0"
+       "1" "*b\0" "bab\0"
+       "1" "*bc\0" "abbc\0"
+       "1" "*bc\0" "bc\0"
+       "1" "*bc\0" "bbc\0"
+       "1" "*bc\0" "bcbc\0"
+       /* Multiple asterisks (complex backtracking) */
+       "1" "*ac*\0" "abacadaeafag\0"
+       "1" "*ac*ae*ag*\0" "abacadaeafag\0"
+       "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
+       "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
+       "1" "*abcd*\0" "abcabcabcabcdefg\0"
+       "1" "*ab*cd*\0" "abcabcabcabcdefg\0"
+       "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
+       "0" "*abcd*\0" "abcabcabcabcefg\0"
+       "0" "*ab*cd*\0" "abcabcabcabcefg\0";
+
+static int __init glob_init(void)
+{
+       unsigned successes = 0;
+       unsigned n = 0;
+       char const *p = glob_tests;
+       static char const message[] __initconst =
+               KERN_INFO "glob: %u self-tests passed, %u failed\n";
+
+       /*
+        * Tests are jammed together in a string.  The first byte is '1'
+        * or '0' to indicate the expected outcome, or '\0' to indicate the
+        * end of the tests.  Then come two null-terminated strings: the
+        * pattern and the string to match it against.
+        */
+       while (*p) {
+               bool expected = *p++ & 1;
+               char const *pat = p;
+
+               p += strlen(p) + 1;
+               successes += test(pat, p, expected);
+               p += strlen(p) + 1;
+               n++;
+       }
+
+       n -= successes;
+       printk(message, successes, n);
+
+       /* What's the errno for "kernel bug detected"?  Guess... */
+       return n ? -ECANCELED : 0;
+}
+
+/* We need a dummy exit function to allow unload */
+static void __exit glob_fini(void) { }
+
+module_init(glob_init);
+module_exit(glob_fini);
+
+#endif /* CONFIG_GLOB_SELFTEST */
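
The kernel-doc above defines glob_match(pat, str) as equivalent to !fnmatch(pat, str, 0), with no special treatment of '/' or a leading '.'. That equivalence can be exercised from userspace with POSIX fnmatch(3); the sketch below runs the kernel-doc example plus a few of the self-test patterns through it. How faithfully a particular libc handles the bracket-expression corner cases is an assumption here, and the CONFIG_GLOB_SELFTEST table above remains the authoritative check for the kernel implementation.

    #include <stdio.h>
    #include <fnmatch.h>

    int main(void)
    {
            /* The "*.[ch]" case is from the kernel-doc example; the rest are
             * taken from the glob_tests table above. */
            static const struct {
                    const char *pat, *str;
                    int expected;
            } tests[] = {
                    { "*.[ch]",            "glob.c",       1 },
                    { "*??",               "ab",           1 },
                    { "*a*b*[bc]*[ef]*g*", "abacadaeafag", 1 },
                    { "[!]a-ceg-ik[]",     "h",            0 },
            };
            unsigned int i;

            for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
                    int match = !fnmatch(tests[i].pat, tests[i].str, 0);

                    printf("\"%s\" vs \"%s\": %s (%s)\n",
                           tests[i].pat, tests[i].str,
                           match ? "match" : "mismatch",
                           match == tests[i].expected ? "OK" : "DIFFERS");
            }
            return 0;
    }
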
index 358a368a2947057ef9d9309c9dd4fc893523d63d..89b485a2a58d1755850d9baa2c50a1dc58fa3a15 100644 (file)
@@ -140,11 +140,11 @@ void klist_add_tail(struct klist_node *n, struct klist *k)
 EXPORT_SYMBOL_GPL(klist_add_tail);
 
 /**
- * klist_add_after - Init a klist_node and add it after an existing node
+ * klist_add_behind - Init a klist_node and add it after an existing node
  * @n: node we're adding.
  * @pos: node to put @n after
  */
-void klist_add_after(struct klist_node *n, struct klist_node *pos)
+void klist_add_behind(struct klist_node *n, struct klist_node *pos)
 {
        struct klist *k = knode_klist(pos);
 
@@ -153,7 +153,7 @@ void klist_add_after(struct klist_node *n, struct klist_node *pos)
        list_add(&n->n_node, &pos->n_node);
        spin_unlock(&k->k_lock);
 }
-EXPORT_SYMBOL_GPL(klist_add_after);
+EXPORT_SYMBOL_GPL(klist_add_behind);
 
 /**
  * klist_add_before - Init a klist_node and add it before an existing node
index 1183fa70a44d26cfc673b6d5d2f645b7026f50dd..12bcba1c8612bcf998fe39548432548613591911 100644 (file)
@@ -1,3 +1,6 @@
+
+#define pr_fmt(fmt) "list_sort_test: " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/list_sort.h>
@@ -47,6 +50,7 @@ static void merge_and_restore_back_links(void *priv,
                                struct list_head *a, struct list_head *b)
 {
        struct list_head *tail = head;
+       u8 count = 0;
 
        while (a && b) {
                /* if equal, take 'a' -- important for sort stability */
@@ -70,7 +74,8 @@ static void merge_and_restore_back_links(void *priv,
                 * element comparison is needed, so the client's cmp()
                 * routine can invoke cond_resched() periodically.
                 */
-               (*cmp)(priv, tail->next, tail->next);
+               if (unlikely(!(++count)))
+                       (*cmp)(priv, tail->next, tail->next);
 
                tail->next->prev = tail;
                tail = tail->next;
@@ -123,9 +128,7 @@ void list_sort(void *priv, struct list_head *head,
                }
                if (lev > max_lev) {
                        if (unlikely(lev >= ARRAY_SIZE(part)-1)) {
-                               printk_once(KERN_DEBUG "list passed to"
-                                       " list_sort() too long for"
-                                       " efficiency\n");
+                               printk_once(KERN_DEBUG "list too long for efficiency\n");
                                lev--;
                        }
                        max_lev = lev;
@@ -168,27 +171,25 @@ static struct debug_el **elts __initdata;
 static int __init check(struct debug_el *ela, struct debug_el *elb)
 {
        if (ela->serial >= TEST_LIST_LEN) {
-               printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
-                               ela->serial);
+               pr_err("error: incorrect serial %d\n", ela->serial);
                return -EINVAL;
        }
        if (elb->serial >= TEST_LIST_LEN) {
-               printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
-                               elb->serial);
+               pr_err("error: incorrect serial %d\n", elb->serial);
                return -EINVAL;
        }
        if (elts[ela->serial] != ela || elts[elb->serial] != elb) {
-               printk(KERN_ERR "list_sort_test: error: phantom element\n");
+               pr_err("error: phantom element\n");
                return -EINVAL;
        }
        if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) {
-               printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
-                               ela->poison1, ela->poison2);
+               pr_err("error: bad poison: %#x/%#x\n",
+                       ela->poison1, ela->poison2);
                return -EINVAL;
        }
        if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) {
-               printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
-                               elb->poison1, elb->poison2);
+               pr_err("error: bad poison: %#x/%#x\n",
+                       elb->poison1, elb->poison2);
                return -EINVAL;
        }
        return 0;
@@ -207,25 +208,23 @@ static int __init cmp(void *priv, struct list_head *a, struct list_head *b)
 
 static int __init list_sort_test(void)
 {
-       int i, count = 1, err = -EINVAL;
+       int i, count = 1, err = -ENOMEM;
        struct debug_el *el;
-       struct list_head *cur, *tmp;
+       struct list_head *cur;
        LIST_HEAD(head);
 
-       printk(KERN_DEBUG "list_sort_test: start testing list_sort()\n");
+       pr_debug("start testing list_sort()\n");
 
-       elts = kmalloc(sizeof(void *) * TEST_LIST_LEN, GFP_KERNEL);
+       elts = kcalloc(TEST_LIST_LEN, sizeof(*elts), GFP_KERNEL);
        if (!elts) {
-               printk(KERN_ERR "list_sort_test: error: cannot allocate "
-                               "memory\n");
-               goto exit;
+               pr_err("error: cannot allocate memory\n");
+               return err;
        }
 
        for (i = 0; i < TEST_LIST_LEN; i++) {
                el = kmalloc(sizeof(*el), GFP_KERNEL);
                if (!el) {
-                       printk(KERN_ERR "list_sort_test: error: cannot "
-                                       "allocate memory\n");
+                       pr_err("error: cannot allocate memory\n");
                        goto exit;
                }
                 /* force some equivalencies */
@@ -239,52 +238,52 @@ static int __init list_sort_test(void)
 
        list_sort(NULL, &head, cmp);
 
+       err = -EINVAL;
        for (cur = head.next; cur->next != &head; cur = cur->next) {
                struct debug_el *el1;
                int cmp_result;
 
                if (cur->next->prev != cur) {
-                       printk(KERN_ERR "list_sort_test: error: list is "
-                                       "corrupted\n");
+                       pr_err("error: list is corrupted\n");
                        goto exit;
                }
 
                cmp_result = cmp(NULL, cur, cur->next);
                if (cmp_result > 0) {
-                       printk(KERN_ERR "list_sort_test: error: list is not "
-                                       "sorted\n");
+                       pr_err("error: list is not sorted\n");
                        goto exit;
                }
 
                el = container_of(cur, struct debug_el, list);
                el1 = container_of(cur->next, struct debug_el, list);
                if (cmp_result == 0 && el->serial >= el1->serial) {
-                       printk(KERN_ERR "list_sort_test: error: order of "
-                                       "equivalent elements not preserved\n");
+                       pr_err("error: order of equivalent elements not "
+                               "preserved\n");
                        goto exit;
                }
 
                if (check(el, el1)) {
-                       printk(KERN_ERR "list_sort_test: error: element check "
-                                       "failed\n");
+                       pr_err("error: element check failed\n");
                        goto exit;
                }
                count++;
        }
+       if (head.prev != cur) {
+               pr_err("error: list is corrupted\n");
+               goto exit;
+       }
+
 
        if (count != TEST_LIST_LEN) {
-               printk(KERN_ERR "list_sort_test: error: bad list length %d",
-                               count);
+               pr_err("error: bad list length %d", count);
                goto exit;
        }
 
        err = 0;
 exit:
+       for (i = 0; i < TEST_LIST_LEN; i++)
+               kfree(elts[i]);
        kfree(elts);
-       list_for_each_safe(cur, tmp, &head) {
-               list_del(cur);
-               kfree(container_of(cur, struct debug_el, list));
-       }
        return err;
 }
 module_init(list_sort_test);
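
The merge_and_restore_back_links() hunk above now invokes the client cmp() only once every 256 nodes of the final back-link walk, by letting a u8 counter wrap to zero; a cmp() that calls cond_resched() still gets a regular opportunity to do so without the cost of a real comparison per element. A tiny userspace illustration of the wrap-around throttle (the names here are illustrative):

    #include <stdio.h>
    #include <stdint.h>

    static void periodic_work(unsigned long i)
    {
            printf("callback at element %lu\n", i);
    }

    int main(void)
    {
            uint8_t count = 0;      /* wraps 255 -> 0, like the u8 in the hunk above */
            unsigned long i;

            for (i = 0; i < 1000; i++) {
                    if (!++count)   /* true once every 256 iterations */
                            periodic_work(i);
            }
            return 0;
    }
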
index ed5c1454dd6288aacf85e4ee7db5e2e3ee947627..29033f319aea1f8f48e85374884f657a130519a4 100644 (file)
 int string_get_size(u64 size, const enum string_size_units units,
                    char *buf, int len)
 {
-       static const char *units_10[] = { "B", "kB", "MB", "GB", "TB", "PB",
-                                  "EB", "ZB", "YB", NULL};
-       static const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB",
-                                "EiB", "ZiB", "YiB", NULL };
-       static const char **units_str[] = {
-               [STRING_UNITS_10] =  units_10,
+       static const char *const units_10[] = {
+               "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", NULL
+       };
+       static const char *const units_2[] = {
+               "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB",
+               NULL
+       };
+       static const char *const *const units_str[] = {
+               [STRING_UNITS_10] = units_10,
                [STRING_UNITS_2] = units_2,
        };
        static const unsigned int divisor[] = {
index bea3f3fa3f02a920fb92b9293c0adf06d72a52a6..4137bca5f8e8e5008ca88b7fcb801096856eaf0c 100644 (file)
@@ -3,7 +3,7 @@
 #include <linux/module.h>
 
 #define for_each_test(i, test) \
-       for (i = 0; i < sizeof(test) / sizeof(test[0]); i++)
+       for (i = 0; i < ARRAY_SIZE(test); i++)
 
 struct test_fail {
        const char *str;
index d63381e8e3331064463283420d872842e27c8a9a..d20ef458f1374ed62962a494b185926f90b547f8 100644 (file)
@@ -249,52 +249,6 @@ int zlib_deflateInit2(
     return zlib_deflateReset(strm);
 }
 
-/* ========================================================================= */
-#if 0
-int zlib_deflateSetDictionary(
-       z_streamp strm,
-       const Byte *dictionary,
-       uInt  dictLength
-)
-{
-    deflate_state *s;
-    uInt length = dictLength;
-    uInt n;
-    IPos hash_head = 0;
-
-    if (strm == NULL || strm->state == NULL || dictionary == NULL)
-       return Z_STREAM_ERROR;
-
-    s = (deflate_state *) strm->state;
-    if (s->status != INIT_STATE) return Z_STREAM_ERROR;
-
-    strm->adler = zlib_adler32(strm->adler, dictionary, dictLength);
-
-    if (length < MIN_MATCH) return Z_OK;
-    if (length > MAX_DIST(s)) {
-       length = MAX_DIST(s);
-#ifndef USE_DICT_HEAD
-       dictionary += dictLength - length; /* use the tail of the dictionary */
-#endif
-    }
-    memcpy((char *)s->window, dictionary, length);
-    s->strstart = length;
-    s->block_start = (long)length;
-
-    /* Insert all strings in the hash table (except for the last two bytes).
-     * s->lookahead stays null, so s->ins_h will be recomputed at the next
-     * call of fill_window.
-     */
-    s->ins_h = s->window[0];
-    UPDATE_HASH(s, s->ins_h, s->window[1]);
-    for (n = 0; n <= length - MIN_MATCH; n++) {
-       INSERT_STRING(s, n, hash_head);
-    }
-    if (hash_head) hash_head = 0;  /* to make compiler happy */
-    return Z_OK;
-}
-#endif  /*  0  */
-
 /* ========================================================================= */
 int zlib_deflateReset(
        z_streamp strm
@@ -326,45 +280,6 @@ int zlib_deflateReset(
     return Z_OK;
 }
 
-/* ========================================================================= */
-#if 0
-int zlib_deflateParams(
-       z_streamp strm,
-       int level,
-       int strategy
-)
-{
-    deflate_state *s;
-    compress_func func;
-    int err = Z_OK;
-
-    if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
-    s = (deflate_state *) strm->state;
-
-    if (level == Z_DEFAULT_COMPRESSION) {
-       level = 6;
-    }
-    if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) {
-       return Z_STREAM_ERROR;
-    }
-    func = configuration_table[s->level].func;
-
-    if (func != configuration_table[level].func && strm->total_in != 0) {
-       /* Flush the last buffer: */
-       err = zlib_deflate(strm, Z_PARTIAL_FLUSH);
-    }
-    if (s->level != level) {
-       s->level = level;
-       s->max_lazy_match   = configuration_table[level].max_lazy;
-       s->good_match       = configuration_table[level].good_length;
-       s->nice_match       = configuration_table[level].nice_length;
-       s->max_chain_length = configuration_table[level].max_chain;
-    }
-    s->strategy = strategy;
-    return err;
-}
-#endif  /*  0  */
-
 /* =========================================================================
  * Put a short in the pending buffer. The 16-bit value is put in MSB order.
  * IN assertion: the stream state is correct and there is enough room in
@@ -568,64 +483,6 @@ int zlib_deflateEnd(
     return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
 }
 
-/* =========================================================================
- * Copy the source state to the destination state.
- */
-#if 0
-int zlib_deflateCopy (
-       z_streamp dest,
-       z_streamp source
-)
-{
-#ifdef MAXSEG_64K
-    return Z_STREAM_ERROR;
-#else
-    deflate_state *ds;
-    deflate_state *ss;
-    ush *overlay;
-    deflate_workspace *mem;
-
-
-    if (source == NULL || dest == NULL || source->state == NULL) {
-        return Z_STREAM_ERROR;
-    }
-
-    ss = (deflate_state *) source->state;
-
-    *dest = *source;
-
-    mem = (deflate_workspace *) dest->workspace;
-
-    ds = &(mem->deflate_memory);
-
-    dest->state = (struct internal_state *) ds;
-    *ds = *ss;
-    ds->strm = dest;
-
-    ds->window = (Byte *) mem->window_memory;
-    ds->prev   = (Pos *)  mem->prev_memory;
-    ds->head   = (Pos *)  mem->head_memory;
-    overlay = (ush *) mem->overlay_memory;
-    ds->pending_buf = (uch *) overlay;
-
-    memcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
-    memcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos));
-    memcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos));
-    memcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
-
-    ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
-    ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
-    ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
-
-    ds->l_desc.dyn_tree = ds->dyn_ltree;
-    ds->d_desc.dyn_tree = ds->dyn_dtree;
-    ds->bl_desc.dyn_tree = ds->bl_tree;
-
-    return Z_OK;
-#endif
-}
-#endif  /*  0  */
-
 /* ===========================================================================
  * Read a new buffer from the current input stream, update the adler32
  * and total number of bytes read.  All deflate() input goes through
index f5ce87b0800edd421beedc2264c9b9a2a81db885..58a733b1038740f2faefca7efb40de57131e5ac6 100644 (file)
@@ -45,21 +45,6 @@ int zlib_inflateReset(z_streamp strm)
     return Z_OK;
 }
 
-#if 0
-int zlib_inflatePrime(z_streamp strm, int bits, int value)
-{
-    struct inflate_state *state;
-
-    if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
-    state = (struct inflate_state *)strm->state;
-    if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR;
-    value &= (1L << bits) - 1;
-    state->hold += value << state->bits;
-    state->bits += bits;
-    return Z_OK;
-}
-#endif
-
 int zlib_inflateInit2(z_streamp strm, int windowBits)
 {
     struct inflate_state *state;
@@ -761,123 +746,6 @@ int zlib_inflateEnd(z_streamp strm)
     return Z_OK;
 }
 
-#if 0
-int zlib_inflateSetDictionary(z_streamp strm, const Byte *dictionary,
-        uInt dictLength)
-{
-    struct inflate_state *state;
-    unsigned long id;
-
-    /* check state */
-    if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
-    state = (struct inflate_state *)strm->state;
-    if (state->wrap != 0 && state->mode != DICT)
-        return Z_STREAM_ERROR;
-
-    /* check for correct dictionary id */
-    if (state->mode == DICT) {
-        id = zlib_adler32(0L, NULL, 0);
-        id = zlib_adler32(id, dictionary, dictLength);
-        if (id != state->check)
-            return Z_DATA_ERROR;
-    }
-
-    /* copy dictionary to window */
-    zlib_updatewindow(strm, strm->avail_out);
-
-    if (dictLength > state->wsize) {
-        memcpy(state->window, dictionary + dictLength - state->wsize,
-                state->wsize);
-        state->whave = state->wsize;
-    }
-    else {
-        memcpy(state->window + state->wsize - dictLength, dictionary,
-                dictLength);
-        state->whave = dictLength;
-    }
-    state->havedict = 1;
-    return Z_OK;
-}
-#endif
-
-#if 0
-/*
-   Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff.  Return when found
-   or when out of input.  When called, *have is the number of pattern bytes
-   found in order so far, in 0..3.  On return *have is updated to the new
-   state.  If on return *have equals four, then the pattern was found and the
-   return value is how many bytes were read including the last byte of the
-   pattern.  If *have is less than four, then the pattern has not been found
-   yet and the return value is len.  In the latter case, zlib_syncsearch() can be
-   called again with more data and the *have state.  *have is initialized to
-   zero for the first call.
- */
-static unsigned zlib_syncsearch(unsigned *have, unsigned char *buf,
-        unsigned len)
-{
-    unsigned got;
-    unsigned next;
-
-    got = *have;
-    next = 0;
-    while (next < len && got < 4) {
-        if ((int)(buf[next]) == (got < 2 ? 0 : 0xff))
-            got++;
-        else if (buf[next])
-            got = 0;
-        else
-            got = 4 - got;
-        next++;
-    }
-    *have = got;
-    return next;
-}
-#endif
-
-#if 0
-int zlib_inflateSync(z_streamp strm)
-{
-    unsigned len;               /* number of bytes to look at or looked at */
-    unsigned long in, out;      /* temporary to save total_in and total_out */
-    unsigned char buf[4];       /* to restore bit buffer to byte string */
-    struct inflate_state *state;
-
-    /* check parameters */
-    if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
-    state = (struct inflate_state *)strm->state;
-    if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR;
-
-    /* if first time, start search in bit buffer */
-    if (state->mode != SYNC) {
-        state->mode = SYNC;
-        state->hold <<= state->bits & 7;
-        state->bits -= state->bits & 7;
-        len = 0;
-        while (state->bits >= 8) {
-            buf[len++] = (unsigned char)(state->hold);
-            state->hold >>= 8;
-            state->bits -= 8;
-        }
-        state->have = 0;
-        zlib_syncsearch(&(state->have), buf, len);
-    }
-
-    /* search available input */
-    len = zlib_syncsearch(&(state->have), strm->next_in, strm->avail_in);
-    strm->avail_in -= len;
-    strm->next_in += len;
-    strm->total_in += len;
-
-    /* return no joy or set up to restart inflate() on a new block */
-    if (state->have != 4) return Z_DATA_ERROR;
-    in = strm->total_in;  out = strm->total_out;
-    zlib_inflateReset(strm);
-    strm->total_in = in;  strm->total_out = out;
-    state->mode = TYPE;
-    return Z_OK;
-}
-#endif
-
 /*
  * This subroutine adds the data at next_in/avail_in to the output history
  * without performing any output.  The output buffer must be "caught up";
index 3e9977a9d657dd0df323488a6f47e0ba1f23e6f8..886db2158538572ee52790b31d93b4baeb8b187f 100644 (file)
@@ -508,21 +508,34 @@ config CMA_DEBUG
          processing calls such as dma_alloc_from_contiguous().
          This option does not affect warning and error messages.
 
-config ZBUD
-       tristate
-       default n
+config CMA_AREAS
+       int "Maximum count of the CMA areas"
+       depends on CMA
+       default 7
        help
-         A special purpose allocator for storing compressed pages.
-         It is designed to store up to two compressed pages per physical
-         page.  While this design limits storage density, it has simple and
-         deterministic reclaim properties that make it preferable to a higher
-         density approach when reclaim will be used.
+         CMA allows the creation of CMA areas for a particular purpose,
+         mainly used as device private areas. This parameter sets the
+         maximum number of CMA areas in the system.
+
+         If unsure, leave the default value "7".
+
+config MEM_SOFT_DIRTY
+       bool "Track memory changes"
+       depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
+       select PROC_PAGE_MONITOR
+       help
+         This option enables memory change tracking by introducing a
+         soft-dirty bit on pte-s. This bit is set when someone writes
+         into a page, just like the regular dirty bit, but unlike the
+         latter it can be cleared by hand.
+
+         See Documentation/vm/soft-dirty.txt for more details.
 
 config ZSWAP
        bool "Compressed cache for swap pages (EXPERIMENTAL)"
        depends on FRONTSWAP && CRYPTO=y
        select CRYPTO_LZO
-       select ZBUD
+       select ZPOOL
        default n
        help
          A lightweight compressed cache for swap pages.  It takes
@@ -538,17 +551,22 @@ config ZSWAP
          they have not be fully explored on the large set of potential
          configurations and workloads that exist.
 
-config MEM_SOFT_DIRTY
-       bool "Track memory changes"
-       depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
-       select PROC_PAGE_MONITOR
+config ZPOOL
+       tristate "Common API for compressed memory storage"
+       default n
        help
-         This option enables memory changes tracking by introducing a
-         soft-dirty bit on pte-s. This bit it set when someone writes
-         into a page just as regular dirty bit, but unlike the latter
-         it can be cleared by hands.
+         Compressed memory storage API.  This allows using either zbud or
+         zsmalloc.
 
-         See Documentation/vm/soft-dirty.txt for more details.
+config ZBUD
+       tristate "Low density storage for compressed pages"
+       default n
+       help
+         A special purpose allocator for storing compressed pages.
+         It is designed to store up to two compressed pages per physical
+         page.  While this design limits storage density, it has simple and
+         deterministic reclaim properties that make it preferable to a higher
+         density approach when reclaim will be used.
 
 config ZSMALLOC
        tristate "Memory allocator for compressed pages"
index 4064f3ec145e3b2d760f720371c6d3e6830dbd99..632ae77e6070ebfad40b644a1321d6af17507541 100644 (file)
@@ -59,6 +59,8 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
 obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
 obj-$(CONFIG_CLEANCACHE) += cleancache.o
 obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
+obj-$(CONFIG_ZPOOL)    += zpool.o
 obj-$(CONFIG_ZBUD)     += zbud.o
 obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
 obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
+obj-$(CONFIG_CMA)      += cma.o
diff --git a/mm/cma.c b/mm/cma.c
new file mode 100644 (file)
index 0000000..c17751c
--- /dev/null
+++ b/mm/cma.c
@@ -0,0 +1,335 @@
+/*
+ * Contiguous Memory Allocator
+ *
+ * Copyright (c) 2010-2011 by Samsung Electronics.
+ * Copyright IBM Corporation, 2013
+ * Copyright LG Electronics Inc., 2014
+ * Written by:
+ *     Marek Szyprowski <m.szyprowski@samsung.com>
+ *     Michal Nazarewicz <mina86@mina86.com>
+ *     Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *     Joonsoo Kim <iamjoonsoo.kim@lge.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your option) any later version of the license.
+ */
+
+#define pr_fmt(fmt) "cma: " fmt
+
+#ifdef CONFIG_CMA_DEBUG
+#ifndef DEBUG
+#  define DEBUG
+#endif
+#endif
+
+#include <linux/memblock.h>
+#include <linux/err.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/log2.h>
+#include <linux/cma.h>
+
+struct cma {
+       unsigned long   base_pfn;
+       unsigned long   count;
+       unsigned long   *bitmap;
+       unsigned int order_per_bit; /* Order of pages represented by one bit */
+       struct mutex    lock;
+};
+
+static struct cma cma_areas[MAX_CMA_AREAS];
+static unsigned cma_area_count;
+static DEFINE_MUTEX(cma_mutex);
+
+phys_addr_t cma_get_base(struct cma *cma)
+{
+       return PFN_PHYS(cma->base_pfn);
+}
+
+unsigned long cma_get_size(struct cma *cma)
+{
+       return cma->count << PAGE_SHIFT;
+}
+
+static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order)
+{
+       return (1UL << (align_order >> cma->order_per_bit)) - 1;
+}
+
+static unsigned long cma_bitmap_maxno(struct cma *cma)
+{
+       return cma->count >> cma->order_per_bit;
+}
+
+static unsigned long cma_bitmap_pages_to_bits(struct cma *cma,
+                                               unsigned long pages)
+{
+       return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit;
+}
+
+static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, int count)
+{
+       unsigned long bitmap_no, bitmap_count;
+
+       bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit;
+       bitmap_count = cma_bitmap_pages_to_bits(cma, count);
+
+       mutex_lock(&cma->lock);
+       bitmap_clear(cma->bitmap, bitmap_no, bitmap_count);
+       mutex_unlock(&cma->lock);
+}
+
+static int __init cma_activate_area(struct cma *cma)
+{
+       int bitmap_size = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long);
+       unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
+       unsigned i = cma->count >> pageblock_order;
+       struct zone *zone;
+
+       cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+
+       if (!cma->bitmap)
+               return -ENOMEM;
+
+       WARN_ON_ONCE(!pfn_valid(pfn));
+       zone = page_zone(pfn_to_page(pfn));
+
+       do {
+               unsigned j;
+
+               base_pfn = pfn;
+               for (j = pageblock_nr_pages; j; --j, pfn++) {
+                       WARN_ON_ONCE(!pfn_valid(pfn));
+                       /*
+                        * alloc_contig_range requires the pfn range
+                        * specified to be in the same zone. Make this
+                        * simple by forcing the entire CMA resv range
+                        * to be in the same zone.
+                        */
+                       if (page_zone(pfn_to_page(pfn)) != zone)
+                               goto err;
+               }
+               init_cma_reserved_pageblock(pfn_to_page(base_pfn));
+       } while (--i);
+
+       mutex_init(&cma->lock);
+       return 0;
+
+err:
+       kfree(cma->bitmap);
+       return -EINVAL;
+}
+
+static int __init cma_init_reserved_areas(void)
+{
+       int i;
+
+       for (i = 0; i < cma_area_count; i++) {
+               int ret = cma_activate_area(&cma_areas[i]);
+
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+core_initcall(cma_init_reserved_areas);
+
+/**
+ * cma_declare_contiguous() - reserve custom contiguous area
+ * @base: Base address of the reserved area (optional, use 0 for any)
+ * @size: Size of the reserved area (in bytes)
+ * @limit: End address of the reserved memory (optional, 0 for any).
+ * @alignment: Alignment for the CMA area, should be power of 2 or zero
+ * @order_per_bit: Order of pages represented by one bit on bitmap.
+ * @fixed: hint about where to place the reserved area
+ * @res_cma: Pointer to store the created cma region.
+ *
+ * This function reserves memory from early allocator. It should be
+ * called by arch specific code once the early allocator (memblock or bootmem)
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory. This function allows the creation of custom reserved areas.
+ *
+ * If @fixed is true, reserve contiguous area at exactly @base.  If false,
+ * reserve in range from @base to @limit.
+ */
+int __init cma_declare_contiguous(phys_addr_t base,
+                       phys_addr_t size, phys_addr_t limit,
+                       phys_addr_t alignment, unsigned int order_per_bit,
+                       bool fixed, struct cma **res_cma)
+{
+       struct cma *cma;
+       int ret = 0;
+
+       pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n",
+               __func__, (unsigned long)size, (unsigned long)base,
+               (unsigned long)limit, (unsigned long)alignment);
+
+       if (cma_area_count == ARRAY_SIZE(cma_areas)) {
+               pr_err("Not enough slots for CMA reserved regions!\n");
+               return -ENOSPC;
+       }
+
+       if (!size)
+               return -EINVAL;
+
+       if (alignment && !is_power_of_2(alignment))
+               return -EINVAL;
+
+       /*
+        * Sanitise input arguments.
+        * Pages at both ends of the CMA area could be merged into adjacent
+        * unmovable migratetype pages by the page allocator's buddy algorithm.
+        * In that case, you couldn't get contiguous memory, which is not what
+        * we want.
+        */
+       alignment = max(alignment,
+               (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order));
+       base = ALIGN(base, alignment);
+       size = ALIGN(size, alignment);
+       limit &= ~(alignment - 1);
+
+       /* size should be aligned with order_per_bit */
+       if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit))
+               return -EINVAL;
+
+       /* Reserve memory */
+       if (base && fixed) {
+               if (memblock_is_region_reserved(base, size) ||
+                   memblock_reserve(base, size) < 0) {
+                       ret = -EBUSY;
+                       goto err;
+               }
+       } else {
+               phys_addr_t addr = memblock_alloc_range(size, alignment, base,
+                                                       limit);
+               if (!addr) {
+                       ret = -ENOMEM;
+                       goto err;
+               } else {
+                       base = addr;
+               }
+       }
+
+       /*
+        * Each reserved area must be initialised later, when more kernel
+        * subsystems (like slab allocator) are available.
+        */
+       cma = &cma_areas[cma_area_count];
+       cma->base_pfn = PFN_DOWN(base);
+       cma->count = size >> PAGE_SHIFT;
+       cma->order_per_bit = order_per_bit;
+       *res_cma = cma;
+       cma_area_count++;
+
+       pr_info("Reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
+               (unsigned long)base);
+       return 0;
+
+err:
+       pr_err("Failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
+       return ret;
+}
+
+/**
+ * cma_alloc() - allocate pages from contiguous area
+ * @cma:   Contiguous memory region for which the allocation is performed.
+ * @count: Requested number of pages.
+ * @align: Requested alignment of pages (in PAGE_SIZE order).
+ *
+ * This function allocates part of the contiguous memory from the
+ * specified contiguous memory area.
+ */
+struct page *cma_alloc(struct cma *cma, int count, unsigned int align)
+{
+       unsigned long mask, pfn, start = 0;
+       unsigned long bitmap_maxno, bitmap_no, bitmap_count;
+       struct page *page = NULL;
+       int ret;
+
+       if (!cma || !cma->count)
+               return NULL;
+
+       pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
+                count, align);
+
+       if (!count)
+               return NULL;
+
+       mask = cma_bitmap_aligned_mask(cma, align);
+       bitmap_maxno = cma_bitmap_maxno(cma);
+       bitmap_count = cma_bitmap_pages_to_bits(cma, count);
+
+       for (;;) {
+               mutex_lock(&cma->lock);
+               bitmap_no = bitmap_find_next_zero_area(cma->bitmap,
+                               bitmap_maxno, start, bitmap_count, mask);
+               if (bitmap_no >= bitmap_maxno) {
+                       mutex_unlock(&cma->lock);
+                       break;
+               }
+               bitmap_set(cma->bitmap, bitmap_no, bitmap_count);
+               /*
+                * It's safe to drop the lock here. We've marked this region for
+                * our exclusive use. If the migration fails we will take the
+                * lock again and unmark it.
+                */
+               mutex_unlock(&cma->lock);
+
+               pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
+               mutex_lock(&cma_mutex);
+               ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
+               mutex_unlock(&cma_mutex);
+               if (ret == 0) {
+                       page = pfn_to_page(pfn);
+                       break;
+               }
+
+               cma_clear_bitmap(cma, pfn, count);
+               if (ret != -EBUSY)
+                       break;
+
+               pr_debug("%s(): memory range at %p is busy, retrying\n",
+                        __func__, pfn_to_page(pfn));
+               /* try again with a bit different memory target */
+               start = bitmap_no + mask + 1;
+       }
+
+       pr_debug("%s(): returned %p\n", __func__, page);
+       return page;
+}
+
+/**
+ * cma_release() - release allocated pages
+ * @cma:   Contiguous memory region for which the allocation is performed.
+ * @pages: Allocated pages.
+ * @count: Number of allocated pages.
+ *
+ * This function releases memory allocated by cma_alloc().
+ * It returns false when the provided pages do not belong to the contiguous
+ * area, and true otherwise.
+ */
+bool cma_release(struct cma *cma, struct page *pages, int count)
+{
+       unsigned long pfn;
+
+       if (!cma || !pages)
+               return false;
+
+       pr_debug("%s(page %p)\n", __func__, (void *)pages);
+
+       pfn = page_to_pfn(pages);
+
+       if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
+               return false;
+
+       VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
+
+       free_contig_range(pfn, count);
+       cma_clear_bitmap(cma, pfn, count);
+
+       return true;
+}
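
The two entry points above are the allocation half of the new CMA interface.
As a rough usage sketch (not part of this patch), a driver that has been
handed a struct cma region reserved at boot could allocate and release a
buffer as below; my_get_cma_region() is a hypothetical stand-in for however
the region pointer reaches the driver, and the declarations are assumed to
come from the CMA header introduced by this series:

/* Illustrative sketch only -- not part of this patch. */
extern struct cma *my_get_cma_region(void);	/* hypothetical helper */

static struct page *my_buf;

static int my_driver_alloc(void)
{
	struct cma *cma = my_get_cma_region();

	/* 16 pages (64KiB with 4KiB pages), aligned to order 2 (16KiB) */
	my_buf = cma_alloc(cma, 16, 2);
	return my_buf ? 0 : -ENOMEM;
}

static void my_driver_free(void)
{
	struct cma *cma = my_get_cma_region();

	if (my_buf && !cma_release(cma, my_buf, 16))
		pr_warn("buffer did not come from this CMA area\n");
	my_buf = NULL;
}

Note that cma_alloc() returns NULL rather than an ERR_PTR on failure, and
cma_release() merely reports whether the pages fell inside the area.
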
index 65d44fd88c7850c5f1ae23d5d2e46b40ce831317..af19a6b079f5a5ae3a001079cb5b564260be1c22 100644 (file)
@@ -808,6 +808,17 @@ int __lock_page_killable(struct page *page)
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);
 
+/*
+ * Return values:
+ * 1 - page is locked; mmap_sem is still held.
+ * 0 - page is not locked.
+ *     mmap_sem has been released (up_read()), unless flags had both
+ *     FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
+ *     which case mmap_sem is still held.
+ *
+ * If neither ALLOW_RETRY nor KILLABLE is set, this will always return 1
+ * with the page locked and the mmap_sem unperturbed.
+ */
 int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
                         unsigned int flags)
 {
@@ -1091,9 +1102,9 @@ no_page:
                if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
                        fgp_flags |= FGP_LOCK;
 
-               /* Init accessed so avoit atomic mark_page_accessed later */
+               /* Init accessed so avoid atomic mark_page_accessed later */
                if (fgp_flags & FGP_ACCESSED)
-                       init_page_accessed(page);
+                       __SetPageReferenced(page);
 
                err = add_to_page_cache_lru(page, mapping, offset, radix_gfp_mask);
                if (unlikely(err)) {
@@ -1827,6 +1838,18 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
  * The goto's are kind of ugly, but this streamlines the normal case of having
  * it in the page cache, and handles the special cases reasonably without
  * having a lot of duplicated code.
+ *
+ * vma->vm_mm->mmap_sem must be held on entry.
+ *
+ * If our return value has VM_FAULT_RETRY set, it's because
+ * lock_page_or_retry() returned 0.
+ * The mmap_sem has usually been released in this case.
+ * See __lock_page_or_retry() for the exception.
+ *
+ * If our return value does not have VM_FAULT_RETRY set, the mmap_sem
+ * has not been released.
+ *
+ * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
  */
 int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
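
The locking rules spelled out in the two comments above are what the
architecture page-fault handlers rely on.  A condensed caller-side sketch
(illustrative only, not part of this patch; real handlers also pass
FAULT_FLAG_KILLABLE, FAULT_FLAG_USER and so on, and handle fatal signals):

static void fault_sketch(struct mm_struct *mm, unsigned long address)
{
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY;
	struct vm_area_struct *vma;
	int fault;

retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma || vma->vm_start > address)
		goto out;		/* a real handler reports the error */

	fault = handle_mm_fault(mm, vma, address, flags);
	if ((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY)) {
		/*
		 * mmap_sem was dropped for us inside __lock_page_or_retry();
		 * retry once, this time waiting for the page to be unlocked.
		 */
		flags &= ~FAULT_FLAG_ALLOW_RETRY;
		flags |= FAULT_FLAG_TRIED;
		goto retry;
	}
out:
	up_read(&mm->mmap_sem);		/* non-RETRY returns keep it held */
}
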
index cc5a9e7adea77ff50e8c538b36686db466bffb54..91d044b1600dd6b216decb62ca9f1bd7ef152c52 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -258,6 +258,11 @@ unmap:
        return ret;
 }
 
+/*
+ * mmap_sem must be held on entry.  If @nonblocking != NULL and
+ * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
+ * If it is, *@nonblocking will be set to 0 and -EBUSY returned.
+ */
 static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
                unsigned long address, unsigned int *flags, int *nonblocking)
 {
@@ -373,7 +378,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
  * with a put_page() call when it is finished with. vmas will only
  * remain valid while mmap_sem is held.
  *
- * Must be called with mmap_sem held for read or write.
+ * Must be called with mmap_sem held.  It may be released.  See below.
  *
  * __get_user_pages walks a process's page tables and takes a reference to
  * each struct page that each user address corresponds to at a given
@@ -396,7 +401,14 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
  *
  * If @nonblocking != NULL, __get_user_pages will not wait for disk IO
  * or mmap_sem contention, and if waiting is needed to pin all pages,
- * *@nonblocking will be set to 0.
+ * *@nonblocking will be set to 0.  Further, if @gup_flags does not
+ * include FOLL_NOWAIT, the mmap_sem will be released via up_read() in
+ * this case.
+ *
+ * A caller using such a combination of @nonblocking and @gup_flags
+ * must therefore hold the mmap_sem for reading only, and recognize
+ * when it's been released.  Otherwise, it must be held for either
+ * reading or writing and will not be released.
  *
  * In most cases, get_user_pages or get_user_pages_fast should be used
  * instead of __get_user_pages. __get_user_pages should be used only if
@@ -528,7 +540,7 @@ EXPORT_SYMBOL(__get_user_pages);
  * such architectures, gup() will not be enough to make a subsequent access
  * succeed.
  *
- * This should be called with the mm_sem held for read.
+ * This has the same semantics wrt the @mm->mmap_sem as does filemap_fault().
  */
 int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
                     unsigned long address, unsigned int fault_flags)
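
The relaxed rule documented for @nonblocking puts the burden on the caller to
notice when the lock has been dropped on its behalf.  A minimal caller sketch
(illustrative only, not part of this patch), assuming the __get_user_pages()
declaration from <linux/mm.h>:

static long pin_one_page(unsigned long addr, struct page **page)
{
	struct mm_struct *mm = current->mm;
	int locked = 1;			/* passed as @nonblocking */
	long ret;

	down_read(&mm->mmap_sem);
	ret = __get_user_pages(current, mm, addr, 1, FOLL_GET | FOLL_WRITE,
			       page, NULL, &locked);
	if (locked)
		up_read(&mm->mmap_sem);
	/* else __get_user_pages() already did the up_read() for us */
	return ret;			/* on success, put_page() later */
}

Had FOLL_NOWAIT been included, the same call would instead return -EBUSY with
*@nonblocking cleared while keeping mmap_sem held, as the new faultin_page()
comment together with the __lock_page_or_retry() rules implies.
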
index b32b70cdaed6cba1ba79914228593441f4b312d3..123bcd3ed4f209ba3710d9bfcaf8725d0a105534 100644 (file)
@@ -44,6 +44,66 @@ DEFINE_PER_CPU(int, __kmap_atomic_idx);
  */
 #ifdef CONFIG_HIGHMEM
 
+/*
+ * An architecture with an aliasing data cache may define the following family
+ * of helper functions in its asm/highmem.h to control the cache color of the
+ * virtual addresses where physical memory pages are mapped by kmap.
+ */
+#ifndef get_pkmap_color
+
+/*
+ * Determine color of virtual address where the page should be mapped.
+ */
+static inline unsigned int get_pkmap_color(struct page *page)
+{
+       return 0;
+}
+#define get_pkmap_color get_pkmap_color
+
+/*
+ * Get next index for mapping inside PKMAP region for page with given color.
+ */
+static inline unsigned int get_next_pkmap_nr(unsigned int color)
+{
+       static unsigned int last_pkmap_nr;
+
+       last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
+       return last_pkmap_nr;
+}
+
+/*
+ * Determine if the page index inside the PKMAP region (pkmap_nr) of the
+ * given color has wrapped around the PKMAP region end. When this happens,
+ * an attempt is made to flush all unused PKMAP slots.
+ */
+static inline int no_more_pkmaps(unsigned int pkmap_nr, unsigned int color)
+{
+       return pkmap_nr == 0;
+}
+
+/*
+ * Get the number of PKMAP entries of the given color. If no free slot is
+ * found after checking that many entries, kmap will sleep waiting for
+ * someone to call kunmap and free a PKMAP slot.
+ */
+static inline int get_pkmap_entries_count(unsigned int color)
+{
+       return LAST_PKMAP;
+}
+
+/*
+ * Get head of a wait queue for PKMAP entries of the given color.
+ * Wait queues for different mapping colors should be independent to avoid
+ * unnecessary wakeups caused by freeing of slots of other colors.
+ */
+static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color)
+{
+       static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);
+
+       return &pkmap_map_wait;
+}
+#endif
+
 unsigned long totalhigh_pages __read_mostly;
 EXPORT_SYMBOL(totalhigh_pages);
 
@@ -68,13 +128,10 @@ unsigned int nr_free_highpages (void)
 }
 
 static int pkmap_count[LAST_PKMAP];
-static unsigned int last_pkmap_nr;
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
 
 pte_t * pkmap_page_table;
 
-static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);
-
 /*
  * Most architectures have no use for kmap_high_get(), so let's abstract
  * the disabling of IRQ out of the locking in that case to save on a
@@ -161,15 +218,17 @@ static inline unsigned long map_new_virtual(struct page *page)
 {
        unsigned long vaddr;
        int count;
+       unsigned int last_pkmap_nr;
+       unsigned int color = get_pkmap_color(page);
 
 start:
-       count = LAST_PKMAP;
+       count = get_pkmap_entries_count(color);
        /* Find an empty entry */
        for (;;) {
-               last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
-               if (!last_pkmap_nr) {
+               last_pkmap_nr = get_next_pkmap_nr(color);
+               if (no_more_pkmaps(last_pkmap_nr, color)) {
                        flush_all_zero_pkmaps();
-                       count = LAST_PKMAP;
+                       count = get_pkmap_entries_count(color);
                }
                if (!pkmap_count[last_pkmap_nr])
                        break;  /* Found a usable entry */
@@ -181,12 +240,14 @@ start:
                 */
                {
                        DECLARE_WAITQUEUE(wait, current);
+                       wait_queue_head_t *pkmap_map_wait =
+                               get_pkmap_wait_queue_head(color);
 
                        __set_current_state(TASK_UNINTERRUPTIBLE);
-                       add_wait_queue(&pkmap_map_wait, &wait);
+                       add_wait_queue(pkmap_map_wait, &wait);
                        unlock_kmap();
                        schedule();
-                       remove_wait_queue(&pkmap_map_wait, &wait);
+                       remove_wait_queue(pkmap_map_wait, &wait);
                        lock_kmap();
 
                        /* Somebody else might have mapped it while we slept */
@@ -274,6 +335,8 @@ void kunmap_high(struct page *page)
        unsigned long nr;
        unsigned long flags;
        int need_wakeup;
+       unsigned int color = get_pkmap_color(page);
+       wait_queue_head_t *pkmap_map_wait;
 
        lock_kmap_any(flags);
        vaddr = (unsigned long)page_address(page);
@@ -299,13 +362,14 @@ void kunmap_high(struct page *page)
                 * no need for the wait-queue-head's lock.  Simply
                 * test if the queue is empty.
                 */
-               need_wakeup = waitqueue_active(&pkmap_map_wait);
+               pkmap_map_wait = get_pkmap_wait_queue_head(color);
+               need_wakeup = waitqueue_active(pkmap_map_wait);
        }
        unlock_kmap_any(flags);
 
        /* do wake-up, if needed, race-free outside of the spin lock */
        if (need_wakeup)
-               wake_up(&pkmap_map_wait);
+               wake_up(pkmap_map_wait);
 }
 
 EXPORT_SYMBOL(kunmap_high);
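
The hook family introduced above only gains meaning once an architecture
overrides it.  A hypothetical override for a machine whose data cache aliases
across PKMAP_N_COLORS colors might look like the sketch below (illustrative
only, not part of this patch; PKMAP_N_COLORS and the color policy are made
up).  It would live in that architecture's asm/highmem.h:

/* hypothetical; assumed to be a power of two that divides LAST_PKMAP */
#define PKMAP_N_COLORS	4

static inline unsigned int get_pkmap_color(struct page *page)
{
	/* derive the color from the physical page number */
	return page_to_pfn(page) & (PKMAP_N_COLORS - 1);
}
#define get_pkmap_color get_pkmap_color

/* hand out only PKMAP indices whose low bits match the requested color */
static inline unsigned int get_next_pkmap_nr(unsigned int color)
{
	static unsigned int last_pkmap_nr[PKMAP_N_COLORS];

	last_pkmap_nr[color] = (last_pkmap_nr[color] + PKMAP_N_COLORS) &
			       LAST_PKMAP_MASK;
	return last_pkmap_nr[color] + color;
}

/* true once the per-color counter has wrapped back to its first slot */
static inline int no_more_pkmaps(unsigned int pkmap_nr, unsigned int color)
{
	return pkmap_nr < PKMAP_N_COLORS;
}

/* each color owns an equal share of the PKMAP window */
static inline int get_pkmap_entries_count(unsigned int color)
{
	return LAST_PKMAP / PKMAP_N_COLORS;
}

A complete override would also have to supply get_pkmap_wait_queue_head(),
since the generic fallbacks are provided all-or-nothing behind the single
#ifndef get_pkmap_color guard; a per-color wait queue is what avoids the
spurious wakeups the last comment above warns about.
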
index 33514d88fef9b041cef11c74717091eec4805f80..3630d577e9879e9d6dc6a80912e2eb88d5f1c959 100644 (file)
@@ -827,7 +827,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                count_vm_event(THP_FAULT_FALLBACK);
                return VM_FAULT_FALLBACK;
        }
-       if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_KERNEL))) {
+       if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_TRANSHUGE))) {
                put_page(page);
                count_vm_event(THP_FAULT_FALLBACK);
                return VM_FAULT_FALLBACK;
@@ -1132,7 +1132,7 @@ alloc:
                goto out;
        }
 
-       if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) {
+       if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE))) {
                put_page(new_page);
                if (page) {
                        split_huge_page(page);
@@ -1681,7 +1681,7 @@ static void __split_huge_page_refcount(struct page *page,
                           &page_tail->_count);
 
                /* after clearing PageTail the gup refcount can be released */
-               smp_mb();
+               smp_mb__after_atomic();
 
                /*
                 * retain hwpoison flag of the poisoned tail page:
@@ -1775,6 +1775,8 @@ static int __split_huge_page_map(struct page *page,
        if (pmd) {
                pgtable = pgtable_trans_huge_withdraw(mm, pmd);
                pmd_populate(mm, &_pmd, pgtable);
+               if (pmd_write(*pmd))
+                       BUG_ON(page_mapcount(page) != 1);
 
                haddr = address;
                for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
@@ -1784,8 +1786,6 @@ static int __split_huge_page_map(struct page *page,
                        entry = maybe_mkwrite(pte_mkdirty(entry), vma);
                        if (!pmd_write(*pmd))
                                entry = pte_wrprotect(entry);
-                       else
-                               BUG_ON(page_mapcount(page) != 1);
                        if (!pmd_young(*pmd))
                                entry = pte_mkold(entry);
                        if (pmd_numa(*pmd))
@@ -2233,6 +2233,30 @@ static void khugepaged_alloc_sleep(void)
 
 static int khugepaged_node_load[MAX_NUMNODES];
 
+static bool khugepaged_scan_abort(int nid)
+{
+       int i;
+
+       /*
+        * If zone_reclaim_mode is disabled, then no extra effort is made to
+        * allocate memory locally.
+        */
+       if (!zone_reclaim_mode)
+               return false;
+
+       /* If there is a count for this node already, it must be acceptable */
+       if (khugepaged_node_load[nid])
+               return false;
+
+       for (i = 0; i < MAX_NUMNODES; i++) {
+               if (!khugepaged_node_load[i])
+                       continue;
+               if (node_distance(nid, i) > RECLAIM_DISTANCE)
+                       return true;
+       }
+       return false;
+}
+
 #ifdef CONFIG_NUMA
 static int khugepaged_find_target_node(void)
 {
@@ -2399,7 +2423,7 @@ static void collapse_huge_page(struct mm_struct *mm,
        if (!new_page)
                return;
 
-       if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)))
+       if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE)))
                return;
 
        /*
@@ -2545,6 +2569,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
                 * hit record.
                 */
                node = page_to_nid(page);
+               if (khugepaged_scan_abort(node))
+                       goto out_unmap;
                khugepaged_node_load[node]++;
                VM_BUG_ON_PAGE(PageCompound(page), page);
                if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
index 7a0a73d2fcff128850b32af9910a873d6fb384f5..eeceeeb0901978f378ead370134ba37c253ab04c 100644 (file)
@@ -35,7 +35,6 @@
 #include <linux/node.h>
 #include "internal.h"
 
-const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
 unsigned long hugepages_treat_as_movable;
 
 int hugetlb_max_hstate __read_mostly;
@@ -1089,6 +1088,9 @@ void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
        unsigned long pfn;
        struct hstate *h;
 
+       if (!hugepages_supported())
+               return;
+
        /* Set scan step to minimum hugepage size */
        for_each_hstate(h)
                if (order > huge_page_order(h))
@@ -1734,21 +1736,13 @@ static ssize_t nr_hugepages_show_common(struct kobject *kobj,
        return sprintf(buf, "%lu\n", nr_huge_pages);
 }
 
-static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
-                       struct kobject *kobj, struct kobj_attribute *attr,
-                       const char *buf, size_t len)
+static ssize_t __nr_hugepages_store_common(bool obey_mempolicy,
+                                          struct hstate *h, int nid,
+                                          unsigned long count, size_t len)
 {
        int err;
-       int nid;
-       unsigned long count;
-       struct hstate *h;
        NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
 
-       err = kstrtoul(buf, 10, &count);
-       if (err)
-               goto out;
-
-       h = kobj_to_hstate(kobj, &nid);
        if (hstate_is_gigantic(h) && !gigantic_page_supported()) {
                err = -EINVAL;
                goto out;
@@ -1784,6 +1778,23 @@ out:
        return err;
 }
 
+static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
+                                        struct kobject *kobj, const char *buf,
+                                        size_t len)
+{
+       struct hstate *h;
+       unsigned long count;
+       int nid;
+       int err;
+
+       err = kstrtoul(buf, 10, &count);
+       if (err)
+               return err;
+
+       h = kobj_to_hstate(kobj, &nid);
+       return __nr_hugepages_store_common(obey_mempolicy, h, nid, count, len);
+}
+
 static ssize_t nr_hugepages_show(struct kobject *kobj,
                                       struct kobj_attribute *attr, char *buf)
 {
@@ -1793,7 +1804,7 @@ static ssize_t nr_hugepages_show(struct kobject *kobj,
 static ssize_t nr_hugepages_store(struct kobject *kobj,
               struct kobj_attribute *attr, const char *buf, size_t len)
 {
-       return nr_hugepages_store_common(false, kobj, attr, buf, len);
+       return nr_hugepages_store_common(false, kobj, buf, len);
 }
 HSTATE_ATTR(nr_hugepages);
 
@@ -1812,7 +1823,7 @@ static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj,
 static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj,
               struct kobj_attribute *attr, const char *buf, size_t len)
 {
-       return nr_hugepages_store_common(true, kobj, attr, buf, len);
+       return nr_hugepages_store_common(true, kobj, buf, len);
 }
 HSTATE_ATTR(nr_hugepages_mempolicy);
 #endif
@@ -2248,36 +2259,21 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
                         void __user *buffer, size_t *length, loff_t *ppos)
 {
        struct hstate *h = &default_hstate;
-       unsigned long tmp;
+       unsigned long tmp = h->max_huge_pages;
        int ret;
 
        if (!hugepages_supported())
                return -ENOTSUPP;
 
-       tmp = h->max_huge_pages;
-
-       if (write && hstate_is_gigantic(h) && !gigantic_page_supported())
-               return -EINVAL;
-
        table->data = &tmp;
        table->maxlen = sizeof(unsigned long);
        ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
        if (ret)
                goto out;
 
-       if (write) {
-               NODEMASK_ALLOC(nodemask_t, nodes_allowed,
-                                               GFP_KERNEL | __GFP_NORETRY);
-               if (!(obey_mempolicy &&
-                              init_nodemask_of_mempolicy(nodes_allowed))) {
-                       NODEMASK_FREE(nodes_allowed);
-                       nodes_allowed = &node_states[N_MEMORY];
-               }
-               h->max_huge_pages = set_max_huge_pages(h, tmp, nodes_allowed);
-
-               if (nodes_allowed != &node_states[N_MEMORY])
-                       NODEMASK_FREE(nodes_allowed);
-       }
+       if (write)
+               ret = __nr_hugepages_store_common(obey_mempolicy, h,
+                                                 NUMA_NO_NODE, tmp, *length);
 out:
        return ret;
 }
@@ -2754,8 +2750,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
  * from other VMAs and let the children be SIGKILLed if they are faulting the
  * same region.
  */
-static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
-                               struct page *page, unsigned long address)
+static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
+                             struct page *page, unsigned long address)
 {
        struct hstate *h = hstate_vma(vma);
        struct vm_area_struct *iter_vma;
@@ -2794,8 +2790,6 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
                                             address + huge_page_size(h), page);
        }
        mutex_unlock(&mapping->i_mmap_mutex);
-
-       return 1;
 }
 
 /*
@@ -2810,7 +2804,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 {
        struct hstate *h = hstate_vma(vma);
        struct page *old_page, *new_page;
-       int outside_reserve = 0;
+       int ret = 0, outside_reserve = 0;
        unsigned long mmun_start;       /* For mmu_notifiers */
        unsigned long mmun_end;         /* For mmu_notifiers */
 
@@ -2840,14 +2834,14 @@ retry_avoidcopy:
 
        page_cache_get(old_page);
 
-       /* Drop page table lock as buddy allocator may be called */
+       /*
+        * Drop page table lock as buddy allocator may be called. It will
+        * be acquired again before returning to the caller, as expected.
+        */
        spin_unlock(ptl);
        new_page = alloc_huge_page(vma, address, outside_reserve);
 
        if (IS_ERR(new_page)) {
-               long err = PTR_ERR(new_page);
-               page_cache_release(old_page);
-
                /*
                 * If a process owning a MAP_PRIVATE mapping fails to COW,
                 * it is due to references held by a child and an insufficient
@@ -2856,29 +2850,25 @@ retry_avoidcopy:
                 * may get SIGKILLed if it later faults.
                 */
                if (outside_reserve) {
+                       page_cache_release(old_page);
                        BUG_ON(huge_pte_none(pte));
-                       if (unmap_ref_private(mm, vma, old_page, address)) {
-                               BUG_ON(huge_pte_none(pte));
-                               spin_lock(ptl);
-                               ptep = huge_pte_offset(mm, address & huge_page_mask(h));
-                               if (likely(ptep &&
-                                          pte_same(huge_ptep_get(ptep), pte)))
-                                       goto retry_avoidcopy;
-                               /*
-                                * race occurs while re-acquiring page table
-                                * lock, and our job is done.
-                                */
-                               return 0;
-                       }
-                       WARN_ON_ONCE(1);
+                       unmap_ref_private(mm, vma, old_page, address);
+                       BUG_ON(huge_pte_none(pte));
+                       spin_lock(ptl);
+                       ptep = huge_pte_offset(mm, address & huge_page_mask(h));
+                       if (likely(ptep &&
+                                  pte_same(huge_ptep_get(ptep), pte)))
+                               goto retry_avoidcopy;
+                       /*
+                        * race occurs while re-acquiring page table
+                        * lock, and our job is done.
+                        */
+                       return 0;
                }
 
-               /* Caller expects lock to be held */
-               spin_lock(ptl);
-               if (err == -ENOMEM)
-                       return VM_FAULT_OOM;
-               else
-                       return VM_FAULT_SIGBUS;
+               ret = (PTR_ERR(new_page) == -ENOMEM) ?
+                       VM_FAULT_OOM : VM_FAULT_SIGBUS;
+               goto out_release_old;
        }
 
        /*
@@ -2886,11 +2876,8 @@ retry_avoidcopy:
         * anon_vma prepared.
         */
        if (unlikely(anon_vma_prepare(vma))) {
-               page_cache_release(new_page);
-               page_cache_release(old_page);
-               /* Caller expects lock to be held */
-               spin_lock(ptl);
-               return VM_FAULT_OOM;
+               ret = VM_FAULT_OOM;
+               goto out_release_all;
        }
 
        copy_user_huge_page(new_page, old_page, address, vma,
@@ -2900,6 +2887,7 @@ retry_avoidcopy:
        mmun_start = address & huge_page_mask(h);
        mmun_end = mmun_start + huge_page_size(h);
        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
        /*
         * Retake the page table lock to check for racing updates
         * before the page tables are altered
@@ -2920,12 +2908,13 @@ retry_avoidcopy:
        }
        spin_unlock(ptl);
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+out_release_all:
        page_cache_release(new_page);
+out_release_old:
        page_cache_release(old_page);
 
-       /* Caller expects lock to be held */
-       spin_lock(ptl);
-       return 0;
+       spin_lock(ptl); /* Caller expects lock to be held */
+       return ret;
 }
 
 /* Return the pagecache page at a given address within a VMA */
index 95487c71cad59737994d77e1d47c1ff07a7d4365..329caf56df22d84d02495e35f051062b01bdeaba 100644 (file)
@@ -72,8 +72,7 @@ DEFINE_SIMPLE_ATTRIBUTE(unpoison_fops, NULL, hwpoison_unpoison, "%lli\n");
 
 static void pfn_inject_exit(void)
 {
-       if (hwpoison_dir)
-               debugfs_remove_recursive(hwpoison_dir);
+       debugfs_remove_recursive(hwpoison_dir);
 }
 
 static int pfn_inject_init(void)
index 7f22a11fcc66e3e6e0b5a16319f7f4dc232e0ffb..a1b651b11c5fcba7a0322bc19c93286896d08cbf 100644 (file)
@@ -247,7 +247,7 @@ static inline void mlock_migrate_page(struct page *new, struct page *old) { }
 static inline struct page *mem_map_offset(struct page *base, int offset)
 {
        if (unlikely(offset >= MAX_ORDER_NR_PAGES))
-               return pfn_to_page(page_to_pfn(base) + offset);
+               return nth_page(base, offset);
        return base + offset;
 }
 
index a402f8fdc68e94888ea177104524085c9f490fd5..0938b30da4abbb91aa01e7884c6c236728403dee 100644 (file)
@@ -292,9 +292,6 @@ static long madvise_dontneed(struct vm_area_struct *vma,
 /*
  * Application wants to free up the pages and associated backing store.
  * This is effectively punching a hole into the middle of a file.
- *
- * NOTE: Currently, only shmfs/tmpfs is supported for this operation.
- * Other filesystems return -ENOSYS.
  */
 static long madvise_remove(struct vm_area_struct *vma,
                                struct vm_area_struct **prev,
index f009a14918d29c8c9b7c09db6c60428a0042f644..90dc501eaf3fbcbc7a60efeb1a4b3072220c04dc 100644 (file)
@@ -2551,55 +2551,72 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
        return NOTIFY_OK;
 }
 
-
-/* See mem_cgroup_try_charge() for details */
-enum {
-       CHARGE_OK,              /* success */
-       CHARGE_RETRY,           /* need to retry but retry is not bad */
-       CHARGE_NOMEM,           /* we can't do more. return -ENOMEM */
-       CHARGE_WOULDBLOCK,      /* GFP_WAIT wasn't set and no enough res. */
-};
-
-static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
-                               unsigned int nr_pages, unsigned int min_pages,
-                               bool invoke_oom)
+/**
+ * mem_cgroup_try_charge - try charging a memcg
+ * @memcg: memcg to charge
+ * @nr_pages: number of pages to charge
+ *
+ * Returns 0 if @memcg was charged successfully, -EINTR if the charge
+ * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
+ */
+static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
+                                gfp_t gfp_mask,
+                                unsigned int nr_pages)
 {
-       unsigned long csize = nr_pages * PAGE_SIZE;
+       unsigned int batch = max(CHARGE_BATCH, nr_pages);
+       int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
        struct mem_cgroup *mem_over_limit;
        struct res_counter *fail_res;
+       unsigned long nr_reclaimed;
        unsigned long flags = 0;
-       int ret;
+       unsigned long long size;
+       int ret = 0;
 
-       ret = res_counter_charge(&memcg->res, csize, &fail_res);
+retry:
+       if (consume_stock(memcg, nr_pages))
+               goto done;
 
-       if (likely(!ret)) {
+       size = batch * PAGE_SIZE;
+       if (!res_counter_charge(&memcg->res, size, &fail_res)) {
                if (!do_swap_account)
-                       return CHARGE_OK;
-               ret = res_counter_charge(&memcg->memsw, csize, &fail_res);
-               if (likely(!ret))
-                       return CHARGE_OK;
-
-               res_counter_uncharge(&memcg->res, csize);
+                       goto done_restock;
+               if (!res_counter_charge(&memcg->memsw, size, &fail_res))
+                       goto done_restock;
+               res_counter_uncharge(&memcg->res, size);
                mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
                flags |= MEM_CGROUP_RECLAIM_NOSWAP;
        } else
                mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
+
+       if (batch > nr_pages) {
+               batch = nr_pages;
+               goto retry;
+       }
+
        /*
-        * Never reclaim on behalf of optional batching, retry with a
-        * single page instead.
+        * Unlike in global OOM situations, memcg is not in a physical
+        * memory shortage.  Allow dying and OOM-killed tasks to
+        * bypass the last charges so that they can exit quickly and
+        * free their memory.
         */
-       if (nr_pages > min_pages)
-               return CHARGE_RETRY;
+       if (unlikely(test_thread_flag(TIF_MEMDIE) ||
+                    fatal_signal_pending(current) ||
+                    current->flags & PF_EXITING))
+               goto bypass;
+
+       if (unlikely(task_in_memcg_oom(current)))
+               goto nomem;
 
        if (!(gfp_mask & __GFP_WAIT))
-               return CHARGE_WOULDBLOCK;
+               goto nomem;
 
-       if (gfp_mask & __GFP_NORETRY)
-               return CHARGE_NOMEM;
+       nr_reclaimed = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
 
-       ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
        if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
-               return CHARGE_RETRY;
+               goto retry;
+
+       if (gfp_mask & __GFP_NORETRY)
+               goto nomem;
        /*
         * Even though the limit is exceeded at this point, reclaim
         * may have been able to free some pages.  Retry the charge
@@ -2609,96 +2626,38 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
         * unlikely to succeed so close to the limit, and we fall back
         * to regular pages anyway in case of failure.
         */
-       if (nr_pages <= (1 << PAGE_ALLOC_COSTLY_ORDER) && ret)
-               return CHARGE_RETRY;
-
+       if (nr_reclaimed && nr_pages <= (1 << PAGE_ALLOC_COSTLY_ORDER))
+               goto retry;
        /*
         * At task move, charge accounts can be doubly counted. So, it's
         * better to wait until the end of task_move if something is going on.
         */
        if (mem_cgroup_wait_acct_move(mem_over_limit))
-               return CHARGE_RETRY;
-
-       if (invoke_oom)
-               mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(csize));
-
-       return CHARGE_NOMEM;
-}
-
-/**
- * mem_cgroup_try_charge - try charging a memcg
- * @memcg: memcg to charge
- * @nr_pages: number of pages to charge
- * @oom: trigger OOM if reclaim fails
- *
- * Returns 0 if @memcg was charged successfully, -EINTR if the charge
- * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
- */
-static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
-                                gfp_t gfp_mask,
-                                unsigned int nr_pages,
-                                bool oom)
-{
-       unsigned int batch = max(CHARGE_BATCH, nr_pages);
-       int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
-       int ret;
-
-       if (mem_cgroup_is_root(memcg))
-               goto done;
-       /*
-        * Unlike in global OOM situations, memcg is not in a physical
-        * memory shortage.  Allow dying and OOM-killed tasks to
-        * bypass the last charges so that they can exit quickly and
-        * free their memory.
-        */
-       if (unlikely(test_thread_flag(TIF_MEMDIE) ||
-                    fatal_signal_pending(current) ||
-                    current->flags & PF_EXITING))
-               goto bypass;
+               goto retry;
 
-       if (unlikely(task_in_memcg_oom(current)))
-               goto nomem;
+       if (nr_retries--)
+               goto retry;
 
        if (gfp_mask & __GFP_NOFAIL)
-               oom = false;
-again:
-       if (consume_stock(memcg, nr_pages))
-               goto done;
-
-       do {
-               bool invoke_oom = oom && !nr_oom_retries;
-
-               /* If killed, bypass charge */
-               if (fatal_signal_pending(current))
-                       goto bypass;
+               goto bypass;
 
-               ret = mem_cgroup_do_charge(memcg, gfp_mask, batch,
-                                          nr_pages, invoke_oom);
-               switch (ret) {
-               case CHARGE_OK:
-                       break;
-               case CHARGE_RETRY: /* not in OOM situation but retry */
-                       batch = nr_pages;
-                       goto again;
-               case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
-                       goto nomem;
-               case CHARGE_NOMEM: /* OOM routine works */
-                       if (!oom || invoke_oom)
-                               goto nomem;
-                       nr_oom_retries--;
-                       break;
-               }
-       } while (ret != CHARGE_OK);
+       if (fatal_signal_pending(current))
+               goto bypass;
 
-       if (batch > nr_pages)
-               refill_stock(memcg, batch - nr_pages);
-done:
-       return 0;
+       mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(nr_pages));
 nomem:
        if (!(gfp_mask & __GFP_NOFAIL))
                return -ENOMEM;
 bypass:
-       return -EINTR;
+       memcg = root_mem_cgroup;
+       ret = -EINTR;
+       goto retry;
+
+done_restock:
+       if (batch > nr_pages)
+               refill_stock(memcg, batch - nr_pages);
+done:
+       return ret;
 }
 
 /**
@@ -2712,15 +2671,14 @@ bypass:
  */
 static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
                                 gfp_t gfp_mask,
-                                unsigned int nr_pages,
-                                bool oom)
+                                unsigned int nr_pages)
 
 {
        struct mem_cgroup *memcg;
        int ret;
 
        memcg = get_mem_cgroup_from_mm(mm);
-       ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom);
+       ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages);
        css_put(&memcg->css);
        if (ret == -EINTR)
                memcg = root_mem_cgroup;
@@ -2738,13 +2696,11 @@ static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
 static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
                                       unsigned int nr_pages)
 {
-       if (!mem_cgroup_is_root(memcg)) {
-               unsigned long bytes = nr_pages * PAGE_SIZE;
+       unsigned long bytes = nr_pages * PAGE_SIZE;
 
-               res_counter_uncharge(&memcg->res, bytes);
-               if (do_swap_account)
-                       res_counter_uncharge(&memcg->memsw, bytes);
-       }
+       res_counter_uncharge(&memcg->res, bytes);
+       if (do_swap_account)
+               res_counter_uncharge(&memcg->memsw, bytes);
 }
 
 /*
@@ -2756,9 +2712,6 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
 {
        unsigned long bytes = nr_pages * PAGE_SIZE;
 
-       if (mem_cgroup_is_root(memcg))
-               return;
-
        res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes);
        if (do_swap_account)
                res_counter_uncharge_until(&memcg->memsw,
@@ -2842,14 +2795,6 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
        }
 
        pc->mem_cgroup = memcg;
-       /*
-        * We access a page_cgroup asynchronously without lock_page_cgroup().
-        * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
-        * is accessed after testing USED bit. To make pc->mem_cgroup visible
-        * before USED bit, we need memory barrier here.
-        * See mem_cgroup_add_lru_list(), etc.
-        */
-       smp_wmb();
        SetPageCgroupUsed(pc);
 
        if (lrucare) {
@@ -2937,8 +2882,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
        if (ret)
                return ret;
 
-       ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT,
-                                   oom_gfp_allowed(gfp));
+       ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT);
        if (ret == -EINTR)  {
                /*
                 * mem_cgroup_try_charge() chosed to bypass to root due to
@@ -3463,12 +3407,13 @@ void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
                memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
                return;
        }
-
+       /*
+        * The page is freshly allocated and not visible to any
+        * outside callers yet.  Set up pc non-atomically.
+        */
        pc = lookup_page_cgroup(page);
-       lock_page_cgroup(pc);
        pc->mem_cgroup = memcg;
-       SetPageCgroupUsed(pc);
-       unlock_page_cgroup(pc);
+       pc->flags = PCG_USED;
 }
 
 void __memcg_kmem_uncharge_pages(struct page *page, int order)
@@ -3478,19 +3423,11 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
 
 
        pc = lookup_page_cgroup(page);
-       /*
-        * Fast unlocked return. Theoretically might have changed, have to
-        * check again after locking.
-        */
        if (!PageCgroupUsed(pc))
                return;
 
-       lock_page_cgroup(pc);
-       if (PageCgroupUsed(pc)) {
-               memcg = pc->mem_cgroup;
-               ClearPageCgroupUsed(pc);
-       }
-       unlock_page_cgroup(pc);
+       memcg = pc->mem_cgroup;
+       pc->flags = 0;
 
        /*
         * We trust that only if there is a memcg associated with the page, it
@@ -3531,7 +3468,6 @@ void mem_cgroup_split_huge_fixup(struct page *head)
        for (i = 1; i < HPAGE_PMD_NR; i++) {
                pc = head_pc + i;
                pc->mem_cgroup = memcg;
-               smp_wmb();/* see __commit_charge() */
                pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
        }
        __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
@@ -3687,7 +3623,6 @@ int mem_cgroup_charge_anon(struct page *page,
 {
        unsigned int nr_pages = 1;
        struct mem_cgroup *memcg;
-       bool oom = true;
 
        if (mem_cgroup_disabled())
                return 0;
@@ -3699,14 +3634,9 @@ int mem_cgroup_charge_anon(struct page *page,
        if (PageTransHuge(page)) {
                nr_pages <<= compound_order(page);
                VM_BUG_ON_PAGE(!PageTransHuge(page), page);
-               /*
-                * Never OOM-kill a process for a huge page.  The
-                * fault handler will fall back to regular pages.
-                */
-               oom = false;
        }
 
-       memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom);
+       memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages);
        if (!memcg)
                return -ENOMEM;
        __mem_cgroup_commit_charge(memcg, page, nr_pages,
@@ -3743,7 +3673,7 @@ static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
                memcg = try_get_mem_cgroup_from_page(page);
        if (!memcg)
                memcg = get_mem_cgroup_from_mm(mm);
-       ret = mem_cgroup_try_charge(memcg, mask, 1, true);
+       ret = mem_cgroup_try_charge(memcg, mask, 1);
        css_put(&memcg->css);
        if (ret == -EINTR)
                memcg = root_mem_cgroup;
@@ -3770,7 +3700,7 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
        if (!PageSwapCache(page)) {
                struct mem_cgroup *memcg;
 
-               memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
+               memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
                if (!memcg)
                        return -ENOMEM;
                *memcgp = memcg;
@@ -3839,7 +3769,7 @@ int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
                return 0;
        }
 
-       memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
+       memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
        if (!memcg)
                return -ENOMEM;
        __mem_cgroup_commit_charge(memcg, page, 1, type, false);
@@ -3993,7 +3923,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
         * replacement page, so leave it alone when phasing out the
         * page that is unused after the migration.
         */
-       if (!end_migration && !mem_cgroup_is_root(memcg))
+       if (!end_migration)
                mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
 
        return memcg;
@@ -4126,8 +4056,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
                 * We uncharge this because swap is freed.  This memcg can
                 * be obsolete one. We avoid calling css_tryget_online().
                 */
-               if (!mem_cgroup_is_root(memcg))
-                       res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+               res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
                mem_cgroup_swap_statistics(memcg, false);
                css_put(&memcg->css);
        }
@@ -4817,78 +4746,24 @@ out:
        return retval;
 }
 
-
-static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
-                                              enum mem_cgroup_stat_index idx)
-{
-       struct mem_cgroup *iter;
-       long val = 0;
-
-       /* Per-cpu values can be negative, use a signed accumulator */
-       for_each_mem_cgroup_tree(iter, memcg)
-               val += mem_cgroup_read_stat(iter, idx);
-
-       if (val < 0) /* race ? */
-               val = 0;
-       return val;
-}
-
-static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
-{
-       u64 val;
-
-       if (!mem_cgroup_is_root(memcg)) {
-               if (!swap)
-                       return res_counter_read_u64(&memcg->res, RES_USAGE);
-               else
-                       return res_counter_read_u64(&memcg->memsw, RES_USAGE);
-       }
-
-       /*
-        * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
-        * as well as in MEM_CGROUP_STAT_RSS_HUGE.
-        */
-       val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
-       val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
-
-       if (swap)
-               val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
-
-       return val << PAGE_SHIFT;
-}
-
 static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
-                                  struct cftype *cft)
+                              struct cftype *cft)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-       u64 val;
-       int name;
-       enum res_type type;
-
-       type = MEMFILE_TYPE(cft->private);
-       name = MEMFILE_ATTR(cft->private);
+       enum res_type type = MEMFILE_TYPE(cft->private);
+       int name = MEMFILE_ATTR(cft->private);
 
        switch (type) {
        case _MEM:
-               if (name == RES_USAGE)
-                       val = mem_cgroup_usage(memcg, false);
-               else
-                       val = res_counter_read_u64(&memcg->res, name);
-               break;
+               return res_counter_read_u64(&memcg->res, name);
        case _MEMSWAP:
-               if (name == RES_USAGE)
-                       val = mem_cgroup_usage(memcg, true);
-               else
-                       val = res_counter_read_u64(&memcg->memsw, name);
-               break;
+               return res_counter_read_u64(&memcg->memsw, name);
        case _KMEM:
-               val = res_counter_read_u64(&memcg->kmem, name);
+               return res_counter_read_u64(&memcg->kmem, name);
                break;
        default:
                BUG();
        }
-
-       return val;
 }
 
 #ifdef CONFIG_MEMCG_KMEM
@@ -5350,7 +5225,10 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
        if (!t)
                goto unlock;
 
-       usage = mem_cgroup_usage(memcg, swap);
+       if (!swap)
+               usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+       else
+               usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
 
        /*
         * current_threshold points to threshold just below or equal to usage.
@@ -5446,15 +5324,15 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
 
        mutex_lock(&memcg->thresholds_lock);
 
-       if (type == _MEM)
+       if (type == _MEM) {
                thresholds = &memcg->thresholds;
-       else if (type == _MEMSWAP)
+               usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+       } else if (type == _MEMSWAP) {
                thresholds = &memcg->memsw_thresholds;
-       else
+               usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+       } else
                BUG();
 
-       usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
-
        /* Check if a threshold crossed before adding a new one */
        if (thresholds->primary)
                __mem_cgroup_threshold(memcg, type == _MEMSWAP);
@@ -5534,18 +5412,19 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
        int i, j, size;
 
        mutex_lock(&memcg->thresholds_lock);
-       if (type == _MEM)
+
+       if (type == _MEM) {
                thresholds = &memcg->thresholds;
-       else if (type == _MEMSWAP)
+               usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+       } else if (type == _MEMSWAP) {
                thresholds = &memcg->memsw_thresholds;
-       else
+               usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+       } else
                BUG();
 
        if (!thresholds->primary)
                goto unlock;
 
-       usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
-
        /* Check if a threshold crossed before removing */
        __mem_cgroup_threshold(memcg, type == _MEMSWAP);
 
@@ -6299,9 +6178,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
                 * core guarantees its existence.
                 */
        } else {
-               res_counter_init(&memcg->res, NULL);
-               res_counter_init(&memcg->memsw, NULL);
-               res_counter_init(&memcg->kmem, NULL);
+               res_counter_init(&memcg->res, &root_mem_cgroup->res);
+               res_counter_init(&memcg->memsw, &root_mem_cgroup->memsw);
+               res_counter_init(&memcg->kmem, &root_mem_cgroup->kmem);
                /*
                 * Deeper hierachy with use_hierarchy == false doesn't make
                 * much sense so let cgroup subsystem know about this
@@ -6435,55 +6314,39 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
 
 #ifdef CONFIG_MMU
 /* Handlers for move charge at task migration. */
-#define PRECHARGE_COUNT_AT_ONCE        256
 static int mem_cgroup_do_precharge(unsigned long count)
 {
-       int ret = 0;
-       int batch_count = PRECHARGE_COUNT_AT_ONCE;
-       struct mem_cgroup *memcg = mc.to;
+       int ret;
 
-       if (mem_cgroup_is_root(memcg)) {
+       /* Try a single bulk charge without reclaim first */
+       ret = mem_cgroup_try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count);
+       if (!ret) {
                mc.precharge += count;
-               /* we don't need css_get for root */
                return ret;
        }
-       /* try to charge at once */
-       if (count > 1) {
-               struct res_counter *dummy;
-               /*
-                * "memcg" cannot be under rmdir() because we've already checked
-                * by cgroup_lock_live_cgroup() that it is not removed and we
-                * are still under the same cgroup_mutex. So we can postpone
-                * css_get().
-                */
-               if (res_counter_charge(&memcg->res, PAGE_SIZE * count, &dummy))
-                       goto one_by_one;
-               if (do_swap_account && res_counter_charge(&memcg->memsw,
-                                               PAGE_SIZE * count, &dummy)) {
-                       res_counter_uncharge(&memcg->res, PAGE_SIZE * count);
-                       goto one_by_one;
-               }
-               mc.precharge += count;
+       if (ret == -EINTR) {
+               __mem_cgroup_cancel_charge(root_mem_cgroup, count);
                return ret;
        }
-one_by_one:
-       /* fall back to one by one charge */
+
+       /* Try charges one by one with reclaim */
        while (count--) {
-               if (signal_pending(current)) {
-                       ret = -EINTR;
-                       break;
-               }
-               if (!batch_count--) {
-                       batch_count = PRECHARGE_COUNT_AT_ONCE;
-                       cond_resched();
-               }
-               ret = mem_cgroup_try_charge(memcg, GFP_KERNEL, 1, false);
+               ret = mem_cgroup_try_charge(mc.to,
+                                           GFP_KERNEL & ~__GFP_NORETRY, 1);
+               /*
+                * In case of failure, any residual charges against
+                * mc.to will be dropped by mem_cgroup_clear_mc()
+                * later on.  However, cancel any charges that are
+                * bypassed to root right away or they'll be lost.
+                */
+               if (ret == -EINTR)
+                       __mem_cgroup_cancel_charge(root_mem_cgroup, 1);
                if (ret)
-                       /* mem_cgroup_clear_mc() will do uncharge later */
                        return ret;
                mc.precharge++;
+               cond_resched();
        }
-       return ret;
+       return 0;
 }
 
 /**
@@ -6760,21 +6623,18 @@ static void __mem_cgroup_clear_mc(void)
        /* we must fixup refcnts and charges */
        if (mc.moved_swap) {
                /* uncharge swap account from the old cgroup */
-               if (!mem_cgroup_is_root(mc.from))
-                       res_counter_uncharge(&mc.from->memsw,
-                                               PAGE_SIZE * mc.moved_swap);
+               res_counter_uncharge(&mc.from->memsw,
+                                    PAGE_SIZE * mc.moved_swap);
 
                for (i = 0; i < mc.moved_swap; i++)
                        css_put(&mc.from->css);
 
-               if (!mem_cgroup_is_root(mc.to)) {
-                       /*
-                        * we charged both to->res and to->memsw, so we should
-                        * uncharge to->res.
-                        */
-                       res_counter_uncharge(&mc.to->res,
-                                               PAGE_SIZE * mc.moved_swap);
-               }
+               /*
+                * we charged both to->res and to->memsw, so we should
+                * uncharge to->res.
+                */
+               res_counter_uncharge(&mc.to->res,
+                                    PAGE_SIZE * mc.moved_swap);
                /* we've already done css_get(mc.to) */
                mc.moved_swap = 0;
        }
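
For reference, the new return contract of mem_cgroup_try_charge() is consumed
as in the fragment below (illustrative only; it merely restates what
mem_cgroup_try_charge_mm() earlier in this file already does):

	ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages);
	if (ret == -EINTR) {
		/* the charge was bypassed to the root group */
		memcg = root_mem_cgroup;
		ret = 0;
	}
	/* otherwise ret is 0 on success or -ENOMEM on failure */
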
index a013bc94ebbed4af3764e396b087475310ed5185..44c6bd201d3a1cac7120527b45e2a86f5f77abff 100644 (file)
@@ -1172,6 +1172,16 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 
        lock_page(hpage);
 
+       /*
+        * The page could have become part of a different compound page
+        * while we were locking it.  If this happens, just bail out.
+        */
+       if (compound_head(p) != hpage) {
+               action_result(pfn, "different compound page after locking", IGNORED);
+               res = -EBUSY;
+               goto out;
+       }
+
        /*
         * We use page flags to determine what action should be taken, but
         * the flags can be modified by the error containment action.  One
index 8b44f765b64584a9a2e7c6f8873d6fcb6acb7726..5c55270729f7b45a1196e8fb4fc5a5374dcc6d9d 100644 (file)
@@ -884,7 +884,7 @@ out_set_pte:
        return 0;
 }
 
-int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                   pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
                   unsigned long addr, unsigned long end)
 {
@@ -2399,7 +2399,10 @@ EXPORT_SYMBOL(unmap_mapping_range);
 /*
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ *
+ * We return with the mmap_sem locked or unlocked in the same cases
+ * as does filemap_fault().
  */
 static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -2688,6 +2691,11 @@ oom:
        return VM_FAULT_OOM;
 }
 
+/*
+ * The mmap_sem must have been held on entry, and may have been
+ * released depending on flags and vma->vm_ops->fault() return value.
+ * See filemap_fault() and __lock_page_or_retry().
+ */
 static int __do_fault(struct vm_area_struct *vma, unsigned long address,
                pgoff_t pgoff, unsigned int flags, struct page **page)
 {
@@ -2744,7 +2752,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
        if (write)
                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
        else if (pte_file(*pte) && pte_file_soft_dirty(*pte))
-               pte_mksoft_dirty(entry);
+               entry = pte_mksoft_dirty(entry);
        if (anon) {
                inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
                page_add_new_anon_rmap(page, vma, address);
@@ -2758,17 +2766,8 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
        update_mmu_cache(vma, address, pte);
 }
 
-static unsigned long fault_around_bytes = rounddown_pow_of_two(65536);
-
-static inline unsigned long fault_around_pages(void)
-{
-       return fault_around_bytes >> PAGE_SHIFT;
-}
-
-static inline unsigned long fault_around_mask(void)
-{
-       return ~(fault_around_bytes - 1) & PAGE_MASK;
-}
+static unsigned long fault_around_bytes __read_mostly =
+       rounddown_pow_of_two(65536);
 
 #ifdef CONFIG_DEBUG_FS
 static int fault_around_bytes_get(void *data, u64 *val)
@@ -2834,12 +2833,15 @@ late_initcall(fault_around_debugfs);
 static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
                pte_t *pte, pgoff_t pgoff, unsigned int flags)
 {
-       unsigned long start_addr;
+       unsigned long start_addr, nr_pages, mask;
        pgoff_t max_pgoff;
        struct vm_fault vmf;
        int off;
 
-       start_addr = max(address & fault_around_mask(), vma->vm_start);
+       nr_pages = ACCESS_ONCE(fault_around_bytes) >> PAGE_SHIFT;
+       mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
+
+       start_addr = max(address & mask, vma->vm_start);
        off = ((address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
        pte -= off;
        pgoff -= off;
@@ -2851,7 +2853,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
        max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
                PTRS_PER_PTE - 1;
        max_pgoff = min3(max_pgoff, vma_pages(vma) + vma->vm_pgoff - 1,
-                       pgoff + fault_around_pages() - 1);
+                       pgoff + nr_pages - 1);
 
        /* Check if it makes any sense to call ->map_pages */
        while (!pte_none(*pte)) {
@@ -2886,7 +2888,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         * something).
         */
        if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) &&
-           fault_around_pages() > 1) {
+           fault_around_bytes >> PAGE_SHIFT > 1) {
                pte = pte_offset_map_lock(mm, pmd, address, &ptl);
                do_fault_around(vma, address, pte, pgoff, flags);
                if (!pte_same(*pte, orig_pte))
@@ -3016,6 +3018,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        return ret;
 }
 
+/*
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults).
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
+ */
 static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                unsigned long address, pte_t *page_table, pmd_t *pmd,
                unsigned int flags, pte_t orig_pte)
@@ -3040,7 +3048,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
  *
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
  */
 static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -3172,7 +3182,10 @@ out:
  *
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
  */
 static int handle_pte_fault(struct mm_struct *mm,
                     struct vm_area_struct *vma, unsigned long address,
@@ -3181,7 +3194,7 @@ static int handle_pte_fault(struct mm_struct *mm,
        pte_t entry;
        spinlock_t *ptl;
 
-       entry = *pte;
+       entry = ACCESS_ONCE(*pte);
        if (!pte_present(entry)) {
                if (pte_none(entry)) {
                        if (vma->vm_ops) {
@@ -3232,6 +3245,9 @@ unlock:
 
 /*
  * By the time we get here, we already hold the mm semaphore
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
  */
 static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                             unsigned long address, unsigned int flags)
@@ -3313,6 +3329,12 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        return handle_pte_fault(mm, vma, address, pte, pmd, flags);
 }
 
+/*
+ * By the time we get here, we already hold the mm semaphore
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
+ */
 int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                    unsigned long address, unsigned int flags)
 {
@@ -3591,11 +3613,13 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
                ret = get_user_pages(tsk, mm, addr, 1,
                                write, 1, &page, &vma);
                if (ret <= 0) {
+#ifndef CONFIG_HAVE_IOREMAP_PROT
+                       break;
+#else
                        /*
                         * Check if this is a VM_IO | VM_PFNMAP VMA, which
                         * we can access using slightly different code.
                         */
-#ifdef CONFIG_HAVE_IOREMAP_PROT
                        vma = find_vma(mm, addr);
                        if (!vma || vma->vm_start > addr)
                                break;
@@ -3603,9 +3627,9 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
                                ret = vma->vm_ops->access(vma, addr, buf,
                                                          len, write);
                        if (ret <= 0)
-#endif
                                break;
                        bytes = ret;
+#endif
                } else {
                        bytes = len;
                        offset = addr & (PAGE_SIZE-1);
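
A minimal user-space sketch of the nr_pages/mask arithmetic used by the do_fault_around() hunk above; PAGE_SHIFT/PAGE_MASK and the sample address are assumptions for illustration, not values taken from the patch.

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
        /* default fault_around_bytes, already a power of two */
        unsigned long fault_around_bytes = 65536;
        unsigned long nr_pages = fault_around_bytes >> PAGE_SHIFT;     /* 16 */
        unsigned long mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
        unsigned long address = 0x7f1234567abcUL;  /* hypothetical fault address */

        /* start_addr is the fault address rounded down to the fault-around window */
        printf("nr_pages=%lu mask=%#lx start_addr=%#lx\n",
               nr_pages, mask, address & mask);
        return 0;
}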
index 469bbf505f85543f7184428016b56964144e400d..2ff8c2325e968b509e983077a41ec4d0f42c00f4 100644 (file)
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -284,8 +284,8 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
 }
 #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
 
-static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
-                          unsigned long end_pfn)
+static void __meminit grow_zone_span(struct zone *zone, unsigned long start_pfn,
+                                    unsigned long end_pfn)
 {
        unsigned long old_zone_end_pfn;
 
@@ -427,8 +427,8 @@ out_fail:
        return -1;
 }
 
-static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
-                           unsigned long end_pfn)
+static void __meminit grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
+                                     unsigned long end_pfn)
 {
        unsigned long old_pgdat_end_pfn = pgdat_end_pfn(pgdat);
 
@@ -977,15 +977,18 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
        zone = page_zone(pfn_to_page(pfn));
 
        ret = -EINVAL;
-       if ((zone_idx(zone) > ZONE_NORMAL || online_type == ONLINE_MOVABLE) &&
+       if ((zone_idx(zone) > ZONE_NORMAL ||
+           online_type == MMOP_ONLINE_MOVABLE) &&
            !can_online_high_movable(zone))
                goto out;
 
-       if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) {
+       if (online_type == MMOP_ONLINE_KERNEL &&
+           zone_idx(zone) == ZONE_MOVABLE) {
                if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages))
                        goto out;
        }
-       if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) {
+       if (online_type == MMOP_ONLINE_MOVABLE &&
+           zone_idx(zone) == ZONE_MOVABLE - 1) {
                if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages))
                        goto out;
        }
@@ -1156,6 +1159,34 @@ static int check_hotplug_memory_range(u64 start, u64 size)
        return 0;
 }
 
+/*
+ * If the movable zone has already been set up, newly added memory should be
+ * checked.  If its address is higher than the movable zone, it should be
+ * added as movable.  Without this check, the movable zone may overlap with
+ * other zones.
+ */
+static int should_add_memory_movable(int nid, u64 start, u64 size)
+{
+       unsigned long start_pfn = start >> PAGE_SHIFT;
+       pg_data_t *pgdat = NODE_DATA(nid);
+       struct zone *movable_zone = pgdat->node_zones + ZONE_MOVABLE;
+
+       if (zone_is_empty(movable_zone))
+               return 0;
+
+       if (movable_zone->zone_start_pfn <= start_pfn)
+               return 1;
+
+       return 0;
+}
+
+int zone_for_memory(int nid, u64 start, u64 size, int zone_default)
+{
+       if (should_add_memory_movable(nid, start, size))
+               return ZONE_MOVABLE;
+
+       return zone_default;
+}
+
 /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
 int __ref add_memory(int nid, u64 start, u64 size)
 {
index b1eb53634005606298d115ac5cdeb90cac923957..ce84cb0b83ef56179facdacc286bce88abd05fae 100644 (file)
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -210,12 +210,19 @@ out:
  * @vma:   target vma
  * @start: start address
  * @end:   end address
+ * @nonblocking: if non-NULL, *@nonblocking is cleared when mmap_sem is released
  *
  * This takes care of making the pages present too.
  *
  * return 0 on success, negative error code on error.
  *
- * vma->vm_mm->mmap_sem must be held for at least read.
+ * vma->vm_mm->mmap_sem must be held.
+ *
+ * If @nonblocking is NULL, it may be held for read or write and will
+ * be unperturbed.
+ *
+ * If @nonblocking is non-NULL, it must be held for read only and may be
+ * released.  If it's released, *@nonblocking will be set to 0.
  */
 long __mlock_vma_pages_range(struct vm_area_struct *vma,
                unsigned long start, unsigned long end, int *nonblocking)
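
A simplified, hypothetical caller following the @nonblocking contract documented above; the names and error handling are condensed from what a real caller such as mm/mlock.c's __mm_populate() would do.

static long populate_range(struct mm_struct *mm, struct vm_area_struct *vma,
                           unsigned long start, unsigned long end)
{
        int locked = 1;
        long ret;

        down_read(&mm->mmap_sem);
        ret = __mlock_vma_pages_range(vma, start, end, &locked);
        if (locked)             /* helper did not drop mmap_sem; release it here */
                up_read(&mm->mmap_sem);
        return ret;
}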
index 129b847d30cc35c8724cee63e924f69faaf542c6..64c9d736155c7a546e6d133426a0861a63688ead 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -31,6 +31,7 @@
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
 #include <linux/mmu_notifier.h>
+#include <linux/mmdebug.h>
 #include <linux/perf_event.h>
 #include <linux/audit.h>
 #include <linux/khugepaged.h>
@@ -134,6 +135,10 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 {
        unsigned long free, allowed, reserve;
 
+       VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) <
+                       -(s64)vm_committed_as_batch * num_online_cpus(),
+                       "memory commitment underflow");
+
        vm_acct_memory(pages);
 
        /*
index 41cefdf0aaddc46144187cba72ddf3e2629a9f02..950813b1eb3656dc49e66eab4e912fff76dbfc2b 100644 (file)
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
 /* global SRCU for all MMs */
 static struct srcu_struct srcu;
 
+/*
+ * This function allows an mmu_notifier's ->release callback to delay a call
+ * to a function that will free the appropriate resources.  That deferred
+ * function must be quick and must not block.
+ */
+void mmu_notifier_call_srcu(struct rcu_head *rcu,
+                           void (*func)(struct rcu_head *rcu))
+{
+       call_srcu(&srcu, rcu, func);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_call_srcu);
+
+void mmu_notifier_synchronize(void)
+{
+       /* Wait for any running method to finish. */
+       srcu_barrier(&srcu);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_synchronize);
+
 /*
  * This function can't run concurrently against mmu_notifier_register
  * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
@@ -53,7 +72,6 @@ void __mmu_notifier_release(struct mm_struct *mm)
                 */
                if (mn->ops->release)
                        mn->ops->release(mn, mm);
-       srcu_read_unlock(&srcu, id);
 
        spin_lock(&mm->mmu_notifier_mm->lock);
        while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
@@ -69,6 +87,7 @@ void __mmu_notifier_release(struct mm_struct *mm)
                hlist_del_init_rcu(&mn->hlist);
        }
        spin_unlock(&mm->mmu_notifier_mm->lock);
+       srcu_read_unlock(&srcu, id);
 
        /*
         * synchronize_srcu here prevents mmu_notifier_release from returning to
@@ -325,6 +344,25 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
 
+/*
+ * Same as mmu_notifier_unregister() but with no ->release callback and no
+ * SRCU synchronization.
+ */
+void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
+                                       struct mm_struct *mm)
+{
+       spin_lock(&mm->mmu_notifier_mm->lock);
+       /*
+        * Cannot use list_del_rcu() since __mmu_notifier_release()
+        * can delete it before we hold the lock.
+        */
+       hlist_del_init_rcu(&mn->hlist);
+       spin_unlock(&mm->mmu_notifier_mm->lock);
+
+       BUG_ON(atomic_read(&mm->mm_count) <= 0);
+       mmdrop(mm);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release);
+
 static int __init mmu_notifier_init(void)
 {
        return init_srcu_struct(&srcu);
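
A minimal sketch of the intended use of mmu_notifier_call_srcu() from a notifier's ->release callback; struct my_notifier, my_free() and my_release() are invented names for illustration only.

struct my_notifier {
        struct mmu_notifier mn;
        struct rcu_head rcu;
};

static void my_free(struct rcu_head *rcu)
{
        kfree(container_of(rcu, struct my_notifier, rcu));
}

static void my_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
        struct my_notifier *p = container_of(mn, struct my_notifier, mn);

        /* defer the kfree() until the SRCU grace period has elapsed */
        mmu_notifier_call_srcu(&p->rcu, my_free);
}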
index 3291e82d4352423cb1cd747eaa589da4b8a07a74..1e11df8fa7ecaecd274a3d0aaa1fe0aea4bb38ab 100644 (file)
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -258,8 +258,6 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
                unsigned long totalpages, const nodemask_t *nodemask,
                bool force_kill)
 {
-       if (task->exit_state)
-               return OOM_SCAN_CONTINUE;
        if (oom_unkillable_task(task, NULL, nodemask))
                return OOM_SCAN_CONTINUE;
 
@@ -559,28 +557,25 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
  * if a parallel OOM killing is already taking place that includes a zone in
  * the zonelist.  Otherwise, locks all zones in the zonelist and returns 1.
  */
-int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
+bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_mask)
 {
        struct zoneref *z;
        struct zone *zone;
-       int ret = 1;
+       bool ret = true;
 
        spin_lock(&zone_scan_lock);
-       for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
+       for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
                if (zone_is_oom_locked(zone)) {
-                       ret = 0;
+                       ret = false;
                        goto out;
                }
-       }
 
-       for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
-               /*
-                * Lock each zone in the zonelist under zone_scan_lock so a
-                * parallel invocation of try_set_zonelist_oom() doesn't succeed
-                * when it shouldn't.
-                */
+       /*
+        * Lock each zone in the zonelist under zone_scan_lock so a parallel
+        * call to oom_zonelist_trylock() doesn't succeed when it shouldn't.
+        */
+       for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
                zone_set_flag(zone, ZONE_OOM_LOCKED);
-       }
 
 out:
        spin_unlock(&zone_scan_lock);
@@ -592,15 +587,14 @@ out:
  * allocation attempts with zonelists containing them may now recall the OOM
  * killer, if necessary.
  */
-void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
+void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask)
 {
        struct zoneref *z;
        struct zone *zone;
 
        spin_lock(&zone_scan_lock);
-       for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
+       for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
                zone_clear_flag(zone, ZONE_OOM_LOCKED);
-       }
        spin_unlock(&zone_scan_lock);
 }
 
@@ -694,9 +688,9 @@ void pagefault_out_of_memory(void)
        if (mem_cgroup_oom_synchronize(true))
                return;
 
-       zonelist = node_zonelist(first_online_node, GFP_KERNEL);
-       if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) {
+       zonelist = node_zonelist(first_memory_node, GFP_KERNEL);
+       if (oom_zonelist_trylock(zonelist, GFP_KERNEL)) {
                out_of_memory(NULL, 0, 0, NULL, false);
-               clear_zonelist_oom(zonelist, GFP_KERNEL);
+               oom_zonelist_unlock(zonelist, GFP_KERNEL);
        }
 }
index e0c943014eb74ce3d4cb4c021d6f896740386d6d..91d73ef1744d6fbc5c4bbdf9782beb3b3e22da6b 100644 (file)
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -261,14 +261,11 @@ static unsigned long global_dirtyable_memory(void)
  */
 void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
 {
+       const unsigned long available_memory = global_dirtyable_memory();
        unsigned long background;
        unsigned long dirty;
-       unsigned long uninitialized_var(available_memory);
        struct task_struct *tsk;
 
-       if (!vm_dirty_bytes || !dirty_background_bytes)
-               available_memory = global_dirtyable_memory();
-
        if (vm_dirty_bytes)
                dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
        else
index ef44ad736ca17f79606021439b89a9fba4455564..18cee0d4c8a20705a4b3e7dd73e7d1d8a8b8d595 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -680,9 +680,12 @@ static void free_pcppages_bulk(struct zone *zone, int count,
        int migratetype = 0;
        int batch_free = 0;
        int to_free = count;
+       unsigned long nr_scanned;
 
        spin_lock(&zone->lock);
-       zone->pages_scanned = 0;
+       nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
+       if (nr_scanned)
+               __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
 
        while (to_free) {
                struct page *page;
@@ -731,8 +734,11 @@ static void free_one_page(struct zone *zone,
                                unsigned int order,
                                int migratetype)
 {
+       unsigned long nr_scanned;
        spin_lock(&zone->lock);
-       zone->pages_scanned = 0;
+       nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
+       if (nr_scanned)
+               __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
 
        __free_one_page(page, pfn, zone, order, migratetype);
        if (unlikely(!is_migrate_isolate(migratetype)))
@@ -1257,15 +1263,11 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 {
        unsigned long flags;
-       int to_drain;
-       unsigned long batch;
+       int to_drain, batch;
 
        local_irq_save(flags);
        batch = ACCESS_ONCE(pcp->batch);
-       if (pcp->count >= batch)
-               to_drain = batch;
-       else
-               to_drain = pcp->count;
+       to_drain = min(pcp->count, batch);
        if (to_drain > 0) {
                free_pcppages_bulk(zone, to_drain, pcp);
                pcp->count -= to_drain;
@@ -1610,6 +1612,9 @@ again:
        }
 
        __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+       if (zone_page_state(zone, NR_ALLOC_BATCH) == 0 &&
+           !zone_is_fair_depleted(zone))
+               zone_set_flag(zone, ZONE_FAIR_DEPLETED);
 
        __count_zone_vm_events(PGALLOC, zone, 1 << order);
        zone_statistics(preferred_zone, zone, gfp_flags);
@@ -1712,7 +1717,6 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
 {
        /* free_pages may go negative - that's OK */
        long min = mark;
-       long lowmem_reserve = z->lowmem_reserve[classzone_idx];
        int o;
        long free_cma = 0;
 
@@ -1727,7 +1731,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
                free_cma = zone_page_state(z, NR_FREE_CMA_PAGES);
 #endif
 
-       if (free_pages - free_cma <= min + lowmem_reserve)
+       if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx])
                return false;
        for (o = 0; o < order; o++) {
                /* At the next order, this order's pages become unavailable */
@@ -1922,6 +1926,18 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 
 #endif /* CONFIG_NUMA */
 
+static void reset_alloc_batches(struct zone *preferred_zone)
+{
+       struct zone *zone = preferred_zone->zone_pgdat->node_zones;
+
+       do {
+               mod_zone_page_state(zone, NR_ALLOC_BATCH,
+                       high_wmark_pages(zone) - low_wmark_pages(zone) -
+                       atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
+               zone_clear_flag(zone, ZONE_FAIR_DEPLETED);
+       } while (zone++ != preferred_zone);
+}
+
 /*
  * get_page_from_freelist goes through the zonelist trying to allocate
  * a page.
@@ -1939,8 +1955,12 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
        int did_zlc_setup = 0;          /* just call zlc_setup() one time */
        bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) &&
                                (gfp_mask & __GFP_WRITE);
+       int nr_fair_skipped = 0;
+       bool zonelist_rescan;
 
 zonelist_scan:
+       zonelist_rescan = false;
+
        /*
         * Scan zonelist, looking for a zone with enough free.
         * See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c.
@@ -1964,9 +1984,11 @@ zonelist_scan:
                 */
                if (alloc_flags & ALLOC_FAIR) {
                        if (!zone_local(preferred_zone, zone))
+                               break;
+                       if (zone_is_fair_depleted(zone)) {
+                               nr_fair_skipped++;
                                continue;
-                       if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
-                               continue;
+                       }
                }
                /*
                 * When allocating a page cache page for writing, we
@@ -2072,13 +2094,7 @@ this_zone_full:
                        zlc_mark_zone_full(zonelist, z);
        }
 
-       if (unlikely(IS_ENABLED(CONFIG_NUMA) && page == NULL && zlc_active)) {
-               /* Disable zlc cache for second zonelist scan */
-               zlc_active = 0;
-               goto zonelist_scan;
-       }
-
-       if (page)
+       if (page) {
                /*
                 * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
                 * necessary to allocate the page. The expectation is
@@ -2087,8 +2103,37 @@ this_zone_full:
                 * for !PFMEMALLOC purposes.
                 */
                page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
+               return page;
+       }
 
-       return page;
+       /*
+        * The first pass makes sure allocations are spread fairly within the
+        * local node.  However, the local node might have free pages left
+        * after the fairness batches are exhausted, and remote zones haven't
+        * even been considered yet.  Try once more without fairness, and
+        * include remote zones now, before entering the slowpath and waking
+        * kswapd: prefer spilling to a remote zone over swapping locally.
+        */
+       if (alloc_flags & ALLOC_FAIR) {
+               alloc_flags &= ~ALLOC_FAIR;
+               if (nr_fair_skipped) {
+                       zonelist_rescan = true;
+                       reset_alloc_batches(preferred_zone);
+               }
+               if (nr_online_nodes > 1)
+                       zonelist_rescan = true;
+       }
+
+       if (unlikely(IS_ENABLED(CONFIG_NUMA) && zlc_active)) {
+               /* Disable zlc cache for second zonelist scan */
+               zlc_active = 0;
+               zonelist_rescan = true;
+       }
+
+       if (zonelist_rescan)
+               goto zonelist_scan;
+
+       return NULL;
 }
 
 /*
@@ -2201,8 +2246,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 {
        struct page *page;
 
-       /* Acquire the OOM killer lock for the zones in zonelist */
-       if (!try_set_zonelist_oom(zonelist, gfp_mask)) {
+       /* Acquire the per-zone oom lock for each zone */
+       if (!oom_zonelist_trylock(zonelist, gfp_mask)) {
                schedule_timeout_uninterruptible(1);
                return NULL;
        }
@@ -2240,7 +2285,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
        out_of_memory(zonelist, gfp_mask, order, nodemask, false);
 
 out:
-       clear_zonelist_oom(zonelist, gfp_mask);
+       oom_zonelist_unlock(zonelist, gfp_mask);
        return page;
 }
 
@@ -2409,28 +2454,6 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
        return page;
 }
 
-static void reset_alloc_batches(struct zonelist *zonelist,
-                               enum zone_type high_zoneidx,
-                               struct zone *preferred_zone)
-{
-       struct zoneref *z;
-       struct zone *zone;
-
-       for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
-               /*
-                * Only reset the batches of zones that were actually
-                * considered in the fairness pass, we don't want to
-                * trash fairness information for zones that are not
-                * actually part of this zonelist's round-robin cycle.
-                */
-               if (!zone_local(preferred_zone, zone))
-                       continue;
-               mod_zone_page_state(zone, NR_ALLOC_BATCH,
-                       high_wmark_pages(zone) - low_wmark_pages(zone) -
-                       atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
-       }
-}
-
 static void wake_all_kswapds(unsigned int order,
                             struct zonelist *zonelist,
                             enum zone_type high_zoneidx,
@@ -2615,14 +2638,6 @@ rebalance:
        if (page)
                goto got_pg;
 
-       /*
-        * It can become very expensive to allocate transparent hugepages at
-        * fault, so use asynchronous memory compaction for THP unless it is
-        * khugepaged trying to collapse.
-        */
-       if (!(gfp_mask & __GFP_NO_KSWAPD) || (current->flags & PF_KTHREAD))
-               migration_mode = MIGRATE_SYNC_LIGHT;
-
        /*
         * If compaction is deferred for high-order allocations, it is because
         * sync compaction recently failed. If this is the case and the caller
@@ -2633,6 +2648,15 @@ rebalance:
                                                (gfp_mask & __GFP_NO_KSWAPD))
                goto nopage;
 
+       /*
+        * It can become very expensive to allocate transparent hugepages at
+        * fault, so use asynchronous memory compaction for THP unless it is
+        * khugepaged trying to collapse.
+        */
+       if ((gfp_mask & GFP_TRANSHUGE) != GFP_TRANSHUGE ||
+                                               (current->flags & PF_KTHREAD))
+               migration_mode = MIGRATE_SYNC_LIGHT;
+
        /* Try direct reclaim and then allocating */
        page = __alloc_pages_direct_reclaim(gfp_mask, order,
                                        zonelist, high_zoneidx,
@@ -2766,28 +2790,11 @@ retry_cpuset:
        if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
                alloc_flags |= ALLOC_CMA;
 #endif
-retry:
        /* First allocation attempt */
        page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
                        zonelist, high_zoneidx, alloc_flags,
                        preferred_zone, classzone_idx, migratetype);
        if (unlikely(!page)) {
-               /*
-                * The first pass makes sure allocations are spread
-                * fairly within the local node.  However, the local
-                * node might have free pages left after the fairness
-                * batches are exhausted, and remote zones haven't
-                * even been considered yet.  Try once more without
-                * fairness, and include remote zones now, before
-                * entering the slowpath and waking kswapd: prefer
-                * spilling to a remote zone over swapping locally.
-                */
-               if (alloc_flags & ALLOC_FAIR) {
-                       reset_alloc_batches(zonelist, high_zoneidx,
-                                           preferred_zone);
-                       alloc_flags &= ~ALLOC_FAIR;
-                       goto retry;
-               }
                /*
                 * Runtime PM, block IO and its error handling path
                 * can deadlock because I/O on the device might not
@@ -2962,7 +2969,7 @@ EXPORT_SYMBOL(alloc_pages_exact);
  * Note this is not alloc_pages_exact_node() which allocates on a specific node,
  * but is not exact.
  */
-void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
+void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
 {
        unsigned order = get_order(size);
        struct page *p = alloc_pages_node(nid, gfp_mask, order);
@@ -2970,7 +2977,6 @@ void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
                return NULL;
        return make_alloc_exact((unsigned long)page_address(p), order, size);
 }
-EXPORT_SYMBOL(alloc_pages_exact_nid);
 
 /**
  * free_pages_exact - release memory allocated via alloc_pages_exact()
@@ -3052,7 +3058,7 @@ static inline void show_node(struct zone *zone)
 void si_meminfo(struct sysinfo *val)
 {
        val->totalram = totalram_pages;
-       val->sharedram = 0;
+       val->sharedram = global_page_state(NR_SHMEM);
        val->freeram = global_page_state(NR_FREE_PAGES);
        val->bufferram = nr_blockdev_pages();
        val->totalhigh = totalhigh_pages;
@@ -3072,6 +3078,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
        for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
                managed_pages += pgdat->node_zones[zone_type].managed_pages;
        val->totalram = managed_pages;
+       val->sharedram = node_page_state(nid, NR_SHMEM);
        val->freeram = node_page_state(nid, NR_FREE_PAGES);
 #ifdef CONFIG_HIGHMEM
        val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages;
@@ -3253,12 +3260,12 @@ void show_free_areas(unsigned int filter)
                        K(zone_page_state(zone, NR_BOUNCE)),
                        K(zone_page_state(zone, NR_FREE_CMA_PAGES)),
                        K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
-                       zone->pages_scanned,
+                       K(zone_page_state(zone, NR_PAGES_SCANNED)),
                        (!zone_reclaimable(zone) ? "yes" : "no")
                        );
                printk("lowmem_reserve[]:");
                for (i = 0; i < MAX_NR_ZONES; i++)
-                       printk(" %lu", zone->lowmem_reserve[i]);
+                       printk(" %ld", zone->lowmem_reserve[i]);
                printk("\n");
        }
 
@@ -5579,7 +5586,7 @@ static void calculate_totalreserve_pages(void)
        for_each_online_pgdat(pgdat) {
                for (i = 0; i < MAX_NR_ZONES; i++) {
                        struct zone *zone = pgdat->node_zones + i;
-                       unsigned long max = 0;
+                       long max = 0;
 
                        /* Find valid and maximum lowmem_reserve in the zone */
                        for (j = i; j < MAX_NR_ZONES; j++) {
index 0ca36a7770b1b974eaea70d6357b7cdbd73aad3d..17b9172ec37f1ef05d49b45f244ec23f6d934ed1 100644 (file)
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -326,7 +326,6 @@ static unsigned long get_next_ra_size(struct file_ra_state *ra,
  *     - thrashing threshold in memory tight systems
  */
 static pgoff_t count_history_pages(struct address_space *mapping,
-                                  struct file_ra_state *ra,
                                   pgoff_t offset, unsigned long max)
 {
        pgoff_t head;
@@ -349,7 +348,7 @@ static int try_context_readahead(struct address_space *mapping,
 {
        pgoff_t size;
 
-       size = count_history_pages(mapping, ra, offset, max);
+       size = count_history_pages(mapping, offset, max);
 
        /*
         * not enough history pages:
index af68b15a8fc1f99ede5cf82a38aecfc6b3b6eda6..302d1cf7ad07c385ebfeb381dd42af542b4787a5 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -149,6 +149,19 @@ static inline void shmem_unacct_size(unsigned long flags, loff_t size)
                vm_unacct_memory(VM_ACCT(size));
 }
 
+static inline int shmem_reacct_size(unsigned long flags,
+               loff_t oldsize, loff_t newsize)
+{
+       if (!(flags & VM_NORESERVE)) {
+               if (VM_ACCT(newsize) > VM_ACCT(oldsize))
+                       return security_vm_enough_memory_mm(current->mm,
+                                       VM_ACCT(newsize) - VM_ACCT(oldsize));
+               else if (VM_ACCT(newsize) < VM_ACCT(oldsize))
+                       vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize));
+       }
+       return 0;
+}
+
 /*
  * ... whereas tmpfs objects are accounted incrementally as
  * pages are allocated, in order to allow huge sparse files.
@@ -280,7 +293,7 @@ static bool shmem_confirm_swap(struct address_space *mapping,
  */
 static int shmem_add_to_page_cache(struct page *page,
                                   struct address_space *mapping,
-                                  pgoff_t index, gfp_t gfp, void *expected)
+                                  pgoff_t index, void *expected)
 {
        int error;
 
@@ -549,6 +562,10 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
                loff_t newsize = attr->ia_size;
 
                if (newsize != oldsize) {
+                       error = shmem_reacct_size(SHMEM_I(inode)->flags,
+                                       oldsize, newsize);
+                       if (error)
+                               return error;
                        i_size_write(inode, newsize);
                        inode->i_ctime = inode->i_mtime = CURRENT_TIME;
                }
@@ -649,7 +666,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
         */
        if (!error)
                error = shmem_add_to_page_cache(*pagep, mapping, index,
-                                               GFP_NOWAIT, radswap);
+                                               radswap);
        if (error != -ENOMEM) {
                /*
                 * Truncation and eviction use free_swap_and_cache(), which
@@ -1095,7 +1112,7 @@ repeat:
                                                gfp & GFP_RECLAIM_MASK);
                if (!error) {
                        error = shmem_add_to_page_cache(page, mapping, index,
-                                               gfp, swp_to_radix_entry(swap));
+                                               swp_to_radix_entry(swap));
                        /*
                         * We already confirmed swap under page lock, and make
                         * no memory allocation here, so usually no possibility
@@ -1149,7 +1166,7 @@ repeat:
                __SetPageSwapBacked(page);
                __set_page_locked(page);
                if (sgp == SGP_WRITE)
-                       init_page_accessed(page);
+                       __SetPageReferenced(page);
 
                error = mem_cgroup_charge_file(page, current->mm,
                                                gfp & GFP_RECLAIM_MASK);
@@ -1158,7 +1175,7 @@ repeat:
                error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
                if (!error) {
                        error = shmem_add_to_page_cache(page, mapping, index,
-                                                       gfp, NULL);
+                                                       NULL);
                        radix_tree_preload_end();
                }
                if (error) {
@@ -2932,16 +2949,16 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
        this.len = strlen(name);
        this.hash = 0; /* will go */
        sb = shm_mnt->mnt_sb;
+       path.mnt = mntget(shm_mnt);
        path.dentry = d_alloc_pseudo(sb, &this);
        if (!path.dentry)
                goto put_memory;
        d_set_d_op(path.dentry, &anon_ops);
-       path.mnt = mntget(shm_mnt);
 
        res = ERR_PTR(-ENOSPC);
        inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
        if (!inode)
-               goto put_dentry;
+               goto put_memory;
 
        inode->i_flags |= i_flags;
        d_instantiate(path.dentry, inode);
@@ -2949,19 +2966,19 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
        clear_nlink(inode);     /* It is unlinked */
        res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
        if (IS_ERR(res))
-               goto put_dentry;
+               goto put_path;
 
        res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
                  &shmem_file_operations);
        if (IS_ERR(res))
-               goto put_dentry;
+               goto put_path;
 
        return res;
 
-put_dentry:
-       path_put(&path);
 put_memory:
        shmem_unacct_size(flags, size);
+put_path:
+       path_put(&path);
        return res;
 }
 
index 3070b929a1bfa67778e415525403e9b36e392344..2e60bf3dedbb3925a015e1c66c0c871f03f28f6f 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -191,7 +191,6 @@ struct array_cache {
        unsigned int limit;
        unsigned int batchcount;
        unsigned int touched;
-       spinlock_t lock;
        void *entry[];  /*
                         * Must have this definition in here for the proper
                         * alignment of array_cache. Also simplifies accessing
@@ -203,6 +202,11 @@ struct array_cache {
                         */
 };
 
+struct alien_cache {
+       spinlock_t lock;
+       struct array_cache ac;
+};
+
 #define SLAB_OBJ_PFMEMALLOC    1
 static inline bool is_obj_pfmemalloc(void *objp)
 {
@@ -242,7 +246,8 @@ static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
 static int drain_freelist(struct kmem_cache *cache,
                        struct kmem_cache_node *n, int tofree);
 static void free_block(struct kmem_cache *cachep, void **objpp, int len,
-                       int node);
+                       int node, struct list_head *list);
+static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list);
 static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
 static void cache_reap(struct work_struct *unused);
 
@@ -267,7 +272,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
 #define MAKE_LIST(cachep, listp, slab, nodeid)                         \
        do {                                                            \
                INIT_LIST_HEAD(listp);                                  \
-               list_splice(&(cachep->node[nodeid]->slab), listp);      \
+               list_splice(&get_node(cachep, nodeid)->slab, listp);    \
        } while (0)
 
 #define        MAKE_ALL_LISTS(cachep, ptr, nodeid)                             \
@@ -465,143 +470,6 @@ static struct kmem_cache kmem_cache_boot = {
        .name = "kmem_cache",
 };
 
-#define BAD_ALIEN_MAGIC 0x01020304ul
-
-#ifdef CONFIG_LOCKDEP
-
-/*
- * Slab sometimes uses the kmalloc slabs to store the slab headers
- * for other slabs "off slab".
- * The locking for this is tricky in that it nests within the locks
- * of all other slabs in a few places; to deal with this special
- * locking we put on-slab caches into a separate lock-class.
- *
- * We set lock class for alien array caches which are up during init.
- * The lock annotation will be lost if all cpus of a node goes down and
- * then comes back up during hotplug
- */
-static struct lock_class_key on_slab_l3_key;
-static struct lock_class_key on_slab_alc_key;
-
-static struct lock_class_key debugobj_l3_key;
-static struct lock_class_key debugobj_alc_key;
-
-static void slab_set_lock_classes(struct kmem_cache *cachep,
-               struct lock_class_key *l3_key, struct lock_class_key *alc_key,
-               int q)
-{
-       struct array_cache **alc;
-       struct kmem_cache_node *n;
-       int r;
-
-       n = cachep->node[q];
-       if (!n)
-               return;
-
-       lockdep_set_class(&n->list_lock, l3_key);
-       alc = n->alien;
-       /*
-        * FIXME: This check for BAD_ALIEN_MAGIC
-        * should go away when common slab code is taught to
-        * work even without alien caches.
-        * Currently, non NUMA code returns BAD_ALIEN_MAGIC
-        * for alloc_alien_cache,
-        */
-       if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
-               return;
-       for_each_node(r) {
-               if (alc[r])
-                       lockdep_set_class(&alc[r]->lock, alc_key);
-       }
-}
-
-static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
-{
-       slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node);
-}
-
-static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
-{
-       int node;
-
-       for_each_online_node(node)
-               slab_set_debugobj_lock_classes_node(cachep, node);
-}
-
-static void init_node_lock_keys(int q)
-{
-       int i;
-
-       if (slab_state < UP)
-               return;
-
-       for (i = 1; i <= KMALLOC_SHIFT_HIGH; i++) {
-               struct kmem_cache_node *n;
-               struct kmem_cache *cache = kmalloc_caches[i];
-
-               if (!cache)
-                       continue;
-
-               n = cache->node[q];
-               if (!n || OFF_SLAB(cache))
-                       continue;
-
-               slab_set_lock_classes(cache, &on_slab_l3_key,
-                               &on_slab_alc_key, q);
-       }
-}
-
-static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q)
-{
-       if (!cachep->node[q])
-               return;
-
-       slab_set_lock_classes(cachep, &on_slab_l3_key,
-                       &on_slab_alc_key, q);
-}
-
-static inline void on_slab_lock_classes(struct kmem_cache *cachep)
-{
-       int node;
-
-       VM_BUG_ON(OFF_SLAB(cachep));
-       for_each_node(node)
-               on_slab_lock_classes_node(cachep, node);
-}
-
-static inline void init_lock_keys(void)
-{
-       int node;
-
-       for_each_node(node)
-               init_node_lock_keys(node);
-}
-#else
-static void init_node_lock_keys(int q)
-{
-}
-
-static inline void init_lock_keys(void)
-{
-}
-
-static inline void on_slab_lock_classes(struct kmem_cache *cachep)
-{
-}
-
-static inline void on_slab_lock_classes_node(struct kmem_cache *cachep, int node)
-{
-}
-
-static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
-{
-}
-
-static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
-{
-}
-#endif
-
 static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
 
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -792,13 +660,8 @@ static void start_cpu_timer(int cpu)
        }
 }
 
-static struct array_cache *alloc_arraycache(int node, int entries,
-                                           int batchcount, gfp_t gfp)
+static void init_arraycache(struct array_cache *ac, int limit, int batch)
 {
-       int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
-       struct array_cache *nc = NULL;
-
-       nc = kmalloc_node(memsize, gfp, node);
        /*
         * The array_cache structures contain pointers to free objects.
         * However, when such objects are allocated or transferred to another
@@ -806,15 +669,24 @@ static struct array_cache *alloc_arraycache(int node, int entries,
         * valid references during a kmemleak scan. Therefore, kmemleak must
         * not scan such objects.
         */
-       kmemleak_no_scan(nc);
-       if (nc) {
-               nc->avail = 0;
-               nc->limit = entries;
-               nc->batchcount = batchcount;
-               nc->touched = 0;
-               spin_lock_init(&nc->lock);
+       kmemleak_no_scan(ac);
+       if (ac) {
+               ac->avail = 0;
+               ac->limit = limit;
+               ac->batchcount = batch;
+               ac->touched = 0;
        }
-       return nc;
+}
+
+static struct array_cache *alloc_arraycache(int node, int entries,
+                                           int batchcount, gfp_t gfp)
+{
+       size_t memsize = sizeof(void *) * entries + sizeof(struct array_cache);
+       struct array_cache *ac = NULL;
+
+       ac = kmalloc_node(memsize, gfp, node);
+       init_arraycache(ac, entries, batchcount);
+       return ac;
 }
 
 static inline bool is_slab_pfmemalloc(struct page *page)
@@ -826,7 +698,7 @@ static inline bool is_slab_pfmemalloc(struct page *page)
 static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
                                                struct array_cache *ac)
 {
-       struct kmem_cache_node *n = cachep->node[numa_mem_id()];
+       struct kmem_cache_node *n = get_node(cachep, numa_mem_id());
        struct page *page;
        unsigned long flags;
 
@@ -881,7 +753,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
                 * If there are empty slabs on the slabs_free list and we are
                 * being forced to refill the cache, mark this one !pfmemalloc.
                 */
-               n = cachep->node[numa_mem_id()];
+               n = get_node(cachep, numa_mem_id());
                if (!list_empty(&n->slabs_free) && force_refill) {
                        struct page *page = virt_to_head_page(objp);
                        ClearPageSlabPfmemalloc(page);
@@ -961,12 +833,13 @@ static int transfer_objects(struct array_cache *to,
 #define drain_alien_cache(cachep, alien) do { } while (0)
 #define reap_alien(cachep, n) do { } while (0)
 
-static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
+static inline struct alien_cache **alloc_alien_cache(int node,
+                                               int limit, gfp_t gfp)
 {
-       return (struct array_cache **)BAD_ALIEN_MAGIC;
+       return NULL;
 }
 
-static inline void free_alien_cache(struct array_cache **ac_ptr)
+static inline void free_alien_cache(struct alien_cache **ac_ptr)
 {
 }
 
@@ -992,46 +865,60 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep,
 static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
 static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
 
-static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
+static struct alien_cache *__alloc_alien_cache(int node, int entries,
+                                               int batch, gfp_t gfp)
+{
+       size_t memsize = sizeof(void *) * entries + sizeof(struct alien_cache);
+       struct alien_cache *alc = NULL;
+
+       alc = kmalloc_node(memsize, gfp, node);
+       init_arraycache(&alc->ac, entries, batch);
+       spin_lock_init(&alc->lock);
+       return alc;
+}
+
+static struct alien_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
 {
-       struct array_cache **ac_ptr;
-       int memsize = sizeof(void *) * nr_node_ids;
+       struct alien_cache **alc_ptr;
+       size_t memsize = sizeof(void *) * nr_node_ids;
        int i;
 
        if (limit > 1)
                limit = 12;
-       ac_ptr = kzalloc_node(memsize, gfp, node);
-       if (ac_ptr) {
-               for_each_node(i) {
-                       if (i == node || !node_online(i))
-                               continue;
-                       ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
-                       if (!ac_ptr[i]) {
-                               for (i--; i >= 0; i--)
-                                       kfree(ac_ptr[i]);
-                               kfree(ac_ptr);
-                               return NULL;
-                       }
+       alc_ptr = kzalloc_node(memsize, gfp, node);
+       if (!alc_ptr)
+               return NULL;
+
+       for_each_node(i) {
+               if (i == node || !node_online(i))
+                       continue;
+               alc_ptr[i] = __alloc_alien_cache(node, limit, 0xbaadf00d, gfp);
+               if (!alc_ptr[i]) {
+                       for (i--; i >= 0; i--)
+                               kfree(alc_ptr[i]);
+                       kfree(alc_ptr);
+                       return NULL;
                }
        }
-       return ac_ptr;
+       return alc_ptr;
 }
 
-static void free_alien_cache(struct array_cache **ac_ptr)
+static void free_alien_cache(struct alien_cache **alc_ptr)
 {
        int i;
 
-       if (!ac_ptr)
+       if (!alc_ptr)
                return;
        for_each_node(i)
-           kfree(ac_ptr[i]);
-       kfree(ac_ptr);
+           kfree(alc_ptr[i]);
+       kfree(alc_ptr);
 }
 
 static void __drain_alien_cache(struct kmem_cache *cachep,
-                               struct array_cache *ac, int node)
+                               struct array_cache *ac, int node,
+                               struct list_head *list)
 {
-       struct kmem_cache_node *n = cachep->node[node];
+       struct kmem_cache_node *n = get_node(cachep, node);
 
        if (ac->avail) {
                spin_lock(&n->list_lock);
@@ -1043,7 +930,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
                if (n->shared)
                        transfer_objects(n->shared, ac, ac->limit);
 
-               free_block(cachep, ac->entry, ac->avail, node);
+               free_block(cachep, ac->entry, ac->avail, node, list);
                ac->avail = 0;
                spin_unlock(&n->list_lock);
        }
@@ -1057,28 +944,40 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
        int node = __this_cpu_read(slab_reap_node);
 
        if (n->alien) {
-               struct array_cache *ac = n->alien[node];
+               struct alien_cache *alc = n->alien[node];
+               struct array_cache *ac;
+
+               if (alc) {
+                       ac = &alc->ac;
+                       if (ac->avail && spin_trylock_irq(&alc->lock)) {
+                               LIST_HEAD(list);
 
-               if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
-                       __drain_alien_cache(cachep, ac, node);
-                       spin_unlock_irq(&ac->lock);
+                               __drain_alien_cache(cachep, ac, node, &list);
+                               spin_unlock_irq(&alc->lock);
+                               slabs_destroy(cachep, &list);
+                       }
                }
        }
 }
 
 static void drain_alien_cache(struct kmem_cache *cachep,
-                               struct array_cache **alien)
+                               struct alien_cache **alien)
 {
        int i = 0;
+       struct alien_cache *alc;
        struct array_cache *ac;
        unsigned long flags;
 
        for_each_online_node(i) {
-               ac = alien[i];
-               if (ac) {
-                       spin_lock_irqsave(&ac->lock, flags);
-                       __drain_alien_cache(cachep, ac, i);
-                       spin_unlock_irqrestore(&ac->lock, flags);
+               alc = alien[i];
+               if (alc) {
+                       LIST_HEAD(list);
+
+                       ac = &alc->ac;
+                       spin_lock_irqsave(&alc->lock, flags);
+                       __drain_alien_cache(cachep, ac, i, &list);
+                       spin_unlock_irqrestore(&alc->lock, flags);
+                       slabs_destroy(cachep, &list);
                }
        }
 }
@@ -1087,8 +986,10 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 {
        int nodeid = page_to_nid(virt_to_page(objp));
        struct kmem_cache_node *n;
-       struct array_cache *alien = NULL;
+       struct alien_cache *alien = NULL;
+       struct array_cache *ac;
        int node;
+       LIST_HEAD(list);
 
        node = numa_mem_id();
 
@@ -1099,21 +1000,25 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
        if (likely(nodeid == node))
                return 0;
 
-       n = cachep->node[node];
+       n = get_node(cachep, node);
        STATS_INC_NODEFREES(cachep);
        if (n->alien && n->alien[nodeid]) {
                alien = n->alien[nodeid];
+               ac = &alien->ac;
                spin_lock(&alien->lock);
-               if (unlikely(alien->avail == alien->limit)) {
+               if (unlikely(ac->avail == ac->limit)) {
                        STATS_INC_ACOVERFLOW(cachep);
-                       __drain_alien_cache(cachep, alien, nodeid);
+                       __drain_alien_cache(cachep, ac, nodeid, &list);
                }
-               ac_put_obj(cachep, alien, objp);
+               ac_put_obj(cachep, ac, objp);
                spin_unlock(&alien->lock);
+               slabs_destroy(cachep, &list);
        } else {
-               spin_lock(&(cachep->node[nodeid])->list_lock);
-               free_block(cachep, &objp, 1, nodeid);
-               spin_unlock(&(cachep->node[nodeid])->list_lock);
+               n = get_node(cachep, nodeid);
+               spin_lock(&n->list_lock);
+               free_block(cachep, &objp, 1, nodeid, &list);
+               spin_unlock(&n->list_lock);
+               slabs_destroy(cachep, &list);
        }
        return 1;
 }
@@ -1132,7 +1037,7 @@ static int init_cache_node_node(int node)
 {
        struct kmem_cache *cachep;
        struct kmem_cache_node *n;
-       const int memsize = sizeof(struct kmem_cache_node);
+       const size_t memsize = sizeof(struct kmem_cache_node);
 
        list_for_each_entry(cachep, &slab_caches, list) {
                /*
@@ -1140,7 +1045,8 @@ static int init_cache_node_node(int node)
                 * begin anything. Make sure some other cpu on this
                 * node has not already allocated this
                 */
-               if (!cachep->node[node]) {
+               n = get_node(cachep, node);
+               if (!n) {
                        n = kmalloc_node(memsize, GFP_KERNEL, node);
                        if (!n)
                                return -ENOMEM;
@@ -1156,11 +1062,11 @@ static int init_cache_node_node(int node)
                        cachep->node[node] = n;
                }
 
-               spin_lock_irq(&cachep->node[node]->list_lock);
-               cachep->node[node]->free_limit =
+               spin_lock_irq(&n->list_lock);
+               n->free_limit =
                        (1 + nr_cpus_node(node)) *
                        cachep->batchcount + cachep->num;
-               spin_unlock_irq(&cachep->node[node]->list_lock);
+               spin_unlock_irq(&n->list_lock);
        }
        return 0;
 }
@@ -1181,12 +1087,13 @@ static void cpuup_canceled(long cpu)
        list_for_each_entry(cachep, &slab_caches, list) {
                struct array_cache *nc;
                struct array_cache *shared;
-               struct array_cache **alien;
+               struct alien_cache **alien;
+               LIST_HEAD(list);
 
                /* cpu is dead; no one can alloc from it. */
                nc = cachep->array[cpu];
                cachep->array[cpu] = NULL;
-               n = cachep->node[node];
+               n = get_node(cachep, node);
 
                if (!n)
                        goto free_array_cache;
@@ -1196,7 +1103,7 @@ static void cpuup_canceled(long cpu)
                /* Free limit for this kmem_cache_node */
                n->free_limit -= cachep->batchcount;
                if (nc)
-                       free_block(cachep, nc->entry, nc->avail, node);
+                       free_block(cachep, nc->entry, nc->avail, node, &list);
 
                if (!cpumask_empty(mask)) {
                        spin_unlock_irq(&n->list_lock);
@@ -1206,7 +1113,7 @@ static void cpuup_canceled(long cpu)
                shared = n->shared;
                if (shared) {
                        free_block(cachep, shared->entry,
-                                  shared->avail, node);
+                                  shared->avail, node, &list);
                        n->shared = NULL;
                }
 
@@ -1221,6 +1128,7 @@ static void cpuup_canceled(long cpu)
                        free_alien_cache(alien);
                }
 free_array_cache:
+               slabs_destroy(cachep, &list);
                kfree(nc);
        }
        /*
@@ -1229,7 +1137,7 @@ free_array_cache:
         * shrink each nodelist to its limit.
         */
        list_for_each_entry(cachep, &slab_caches, list) {
-               n = cachep->node[node];
+               n = get_node(cachep, node);
                if (!n)
                        continue;
                drain_freelist(cachep, n, slabs_tofree(cachep, n));
@@ -1260,7 +1168,7 @@ static int cpuup_prepare(long cpu)
        list_for_each_entry(cachep, &slab_caches, list) {
                struct array_cache *nc;
                struct array_cache *shared = NULL;
-               struct array_cache **alien = NULL;
+               struct alien_cache **alien = NULL;
 
                nc = alloc_arraycache(node, cachep->limit,
                                        cachep->batchcount, GFP_KERNEL);
@@ -1284,7 +1192,7 @@ static int cpuup_prepare(long cpu)
                        }
                }
                cachep->array[cpu] = nc;
-               n = cachep->node[node];
+               n = get_node(cachep, node);
                BUG_ON(!n);
 
                spin_lock_irq(&n->list_lock);
@@ -1305,13 +1213,7 @@ static int cpuup_prepare(long cpu)
                spin_unlock_irq(&n->list_lock);
                kfree(shared);
                free_alien_cache(alien);
-               if (cachep->flags & SLAB_DEBUG_OBJECTS)
-                       slab_set_debugobj_lock_classes_node(cachep, node);
-               else if (!OFF_SLAB(cachep) &&
-                        !(cachep->flags & SLAB_DESTROY_BY_RCU))
-                       on_slab_lock_classes_node(cachep, node);
        }
-       init_node_lock_keys(node);
 
        return 0;
 bad:
@@ -1395,7 +1297,7 @@ static int __meminit drain_cache_node_node(int node)
        list_for_each_entry(cachep, &slab_caches, list) {
                struct kmem_cache_node *n;
 
-               n = cachep->node[node];
+               n = get_node(cachep, node);
                if (!n)
                        continue;
 
@@ -1575,10 +1477,6 @@ void __init kmem_cache_init(void)
 
                memcpy(ptr, cpu_cache_get(kmem_cache),
                       sizeof(struct arraycache_init));
-               /*
-                * Do not assume that spinlocks can be initialized via memcpy:
-                */
-               spin_lock_init(&ptr->lock);
 
                kmem_cache->array[smp_processor_id()] = ptr;
 
@@ -1588,10 +1486,6 @@ void __init kmem_cache_init(void)
                       != &initarray_generic.cache);
                memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]),
                       sizeof(struct arraycache_init));
-               /*
-                * Do not assume that spinlocks can be initialized via memcpy:
-                */
-               spin_lock_init(&ptr->lock);
 
                kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr;
        }
@@ -1628,9 +1522,6 @@ void __init kmem_cache_init_late(void)
                        BUG();
        mutex_unlock(&slab_mutex);
 
-       /* Annotate slab for lockdep -- annotate the malloc caches */
-       init_lock_keys();
-
        /* Done! */
        slab_state = FULL;
 
@@ -1690,14 +1581,10 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
        printk(KERN_WARNING "  cache: %s, object size: %d, order: %d\n",
                cachep->name, cachep->size, cachep->gfporder);
 
-       for_each_online_node(node) {
+       for_each_kmem_cache_node(cachep, node, n) {
                unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
                unsigned long active_slabs = 0, num_slabs = 0;
 
-               n = cachep->node[node];
-               if (!n)
-                       continue;
-
                spin_lock_irqsave(&n->list_lock, flags);
                list_for_each_entry(page, &n->slabs_full, lru) {
                        active_objs += cachep->num;
@@ -1724,7 +1611,8 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
 }
 
 /*
- * Interface to system's page allocator. No need to hold the cache-lock.
+ * Interface to system's page allocator. No need to hold the
+ * kmem_cache_node ->list_lock.
  *
  * If we requested dmaable memory, we will get it. Even if we
  * did not request dmaable memory, we might get it, but that
@@ -2026,9 +1914,9 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
  * @cachep: cache pointer being destroyed
  * @page: page pointer being destroyed
  *
- * Destroy all the objs in a slab, and release the mem back to the system.
- * Before calling the slab must have been unlinked from the cache.  The
- * cache-lock is not held/needed.
+ * Destroy all the objs in a slab page, and release the mem back to the system.
+ * Before calling the slab page must have been unlinked from the cache. The
+ * kmem_cache_node ->list_lock is not held/needed.
  */
 static void slab_destroy(struct kmem_cache *cachep, struct page *page)
 {
@@ -2060,6 +1948,16 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page)
                kmem_cache_free(cachep->freelist_cache, freelist);
 }
 
+static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list)
+{
+       struct page *page, *n;
+
+       list_for_each_entry_safe(page, n, list, lru) {
+               list_del(&page->lru);
+               slab_destroy(cachep, page);
+       }
+}
+
 /**
  * calculate_slab_order - calculate size (page order) of slabs
  * @cachep: pointer to the cache that is being created
@@ -2405,17 +2303,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
                return err;
        }
 
-       if (flags & SLAB_DEBUG_OBJECTS) {
-               /*
-                * Would deadlock through slab_destroy()->call_rcu()->
-                * debug_object_activate()->kmem_cache_alloc().
-                */
-               WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);
-
-               slab_set_debugobj_lock_classes(cachep);
-       } else if (!OFF_SLAB(cachep) && !(flags & SLAB_DESTROY_BY_RCU))
-               on_slab_lock_classes(cachep);
-
        return 0;
 }
 
@@ -2434,7 +2321,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep)
 {
 #ifdef CONFIG_SMP
        check_irq_off();
-       assert_spin_locked(&cachep->node[numa_mem_id()]->list_lock);
+       assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
 #endif
 }
 
@@ -2442,7 +2329,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
 {
 #ifdef CONFIG_SMP
        check_irq_off();
-       assert_spin_locked(&cachep->node[node]->list_lock);
+       assert_spin_locked(&get_node(cachep, node)->list_lock);
 #endif
 }
 
@@ -2462,12 +2349,16 @@ static void do_drain(void *arg)
        struct kmem_cache *cachep = arg;
        struct array_cache *ac;
        int node = numa_mem_id();
+       struct kmem_cache_node *n;
+       LIST_HEAD(list);
 
        check_irq_off();
        ac = cpu_cache_get(cachep);
-       spin_lock(&cachep->node[node]->list_lock);
-       free_block(cachep, ac->entry, ac->avail, node);
-       spin_unlock(&cachep->node[node]->list_lock);
+       n = get_node(cachep, node);
+       spin_lock(&n->list_lock);
+       free_block(cachep, ac->entry, ac->avail, node, &list);
+       spin_unlock(&n->list_lock);
+       slabs_destroy(cachep, &list);
        ac->avail = 0;
 }
 
@@ -2478,17 +2369,12 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
 
        on_each_cpu(do_drain, cachep, 1);
        check_irq_on();
-       for_each_online_node(node) {
-               n = cachep->node[node];
-               if (n && n->alien)
+       for_each_kmem_cache_node(cachep, node, n)
+               if (n->alien)
                        drain_alien_cache(cachep, n->alien);
-       }
 
-       for_each_online_node(node) {
-               n = cachep->node[node];
-               if (n)
-                       drain_array(cachep, n, n->shared, 1, node);
-       }
+       for_each_kmem_cache_node(cachep, node, n)
+               drain_array(cachep, n, n->shared, 1, node);
 }
 
 /*
@@ -2534,17 +2420,14 @@ out:
 
 int __kmem_cache_shrink(struct kmem_cache *cachep)
 {
-       int ret = 0, i = 0;
+       int ret = 0;
+       int node;
        struct kmem_cache_node *n;
 
        drain_cpu_caches(cachep);
 
        check_irq_on();
-       for_each_online_node(i) {
-               n = cachep->node[i];
-               if (!n)
-                       continue;
-
+       for_each_kmem_cache_node(cachep, node, n) {
                drain_freelist(cachep, n, slabs_tofree(cachep, n));
 
                ret += !list_empty(&n->slabs_full) ||
@@ -2566,13 +2449,11 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
            kfree(cachep->array[i]);
 
        /* NUMA: free the node structures */
-       for_each_online_node(i) {
-               n = cachep->node[i];
-               if (n) {
-                       kfree(n->shared);
-                       free_alien_cache(n->alien);
-                       kfree(n);
-               }
+       for_each_kmem_cache_node(cachep, i, n) {
+               kfree(n->shared);
+               free_alien_cache(n->alien);
+               kfree(n);
+               cachep->node[i] = NULL;
        }
        return 0;
 }
@@ -2751,7 +2632,7 @@ static int cache_grow(struct kmem_cache *cachep,
 
        /* Take the node list lock to change the colour_next on this node */
        check_irq_off();
-       n = cachep->node[nodeid];
+       n = get_node(cachep, nodeid);
        spin_lock(&n->list_lock);
 
        /* Get colour for the slab, and cal the next value. */
@@ -2920,7 +2801,7 @@ retry:
                 */
                batchcount = BATCHREFILL_LIMIT;
        }
-       n = cachep->node[node];
+       n = get_node(cachep, node);
 
        BUG_ON(ac->avail > 0 || !n);
        spin_lock(&n->list_lock);
@@ -3060,7 +2941,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
 
 static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
 {
-       if (cachep == kmem_cache)
+       if (unlikely(cachep == kmem_cache))
                return false;
 
        return should_failslab(cachep->object_size, flags, cachep->flags);
@@ -3169,8 +3050,8 @@ retry:
                nid = zone_to_nid(zone);
 
                if (cpuset_zone_allowed_hardwall(zone, flags) &&
-                       cache->node[nid] &&
-                       cache->node[nid]->free_objects) {
+                       get_node(cache, nid) &&
+                       get_node(cache, nid)->free_objects) {
                                obj = ____cache_alloc_node(cache,
                                        flags | GFP_THISNODE, nid);
                                if (obj)
@@ -3233,7 +3114,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
        int x;
 
        VM_BUG_ON(nodeid > num_online_nodes());
-       n = cachep->node[nodeid];
+       n = get_node(cachep, nodeid);
        BUG_ON(!n);
 
 retry:
@@ -3304,7 +3185,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
        if (nodeid == NUMA_NO_NODE)
                nodeid = slab_node;
 
-       if (unlikely(!cachep->node[nodeid])) {
+       if (unlikely(!get_node(cachep, nodeid))) {
                /* Node not bootstrapped yet */
                ptr = fallback_alloc(cachep, flags);
                goto out;
@@ -3405,12 +3286,13 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
 
 /*
  * Caller needs to acquire correct kmem_cache_node's list_lock
+ * @list: list of detached free slab pages that should be freed by the caller
  */
-static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
-                      int node)
+static void free_block(struct kmem_cache *cachep, void **objpp,
+                       int nr_objects, int node, struct list_head *list)
 {
        int i;
-       struct kmem_cache_node *n;
+       struct kmem_cache_node *n = get_node(cachep, node);
 
        for (i = 0; i < nr_objects; i++) {
                void *objp;
@@ -3420,7 +3302,6 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
                objp = objpp[i];
 
                page = virt_to_head_page(objp);
-               n = cachep->node[node];
                list_del(&page->lru);
                check_spinlock_acquired_node(cachep, node);
                slab_put_obj(cachep, page, objp, node);
@@ -3431,13 +3312,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
                if (page->active == 0) {
                        if (n->free_objects > n->free_limit) {
                                n->free_objects -= cachep->num;
-                               /* No need to drop any previously held
-                                * lock here, even if we have a off-slab slab
-                                * descriptor it is guaranteed to come from
-                                * a different cache, refer to comments before
-                                * alloc_slabmgmt.
-                                */
-                               slab_destroy(cachep, page);
+                               list_add_tail(&page->lru, list);
                        } else {
                                list_add(&page->lru, &n->slabs_free);
                        }
@@ -3456,13 +3331,14 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
        int batchcount;
        struct kmem_cache_node *n;
        int node = numa_mem_id();
+       LIST_HEAD(list);
 
        batchcount = ac->batchcount;
 #if DEBUG
        BUG_ON(!batchcount || batchcount > ac->avail);
 #endif
        check_irq_off();
-       n = cachep->node[node];
+       n = get_node(cachep, node);
        spin_lock(&n->list_lock);
        if (n->shared) {
                struct array_cache *shared_array = n->shared;
@@ -3477,7 +3353,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
                }
        }
 
-       free_block(cachep, ac->entry, batchcount, node);
+       free_block(cachep, ac->entry, batchcount, node, &list);
 free_done:
 #if STATS
        {
@@ -3498,6 +3374,7 @@ free_done:
        }
 #endif
        spin_unlock(&n->list_lock);
+       slabs_destroy(cachep, &list);
        ac->avail -= batchcount;
        memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
 }
@@ -3754,7 +3631,7 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
        int node;
        struct kmem_cache_node *n;
        struct array_cache *new_shared;
-       struct array_cache **new_alien = NULL;
+       struct alien_cache **new_alien = NULL;
 
        for_each_online_node(node) {
 
@@ -3775,15 +3652,16 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
                        }
                }
 
-               n = cachep->node[node];
+               n = get_node(cachep, node);
                if (n) {
                        struct array_cache *shared = n->shared;
+                       LIST_HEAD(list);
 
                        spin_lock_irq(&n->list_lock);
 
                        if (shared)
                                free_block(cachep, shared->entry,
-                                               shared->avail, node);
+                                               shared->avail, node, &list);
 
                        n->shared = new_shared;
                        if (!n->alien) {
@@ -3793,6 +3671,7 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
                        n->free_limit = (1 + nr_cpus_node(node)) *
                                        cachep->batchcount + cachep->num;
                        spin_unlock_irq(&n->list_lock);
+                       slabs_destroy(cachep, &list);
                        kfree(shared);
                        free_alien_cache(new_alien);
                        continue;
@@ -3820,9 +3699,8 @@ fail:
                /* Cache is not active yet. Roll back what we did */
                node--;
                while (node >= 0) {
-                       if (cachep->node[node]) {
-                               n = cachep->node[node];
-
+                       n = get_node(cachep, node);
+                       if (n) {
                                kfree(n->shared);
                                free_alien_cache(n->alien);
                                kfree(n);
@@ -3883,12 +3761,20 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
        cachep->shared = shared;
 
        for_each_online_cpu(i) {
+               LIST_HEAD(list);
                struct array_cache *ccold = new->new[i];
+               int node;
+               struct kmem_cache_node *n;
+
                if (!ccold)
                        continue;
-               spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
-               free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
-               spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
+
+               node = cpu_to_mem(i);
+               n = get_node(cachep, node);
+               spin_lock_irq(&n->list_lock);
+               free_block(cachep, ccold->entry, ccold->avail, node, &list);
+               spin_unlock_irq(&n->list_lock);
+               slabs_destroy(cachep, &list);
                kfree(ccold);
        }
        kfree(new);
@@ -3996,6 +3882,7 @@ skip_setup:
 static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
                         struct array_cache *ac, int force, int node)
 {
+       LIST_HEAD(list);
        int tofree;
 
        if (!ac || !ac->avail)
@@ -4008,12 +3895,13 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
                        tofree = force ? ac->avail : (ac->limit + 4) / 5;
                        if (tofree > ac->avail)
                                tofree = (ac->avail + 1) / 2;
-                       free_block(cachep, ac->entry, tofree, node);
+                       free_block(cachep, ac->entry, tofree, node, &list);
                        ac->avail -= tofree;
                        memmove(ac->entry, &(ac->entry[tofree]),
                                sizeof(void *) * ac->avail);
                }
                spin_unlock_irq(&n->list_lock);
+               slabs_destroy(cachep, &list);
        }
 }
 
@@ -4048,7 +3936,7 @@ static void cache_reap(struct work_struct *w)
                 * have established with reasonable certainty that
                 * we can do some work if the lock was obtained.
                 */
-               n = searchp->node[node];
+               n = get_node(searchp, node);
 
                reap_alien(searchp, n);
 
@@ -4100,10 +3988,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
 
        active_objs = 0;
        num_slabs = 0;
-       for_each_online_node(node) {
-               n = cachep->node[node];
-               if (!n)
-                       continue;
+       for_each_kmem_cache_node(cachep, node, n) {
 
                check_irq_on();
                spin_lock_irq(&n->list_lock);
@@ -4328,10 +4213,7 @@ static int leaks_show(struct seq_file *m, void *p)
 
        x[1] = 0;
 
-       for_each_online_node(node) {
-               n = cachep->node[node];
-               if (!n)
-                       continue;
+       for_each_kmem_cache_node(cachep, node, n) {
 
                check_irq_on();
                spin_lock_irq(&n->list_lock);
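
The mm/slab.c changes above all follow one pattern: free_block() now detaches
empty slab pages onto a caller-supplied list while the node's list_lock is
held, and the new slabs_destroy() hands them back to the page allocator only
after the lock is dropped, so slab_destroy() never runs under the spinlock.
A minimal sketch of that pattern using the helpers added by this patch
(example_flush itself is hypothetical and not part of the patch):

        static void example_flush(struct kmem_cache *cachep,
                                  struct array_cache *ac, int node)
        {
                struct kmem_cache_node *n = get_node(cachep, node);
                LIST_HEAD(list);        /* detached slab pages to be destroyed */

                spin_lock(&n->list_lock);
                free_block(cachep, ac->entry, ac->avail, node, &list);
                ac->avail = 0;
                spin_unlock(&n->list_lock);

                slabs_destroy(cachep, &list);   /* page-allocator work, lock-free */
        }
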
index 961a3fb1f5a2c69454e123e5a2d7c34b652c2a1c..0e0fdd3658409e0eee9a89fa51b1ea45cfc96466 100644 (file)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -256,13 +256,12 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
                return cachep;
 
        pr_err("%s: Wrong slab cache. %s but object is from %s\n",
-               __FUNCTION__, cachep->name, s->name);
+              __func__, cachep->name, s->name);
        WARN_ON_ONCE(1);
        return s;
 }
-#endif
-
 
+#ifndef CONFIG_SLOB
 /*
  * The slab lists for all objects.
  */
@@ -277,7 +276,7 @@ struct kmem_cache_node {
        unsigned int free_limit;
        unsigned int colour_next;       /* Per-node cache coloring */
        struct array_cache *shared;     /* shared per node */
-       struct array_cache **alien;     /* on other nodes */
+       struct alien_cache **alien;     /* on other nodes */
        unsigned long next_reap;        /* updated without locking */
        int free_touched;               /* updated without locking */
 #endif
@@ -294,5 +293,22 @@ struct kmem_cache_node {
 
 };
 
+static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
+{
+       return s->node[node];
+}
+
+/*
+ * Iterator over all nodes. The body will be executed for each node that has
+ * a kmem_cache_node structure allocated (which is true for all online nodes)
+ */
+#define for_each_kmem_cache_node(__s, __node, __n) \
+       for (__node = 0; __node < nr_node_ids; __node++) \
+                if ((__n = get_node(__s, __node)))
+
+#endif
+
 void *slab_next(struct seq_file *m, void *p, loff_t *pos);
 void slab_stop(struct seq_file *m, void *p);
+
+#endif /* MM_SLAB_H */
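
The get_node() helper and for_each_kmem_cache_node() iterator added here are
what let both slab allocators drop the open-coded for_each_online_node() plus
NULL-check pairs seen throughout this diff. A hedged usage sketch
(count_free_objects is a made-up example; the free_objects counter exists only
in the SLAB flavour of kmem_cache_node):

        static unsigned long count_free_objects(struct kmem_cache *cachep)
        {
                int node;
                struct kmem_cache_node *n;
                unsigned long free = 0;

                /* Visits only nodes with a kmem_cache_node actually allocated. */
                for_each_kmem_cache_node(cachep, node, n) {
                        spin_lock_irq(&n->list_lock);
                        free += n->free_objects;
                        spin_unlock_irq(&n->list_lock);
                }
                return free;
        }
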
index d31c4bacc6a203b0bc555bd76c2a97e90e78fa6c..d319502b24038b7ad0aee1023b88c5c6501fc39e 100644 (file)
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -19,6 +19,8 @@
 #include <asm/tlbflush.h>
 #include <asm/page.h>
 #include <linux/memcontrol.h>
+
+#define CREATE_TRACE_POINTS
 #include <trace/events/kmem.h>
 
 #include "slab.h"
@@ -787,3 +789,102 @@ static int __init slab_proc_init(void)
 }
 module_init(slab_proc_init);
 #endif /* CONFIG_SLABINFO */
+
+static __always_inline void *__do_krealloc(const void *p, size_t new_size,
+                                          gfp_t flags)
+{
+       void *ret;
+       size_t ks = 0;
+
+       if (p)
+               ks = ksize(p);
+
+       if (ks >= new_size)
+               return (void *)p;
+
+       ret = kmalloc_track_caller(new_size, flags);
+       if (ret && p)
+               memcpy(ret, p, ks);
+
+       return ret;
+}
+
+/**
+ * __krealloc - like krealloc() but don't free @p.
+ * @p: object to reallocate memory for.
+ * @new_size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * This function is like krealloc() except it never frees the originally
+ * allocated buffer. Use this if you don't want to free the buffer immediately
+ * like, for example, with RCU.
+ */
+void *__krealloc(const void *p, size_t new_size, gfp_t flags)
+{
+       if (unlikely(!new_size))
+               return ZERO_SIZE_PTR;
+
+       return __do_krealloc(p, new_size, flags);
+
+}
+EXPORT_SYMBOL(__krealloc);
+
+/**
+ * krealloc - reallocate memory. The contents will remain unchanged.
+ * @p: object to reallocate memory for.
+ * @new_size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * The contents of the object pointed to are preserved up to the
+ * lesser of the new and old sizes.  If @p is %NULL, krealloc()
+ * behaves exactly like kmalloc().  If @new_size is 0 and @p is not a
+ * %NULL pointer, the object pointed to is freed.
+ */
+void *krealloc(const void *p, size_t new_size, gfp_t flags)
+{
+       void *ret;
+
+       if (unlikely(!new_size)) {
+               kfree(p);
+               return ZERO_SIZE_PTR;
+       }
+
+       ret = __do_krealloc(p, new_size, flags);
+       if (ret && p != ret)
+               kfree(p);
+
+       return ret;
+}
+EXPORT_SYMBOL(krealloc);
+
+/**
+ * kzfree - like kfree but zero memory
+ * @p: object to free memory of
+ *
+ * The memory of the object @p points to is zeroed before freed.
+ * If @p is %NULL, kzfree() does nothing.
+ *
+ * Note: this function zeroes the whole allocated buffer which can be a good
+ * deal bigger than the requested buffer size passed to kmalloc(). So be
+ * careful when using this function in performance sensitive code.
+ */
+void kzfree(const void *p)
+{
+       size_t ks;
+       void *mem = (void *)p;
+
+       if (unlikely(ZERO_OR_NULL_PTR(mem)))
+               return;
+       ks = ksize(mem);
+       memset(mem, 0, ks);
+       kfree(mem);
+}
+EXPORT_SYMBOL(kzfree);
+
+/* Tracepoints definitions. */
+EXPORT_TRACEPOINT_SYMBOL(kmalloc);
+EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
+EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
+EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
+EXPORT_TRACEPOINT_SYMBOL(kfree);
+EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
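
The krealloc()/__krealloc()/kzfree() implementations moved into this file (and
removed from mm/util.c further down) keep their documented semantics: a zero
new_size yields ZERO_SIZE_PTR, and the old buffer is freed only when a
genuinely new allocation is returned. A hypothetical caller-side sketch that
relies on exactly those semantics (grow_ints is illustrative only):

        /* Grow an int array, zeroing the new tail; never loses the old buffer. */
        static int *grow_ints(int *old, size_t old_n, size_t new_n)
        {
                int *p;

                if (new_n <= old_n)
                        return old;

                p = krealloc(old, new_n * sizeof(*p), GFP_KERNEL);
                if (!p)                 /* on failure 'old' is left untouched */
                        return old;

                memset(p + old_n, 0, (new_n - old_n) * sizeof(*p));
                return p;
        }
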
index 73004808537ea841e05c85e0b68312ea645eb114..3e8afcc07a760c552135cfb3c79ac924ee9e5494 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -233,11 +233,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
  *                     Core slab cache functions
  *******************************************************************/
 
-static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
-{
-       return s->node[node];
-}
-
 /* Verify that a pointer has an address that is valid within a slab page */
 static inline int check_valid_pointer(struct kmem_cache *s,
                                struct page *page, const void *object)
@@ -288,6 +283,10 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
        for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
                        __p += (__s)->size)
 
+#define for_each_object_idx(__p, __idx, __s, __addr, __objects) \
+       for (__p = (__addr), __idx = 1; __idx <= __objects;\
+                       __p += (__s)->size, __idx++)
+
 /* Determine object index from a given position */
 static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
 {
@@ -382,9 +381,9 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
     defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
        if (s->flags & __CMPXCHG_DOUBLE) {
                if (cmpxchg_double(&page->freelist, &page->counters,
-                       freelist_old, counters_old,
-                       freelist_new, counters_new))
-               return 1;
+                                  freelist_old, counters_old,
+                                  freelist_new, counters_new))
+                       return 1;
        } else
 #endif
        {
@@ -418,9 +417,9 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
     defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
        if (s->flags & __CMPXCHG_DOUBLE) {
                if (cmpxchg_double(&page->freelist, &page->counters,
-                       freelist_old, counters_old,
-                       freelist_new, counters_new))
-               return 1;
+                                  freelist_old, counters_old,
+                                  freelist_new, counters_new))
+                       return 1;
        } else
 #endif
        {
@@ -944,60 +943,6 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
        }
 }
 
-/*
- * Hooks for other subsystems that check memory allocations. In a typical
- * production configuration these hooks all should produce no code at all.
- */
-static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
-{
-       kmemleak_alloc(ptr, size, 1, flags);
-}
-
-static inline void kfree_hook(const void *x)
-{
-       kmemleak_free(x);
-}
-
-static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
-{
-       flags &= gfp_allowed_mask;
-       lockdep_trace_alloc(flags);
-       might_sleep_if(flags & __GFP_WAIT);
-
-       return should_failslab(s->object_size, flags, s->flags);
-}
-
-static inline void slab_post_alloc_hook(struct kmem_cache *s,
-                                       gfp_t flags, void *object)
-{
-       flags &= gfp_allowed_mask;
-       kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
-       kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
-}
-
-static inline void slab_free_hook(struct kmem_cache *s, void *x)
-{
-       kmemleak_free_recursive(x, s->flags);
-
-       /*
-        * Trouble is that we may no longer disable interrupts in the fast path
-        * So in order to make the debug calls that expect irqs to be
-        * disabled we need to disable interrupts temporarily.
-        */
-#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
-       {
-               unsigned long flags;
-
-               local_irq_save(flags);
-               kmemcheck_slab_free(s, x, s->object_size);
-               debug_check_no_locks_freed(x, s->object_size);
-               local_irq_restore(flags);
-       }
-#endif
-       if (!(s->flags & SLAB_DEBUG_OBJECTS))
-               debug_check_no_obj_freed(x, s->object_size);
-}
-
 /*
  * Tracking of fully allocated slabs for debugging purposes.
  */
@@ -1282,6 +1227,12 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
 static inline void dec_slabs_node(struct kmem_cache *s, int node,
                                                        int objects) {}
 
+#endif /* CONFIG_SLUB_DEBUG */
+
+/*
+ * Hooks for other subsystems that check memory allocations. In a typical
+ * production configuration these hooks all should produce no code at all.
+ */
 static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
 {
        kmemleak_alloc(ptr, size, 1, flags);
@@ -1293,21 +1244,44 @@ static inline void kfree_hook(const void *x)
 }
 
 static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
-                                                       { return 0; }
+{
+       flags &= gfp_allowed_mask;
+       lockdep_trace_alloc(flags);
+       might_sleep_if(flags & __GFP_WAIT);
+
+       return should_failslab(s->object_size, flags, s->flags);
+}
 
-static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
-               void *object)
+static inline void slab_post_alloc_hook(struct kmem_cache *s,
+                                       gfp_t flags, void *object)
 {
-       kmemleak_alloc_recursive(object, s->object_size, 1, s->flags,
-               flags & gfp_allowed_mask);
+       flags &= gfp_allowed_mask;
+       kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
+       kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
 }
 
 static inline void slab_free_hook(struct kmem_cache *s, void *x)
 {
        kmemleak_free_recursive(x, s->flags);
-}
 
-#endif /* CONFIG_SLUB_DEBUG */
+       /*
+        * Trouble is that we may no longer disable interrupts in the fast path
+        * So in order to make the debug calls that expect irqs to be
+        * disabled we need to disable interrupts temporarily.
+        */
+#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
+       {
+               unsigned long flags;
+
+               local_irq_save(flags);
+               kmemcheck_slab_free(s, x, s->object_size);
+               debug_check_no_locks_freed(x, s->object_size);
+               local_irq_restore(flags);
+       }
+#endif
+       if (!(s->flags & SLAB_DEBUG_OBJECTS))
+               debug_check_no_obj_freed(x, s->object_size);
+}
 
 /*
  * Slab allocation and freeing
@@ -1409,9 +1383,9 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 {
        struct page *page;
        void *start;
-       void *last;
        void *p;
        int order;
+       int idx;
 
        BUG_ON(flags & GFP_SLAB_BUG_MASK);
 
@@ -1432,14 +1406,13 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
        if (unlikely(s->flags & SLAB_POISON))
                memset(start, POISON_INUSE, PAGE_SIZE << order);
 
-       last = start;
-       for_each_object(p, s, start, page->objects) {
-               setup_object(s, page, last);
-               set_freepointer(s, last, p);
-               last = p;
+       for_each_object_idx(p, idx, s, start, page->objects) {
+               setup_object(s, page, p);
+               if (likely(idx < page->objects))
+                       set_freepointer(s, p, p + s->size);
+               else
+                       set_freepointer(s, p, NULL);
        }
-       setup_object(s, page, last);
-       set_freepointer(s, last, NULL);
 
        page->freelist = start;
        page->inuse = page->objects;
@@ -2162,6 +2135,7 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
        static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
                                      DEFAULT_RATELIMIT_BURST);
        int node;
+       struct kmem_cache_node *n;
 
        if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
                return;
@@ -2176,15 +2150,11 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
                pr_warn("  %s debugging increased min order, use slub_debug=O to disable.\n",
                        s->name);
 
-       for_each_online_node(node) {
-               struct kmem_cache_node *n = get_node(s, node);
+       for_each_kmem_cache_node(s, node, n) {
                unsigned long nr_slabs;
                unsigned long nr_objs;
                unsigned long nr_free;
 
-               if (!n)
-                       continue;
-
                nr_free  = count_partial(n, count_free);
                nr_slabs = node_nr_slabs(n);
                nr_objs  = node_nr_objs(n);
@@ -2928,13 +2898,10 @@ static void early_kmem_cache_node_alloc(int node)
 static void free_kmem_cache_nodes(struct kmem_cache *s)
 {
        int node;
+       struct kmem_cache_node *n;
 
-       for_each_node_state(node, N_NORMAL_MEMORY) {
-               struct kmem_cache_node *n = s->node[node];
-
-               if (n)
-                       kmem_cache_free(kmem_cache_node, n);
-
+       for_each_kmem_cache_node(s, node, n) {
+               kmem_cache_free(kmem_cache_node, n);
                s->node[node] = NULL;
        }
 }
@@ -3222,12 +3189,11 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
 static inline int kmem_cache_close(struct kmem_cache *s)
 {
        int node;
+       struct kmem_cache_node *n;
 
        flush_all(s);
        /* Attempt to free all objects */
-       for_each_node_state(node, N_NORMAL_MEMORY) {
-               struct kmem_cache_node *n = get_node(s, node);
-
+       for_each_kmem_cache_node(s, node, n) {
                free_partial(s, n);
                if (n->nr_partial || slabs_node(s, node))
                        return 1;
@@ -3412,9 +3378,7 @@ int __kmem_cache_shrink(struct kmem_cache *s)
                return -ENOMEM;
 
        flush_all(s);
-       for_each_node_state(node, N_NORMAL_MEMORY) {
-               n = get_node(s, node);
-
+       for_each_kmem_cache_node(s, node, n) {
                if (!n->nr_partial)
                        continue;
 
@@ -3586,6 +3550,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
 {
        int node;
        struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+       struct kmem_cache_node *n;
 
        memcpy(s, static_cache, kmem_cache->object_size);
 
@@ -3595,19 +3560,16 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
         * IPIs around.
         */
        __flush_cpu_slab(s, smp_processor_id());
-       for_each_node_state(node, N_NORMAL_MEMORY) {
-               struct kmem_cache_node *n = get_node(s, node);
+       for_each_kmem_cache_node(s, node, n) {
                struct page *p;
 
-               if (n) {
-                       list_for_each_entry(p, &n->partial, lru)
-                               p->slab_cache = s;
+               list_for_each_entry(p, &n->partial, lru)
+                       p->slab_cache = s;
 
 #ifdef CONFIG_SLUB_DEBUG
-                       list_for_each_entry(p, &n->full, lru)
-                               p->slab_cache = s;
+               list_for_each_entry(p, &n->full, lru)
+                       p->slab_cache = s;
 #endif
-               }
        }
        list_add(&s->list, &slab_caches);
        return s;
@@ -3960,16 +3922,14 @@ static long validate_slab_cache(struct kmem_cache *s)
        unsigned long count = 0;
        unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
                                sizeof(unsigned long), GFP_KERNEL);
+       struct kmem_cache_node *n;
 
        if (!map)
                return -ENOMEM;
 
        flush_all(s);
-       for_each_node_state(node, N_NORMAL_MEMORY) {
-               struct kmem_cache_node *n = get_node(s, node);
-
+       for_each_kmem_cache_node(s, node, n)
                count += validate_slab_node(s, n, map);
-       }
        kfree(map);
        return count;
 }
@@ -4123,6 +4083,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
        int node;
        unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
                                     sizeof(unsigned long), GFP_KERNEL);
+       struct kmem_cache_node *n;
 
        if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
                                     GFP_TEMPORARY)) {
@@ -4132,8 +4093,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
        /* Push back cpu slabs */
        flush_all(s);
 
-       for_each_node_state(node, N_NORMAL_MEMORY) {
-               struct kmem_cache_node *n = get_node(s, node);
+       for_each_kmem_cache_node(s, node, n) {
                unsigned long flags;
                struct page *page;
 
@@ -4205,7 +4165,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
 #endif
 
 #ifdef SLUB_RESILIENCY_TEST
-static void resiliency_test(void)
+static void __init resiliency_test(void)
 {
        u8 *p;
 
@@ -4332,8 +4292,9 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
        get_online_mems();
 #ifdef CONFIG_SLUB_DEBUG
        if (flags & SO_ALL) {
-               for_each_node_state(node, N_NORMAL_MEMORY) {
-                       struct kmem_cache_node *n = get_node(s, node);
+               struct kmem_cache_node *n;
+
+               for_each_kmem_cache_node(s, node, n) {
 
                        if (flags & SO_TOTAL)
                                x = atomic_long_read(&n->total_objects);
@@ -4349,9 +4310,9 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
        } else
 #endif
        if (flags & SO_PARTIAL) {
-               for_each_node_state(node, N_NORMAL_MEMORY) {
-                       struct kmem_cache_node *n = get_node(s, node);
+               struct kmem_cache_node *n;
 
+               for_each_kmem_cache_node(s, node, n) {
                        if (flags & SO_TOTAL)
                                x = count_partial(n, count_total);
                        else if (flags & SO_OBJECTS)
@@ -4364,7 +4325,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
        }
        x = sprintf(buf, "%lu", total);
 #ifdef CONFIG_NUMA
-       for_each_node_state(node, N_NORMAL_MEMORY)
+       for (node = 0; node < nr_node_ids; node++)
                if (nodes[node])
                        x += sprintf(buf + x, " N%d=%lu",
                                        node, nodes[node]);
@@ -4378,16 +4339,12 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 static int any_slab_objects(struct kmem_cache *s)
 {
        int node;
+       struct kmem_cache_node *n;
 
-       for_each_online_node(node) {
-               struct kmem_cache_node *n = get_node(s, node);
-
-               if (!n)
-                       continue;
-
+       for_each_kmem_cache_node(s, node, n)
                if (atomic_long_read(&n->total_objects))
                        return 1;
-       }
+
        return 0;
 }
 #endif
@@ -4509,7 +4466,7 @@ SLAB_ATTR_RO(ctor);
 
 static ssize_t aliases_show(struct kmem_cache *s, char *buf)
 {
-       return sprintf(buf, "%d\n", s->refcount - 1);
+       return sprintf(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
 }
 SLAB_ATTR_RO(aliases);
 
@@ -5171,12 +5128,6 @@ static char *create_unique_id(struct kmem_cache *s)
                *p++ = '-';
        p += sprintf(p, "%07d", s->size);
 
-#ifdef CONFIG_MEMCG_KMEM
-       if (!is_root_cache(s))
-               p += sprintf(p, "-%08d",
-                               memcg_cache_id(s->memcg_params->memcg));
-#endif
-
        BUG_ON(p > name + ID_STR_LENGTH - 1);
        return name;
 }
@@ -5342,13 +5293,9 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
        unsigned long nr_objs = 0;
        unsigned long nr_free = 0;
        int node;
+       struct kmem_cache_node *n;
 
-       for_each_online_node(node) {
-               struct kmem_cache_node *n = get_node(s, node);
-
-               if (!n)
-                       continue;
-
+       for_each_kmem_cache_node(s, node, n) {
                nr_slabs += node_nr_slabs(n);
                nr_objs += node_nr_objs(n);
                nr_free += count_partial(n, count_free);
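
Among the SLUB changes, new_slab() now builds the initial freelist with
for_each_object_idx(): each object's free pointer is set to the address of the
next object (p + s->size) and the last one is terminated with NULL, replacing
the old "remember the previous object" loop. A stripped-down, illustrative
sketch of that linking (setup_object() and the s->offset placement done by
set_freepointer() are deliberately omitted):

        /* Link 'objects' slots of 'size' bytes into a NULL-terminated freelist. */
        static void link_freelist(char *start, size_t size, unsigned int objects)
        {
                unsigned int idx;
                char *p = start;

                for (idx = 1; idx <= objects; idx++, p += size) {
                        if (idx < objects)
                                *(void **)p = p + size; /* next free object */
                        else
                                *(void **)p = NULL;     /* end of the freelist */
                }
        }
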
index 9e8e3472248bb8dfa10107fb212974e1343ffa4a..c789d01c9ec31db05803566bc23baee090908366 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -501,7 +501,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec,
                SetPageActive(page);
                lru += LRU_ACTIVE;
                add_page_to_lru_list(page, lruvec, lru);
-               trace_mm_lru_activate(page, page_to_pfn(page));
+               trace_mm_lru_activate(page);
 
                __count_vm_event(PGACTIVATE);
                update_page_reclaim_stat(lruvec, file, 1);
@@ -589,6 +589,9 @@ static void __lru_cache_activate_page(struct page *page)
  * inactive,unreferenced       ->      inactive,referenced
  * inactive,referenced         ->      active,unreferenced
  * active,unreferenced         ->      active,referenced
+ *
+ * When a newly allocated page is not yet visible to others, and is thus safe
+ * for non-atomic ops, __SetPageReferenced(page) may be substituted for
+ * mark_page_accessed(page).
  */
 void mark_page_accessed(struct page *page)
 {
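
The comment added above records the convention that replaces
init_page_accessed(), which is deleted in the next hunk: a page that nobody
else can see yet may use the non-atomic __SetPageReferenced() instead of
mark_page_accessed(). A hedged sketch of such a caller (alloc_referenced_page
is illustrative, not part of the patch):

        static struct page *alloc_referenced_page(gfp_t gfp)
        {
                struct page *page = alloc_page(gfp);

                /*
                 * The page is not yet in the page cache, on any LRU or mapped,
                 * so the non-atomic flag setter is safe and avoids the extra
                 * work mark_page_accessed() would do.
                 */
                if (page)
                        __SetPageReferenced(page);

                return page;
        }
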
@@ -614,17 +617,6 @@ void mark_page_accessed(struct page *page)
 }
 EXPORT_SYMBOL(mark_page_accessed);
 
-/*
- * Used to mark_page_accessed(page) that is not visible yet and when it is
- * still safe to use non-atomic ops
- */
-void init_page_accessed(struct page *page)
-{
-       if (!PageReferenced(page))
-               __SetPageReferenced(page);
-}
-EXPORT_SYMBOL(init_page_accessed);
-
 static void __lru_cache_add(struct page *page)
 {
        struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
@@ -996,7 +988,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
        SetPageLRU(page);
        add_page_to_lru_list(page, lruvec, lru);
        update_page_reclaim_stat(lruvec, file, active);
-       trace_mm_lru_insertion(page, page_to_pfn(page), lru, trace_pagemap_flags(page));
+       trace_mm_lru_insertion(page, lru);
 }
 
 /*
index d5ea733c508265aaba619248d973ec640a73d04a..7b6608df2ee803d9d4345dc11290eb17100199f4 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -16,9 +16,6 @@
 
 #include "internal.h"
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/kmem.h>
-
 /**
  * kstrdup - allocate space for and copy an existing string
  * @s: the string to duplicate
@@ -112,97 +109,6 @@ void *memdup_user(const void __user *src, size_t len)
 }
 EXPORT_SYMBOL(memdup_user);
 
-static __always_inline void *__do_krealloc(const void *p, size_t new_size,
-                                          gfp_t flags)
-{
-       void *ret;
-       size_t ks = 0;
-
-       if (p)
-               ks = ksize(p);
-
-       if (ks >= new_size)
-               return (void *)p;
-
-       ret = kmalloc_track_caller(new_size, flags);
-       if (ret && p)
-               memcpy(ret, p, ks);
-
-       return ret;
-}
-
-/**
- * __krealloc - like krealloc() but don't free @p.
- * @p: object to reallocate memory for.
- * @new_size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
- *
- * This function is like krealloc() except it never frees the originally
- * allocated buffer. Use this if you don't want to free the buffer immediately
- * like, for example, with RCU.
- */
-void *__krealloc(const void *p, size_t new_size, gfp_t flags)
-{
-       if (unlikely(!new_size))
-               return ZERO_SIZE_PTR;
-
-       return __do_krealloc(p, new_size, flags);
-
-}
-EXPORT_SYMBOL(__krealloc);
-
-/**
- * krealloc - reallocate memory. The contents will remain unchanged.
- * @p: object to reallocate memory for.
- * @new_size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
- *
- * The contents of the object pointed to are preserved up to the
- * lesser of the new and old sizes.  If @p is %NULL, krealloc()
- * behaves exactly like kmalloc().  If @new_size is 0 and @p is not a
- * %NULL pointer, the object pointed to is freed.
- */
-void *krealloc(const void *p, size_t new_size, gfp_t flags)
-{
-       void *ret;
-
-       if (unlikely(!new_size)) {
-               kfree(p);
-               return ZERO_SIZE_PTR;
-       }
-
-       ret = __do_krealloc(p, new_size, flags);
-       if (ret && p != ret)
-               kfree(p);
-
-       return ret;
-}
-EXPORT_SYMBOL(krealloc);
-
-/**
- * kzfree - like kfree but zero memory
- * @p: object to free memory of
- *
- * The memory of the object @p points to is zeroed before freed.
- * If @p is %NULL, kzfree() does nothing.
- *
- * Note: this function zeroes the whole allocated buffer which can be a good
- * deal bigger than the requested buffer size passed to kmalloc(). So be
- * careful when using this function in performance sensitive code.
- */
-void kzfree(const void *p)
-{
-       size_t ks;
-       void *mem = (void *)p;
-
-       if (unlikely(ZERO_OR_NULL_PTR(mem)))
-               return;
-       ks = ksize(mem);
-       memset(mem, 0, ks);
-       kfree(mem);
-}
-EXPORT_SYMBOL(kzfree);
-
 /*
  * strndup_user - duplicate an existing string from user space
  * @s: The string to duplicate
@@ -504,11 +410,3 @@ out_mm:
 out:
        return res;
 }
-
-/* Tracepoints definitions. */
-EXPORT_TRACEPOINT_SYMBOL(kmalloc);
-EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
-EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
-EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
-EXPORT_TRACEPOINT_SYMBOL(kfree);
-EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
index f64632b671964a0788b43e8d30ae0edb7b292292..2b0aa5486092dca2745c2ec201cda44db033550c 100644 (file)
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1270,19 +1270,15 @@ void unmap_kernel_range(unsigned long addr, unsigned long size)
 }
 EXPORT_SYMBOL_GPL(unmap_kernel_range);
 
-int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
+int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages)
 {
        unsigned long addr = (unsigned long)area->addr;
        unsigned long end = addr + get_vm_area_size(area);
        int err;
 
-       err = vmap_page_range(addr, end, prot, *pages);
-       if (err > 0) {
-               *pages += err;
-               err = 0;
-       }
+       err = vmap_page_range(addr, end, prot, pages);
 
-       return err;
+       return err > 0 ? 0 : err;
 }
 EXPORT_SYMBOL_GPL(map_vm_area);
 
@@ -1548,7 +1544,7 @@ void *vmap(struct page **pages, unsigned int count,
        if (!area)
                return NULL;
 
-       if (map_vm_area(area, prot, &pages)) {
+       if (map_vm_area(area, prot, pages)) {
                vunmap(area->addr);
                return NULL;
        }
@@ -1566,7 +1562,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
        const int order = 0;
        struct page **pages;
        unsigned int nr_pages, array_size, i;
-       gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
+       const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
+       const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
 
        nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
        array_size = (nr_pages * sizeof(struct page *));
@@ -1589,12 +1586,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 
        for (i = 0; i < area->nr_pages; i++) {
                struct page *page;
-               gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;
 
                if (node == NUMA_NO_NODE)
-                       page = alloc_page(tmp_mask);
+                       page = alloc_page(alloc_mask);
                else
-                       page = alloc_pages_node(node, tmp_mask, order);
+                       page = alloc_pages_node(node, alloc_mask, order);
 
                if (unlikely(!page)) {
                        /* Successfully allocated i pages, free them in __vunmap() */
@@ -1602,9 +1598,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
                        goto fail;
                }
                area->pages[i] = page;
+               if (gfp_mask & __GFP_WAIT)
+                       cond_resched();
        }
 
-       if (map_vm_area(area, prot, &pages))
+       if (map_vm_area(area, prot, pages))
                goto fail;
        return area->addr;
 
@@ -2690,14 +2688,14 @@ void get_vmalloc_info(struct vmalloc_info *vmi)
 
        prev_end = VMALLOC_START;
 
-       spin_lock(&vmap_area_lock);
+       rcu_read_lock();
 
        if (list_empty(&vmap_area_list)) {
                vmi->largest_chunk = VMALLOC_TOTAL;
                goto out;
        }
 
-       list_for_each_entry(va, &vmap_area_list, list) {
+       list_for_each_entry_rcu(va, &vmap_area_list, list) {
                unsigned long addr = va->va_start;
 
                /*
@@ -2724,7 +2722,7 @@ void get_vmalloc_info(struct vmalloc_info *vmi)
                vmi->largest_chunk = VMALLOC_END - prev_end;
 
 out:
-       spin_unlock(&vmap_area_lock);
+       rcu_read_unlock();
 }
 #endif
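
With map_vm_area() now taking a plain struct page ** and no longer advancing
the caller's pointer, callers such as vmap() above simply pass the page array
directly. A hypothetical caller under the new signature (map_pages_example is
not part of the patch; it just mirrors what vmap() does):

        /* Map 'count' pre-allocated pages into a fresh kernel virtual area. */
        static void *map_pages_example(struct page **pages, unsigned int count)
        {
                struct vm_struct *area;

                area = get_vm_area(count * PAGE_SIZE, VM_MAP);
                if (!area)
                        return NULL;

                if (map_vm_area(area, PAGE_KERNEL, pages)) {
                        free_vm_area(area);
                        return NULL;
                }
                return area->addr;
        }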
 
index 0f16ffe8eb67c6fcd0350add4a5a4b6092cb6905..d2f65c856350eb179f83c20cde87d961d92eb28f 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
 #include <trace/events/vmscan.h>
 
 struct scan_control {
-       /* Incremented by the number of inactive pages that were scanned */
-       unsigned long nr_scanned;
-
-       /* Number of pages freed so far during a call to shrink_zones() */
-       unsigned long nr_reclaimed;
-
        /* How many pages shrink_list() should reclaim */
        unsigned long nr_to_reclaim;
 
-       unsigned long hibernation_mode;
-
        /* This context's GFP mask */
        gfp_t gfp_mask;
 
-       int may_writepage;
-
-       /* Can mapped pages be reclaimed? */
-       int may_unmap;
-
-       /* Can pages be swapped as part of reclaim? */
-       int may_swap;
-
+       /* Allocation order */
        int order;
 
-       /* Scan (total_size >> priority) pages at once */
-       int priority;
-
-       /* anon vs. file LRUs scanning "ratio" */
-       int swappiness;
+       /*
+        * Nodemask of nodes allowed by the caller. If NULL, all nodes
+        * are scanned.
+        */
+       nodemask_t      *nodemask;
 
        /*
         * The memory cgroup that hit its limit and as a result is the
@@ -95,11 +80,27 @@ struct scan_control {
         */
        struct mem_cgroup *target_mem_cgroup;
 
-       /*
-        * Nodemask of nodes allowed by the caller. If NULL, all nodes
-        * are scanned.
-        */
-       nodemask_t      *nodemask;
+       /* Scan (total_size >> priority) pages at once */
+       int priority;
+
+       unsigned int may_writepage:1;
+
+       /* Can mapped pages be reclaimed? */
+       unsigned int may_unmap:1;
+
+       /* Can pages be swapped as part of reclaim? */
+       unsigned int may_swap:1;
+
+       unsigned int hibernation_mode:1;
+
+       /* One of the zones is ready for compaction */
+       unsigned int compaction_ready:1;
+
+       /* Incremented by the number of inactive pages that were scanned */
+       unsigned long nr_scanned;
+
+       /* Number of pages freed so far during a call to shrink_zones() */
+       unsigned long nr_reclaimed;
 };
 
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
@@ -136,7 +137,11 @@ struct scan_control {
  * From 0 .. 100.  Higher means more swappy.
  */
 int vm_swappiness = 60;
-unsigned long vm_total_pages;  /* The total number of pages which the VM controls */
+/*
+ * The total number of pages which are beyond the high watermark within all
+ * zones.
+ */
+unsigned long vm_total_pages;
 
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
@@ -169,7 +174,8 @@ static unsigned long zone_reclaimable_pages(struct zone *zone)
 
 bool zone_reclaimable(struct zone *zone)
 {
-       return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
+       return zone_page_state(zone, NR_PAGES_SCANNED) <
+               zone_reclaimable_pages(zone) * 6;
 }
 
 static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
@@ -1503,7 +1509,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
        __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
 
        if (global_reclaim(sc)) {
-               zone->pages_scanned += nr_scanned;
+               __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
                if (current_is_kswapd())
                        __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned);
                else
@@ -1693,7 +1699,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
        nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
                                     &nr_scanned, sc, isolate_mode, lru);
        if (global_reclaim(sc))
-               zone->pages_scanned += nr_scanned;
+               __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
 
        reclaim_stat->recent_scanned[file] += nr_taken;
 
@@ -1750,7 +1756,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
         * Count referenced pages from currently used mappings as rotated,
         * even though only some of them are actually re-activated.  This
         * helps balance scan pressure between file and anonymous pages in
-        * get_scan_ratio.
+        * get_scan_count.
         */
        reclaim_stat->recent_rotated[file] += nr_rotated;
 
@@ -1865,8 +1871,8 @@ enum scan_balance {
  * nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan
  * nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
  */
-static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
-                          unsigned long *nr)
+static void get_scan_count(struct lruvec *lruvec, int swappiness,
+                          struct scan_control *sc, unsigned long *nr)
 {
        struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
        u64 fraction[2];
@@ -1909,7 +1915,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
         * using the memory controller's swap limit feature would be
         * too expensive.
         */
-       if (!global_reclaim(sc) && !sc->swappiness) {
+       if (!global_reclaim(sc) && !swappiness) {
                scan_balance = SCAN_FILE;
                goto out;
        }
@@ -1919,16 +1925,11 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
         * system is close to OOM, scan both anon and file equally
         * (unless the swappiness setting disagrees with swapping).
         */
-       if (!sc->priority && sc->swappiness) {
+       if (!sc->priority && swappiness) {
                scan_balance = SCAN_EQUAL;
                goto out;
        }
 
-       anon  = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
-               get_lru_size(lruvec, LRU_INACTIVE_ANON);
-       file  = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
-               get_lru_size(lruvec, LRU_INACTIVE_FILE);
-
        /*
         * Prevent the reclaimer from falling into the cache trap: as
         * cache pages start out inactive, every cache fault will tip
@@ -1939,9 +1940,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
         * anon pages.  Try to detect this based on file LRU size.
         */
        if (global_reclaim(sc)) {
-               unsigned long free = zone_page_state(zone, NR_FREE_PAGES);
+               unsigned long zonefile;
+               unsigned long zonefree;
 
-               if (unlikely(file + free <= high_wmark_pages(zone))) {
+               zonefree = zone_page_state(zone, NR_FREE_PAGES);
+               zonefile = zone_page_state(zone, NR_ACTIVE_FILE) +
+                          zone_page_state(zone, NR_INACTIVE_FILE);
+
+               if (unlikely(zonefile + zonefree <= high_wmark_pages(zone))) {
                        scan_balance = SCAN_ANON;
                        goto out;
                }
@@ -1962,7 +1968,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
         * With swappiness at 100, anonymous and file have the same priority.
         * This scanning priority is essentially the inverse of IO cost.
         */
-       anon_prio = sc->swappiness;
+       anon_prio = swappiness;
        file_prio = 200 - anon_prio;
 
        /*
@@ -1976,6 +1982,12 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
         *
         * anon in [0], file in [1]
         */
+
+       anon  = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
+               get_lru_size(lruvec, LRU_INACTIVE_ANON);
+       file  = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
+               get_lru_size(lruvec, LRU_INACTIVE_FILE);
+
        spin_lock_irq(&zone->lru_lock);
        if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
                reclaim_stat->recent_scanned[0] /= 2;
@@ -2052,7 +2064,8 @@ out:
 /*
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
  */
-static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
+                         struct scan_control *sc)
 {
        unsigned long nr[NR_LRU_LISTS];
        unsigned long targets[NR_LRU_LISTS];
@@ -2063,7 +2076,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
        struct blk_plug plug;
        bool scan_adjusted;
 
-       get_scan_count(lruvec, sc, nr);
+       get_scan_count(lruvec, swappiness, sc, nr);
 
        /* Record the original scan target for proportional adjustments later */
        memcpy(targets, nr, sizeof(nr));
@@ -2241,9 +2254,10 @@ static inline bool should_continue_reclaim(struct zone *zone,
        }
 }
 
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
+static bool shrink_zone(struct zone *zone, struct scan_control *sc)
 {
        unsigned long nr_reclaimed, nr_scanned;
+       bool reclaimable = false;
 
        do {
                struct mem_cgroup *root = sc->target_mem_cgroup;
@@ -2259,11 +2273,12 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
                memcg = mem_cgroup_iter(root, NULL, &reclaim);
                do {
                        struct lruvec *lruvec;
+                       int swappiness;
 
                        lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+                       swappiness = mem_cgroup_swappiness(memcg);
 
-                       sc->swappiness = mem_cgroup_swappiness(memcg);
-                       shrink_lruvec(lruvec, sc);
+                       shrink_lruvec(lruvec, swappiness, sc);
 
                        /*
                         * Direct reclaim and kswapd have to scan all memory
@@ -2287,20 +2302,21 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
                           sc->nr_scanned - nr_scanned,
                           sc->nr_reclaimed - nr_reclaimed);
 
+               if (sc->nr_reclaimed - nr_reclaimed)
+                       reclaimable = true;
+
        } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
                                         sc->nr_scanned - nr_scanned, sc));
+
+       return reclaimable;
 }
 
 /* Returns true if compaction should go ahead for a high-order request */
-static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
+static inline bool compaction_ready(struct zone *zone, int order)
 {
        unsigned long balance_gap, watermark;
        bool watermark_ok;
 
-       /* Do not consider compaction for orders reclaim is meant to satisfy */
-       if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
-               return false;
-
        /*
         * Compaction takes time to run and there are potentially other
         * callers using the pages just freed. Continue reclaiming until
@@ -2309,18 +2325,18 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
         */
        balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP(
                        zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO));
-       watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
+       watermark = high_wmark_pages(zone) + balance_gap + (2UL << order);
        watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
 
        /*
         * If compaction is deferred, reclaim up to a point where
         * compaction will have a chance of success when re-enabled
         */
-       if (compaction_deferred(zone, sc->order))
+       if (compaction_deferred(zone, order))
                return watermark_ok;
 
        /* If compaction is not ready to start, keep reclaiming */
-       if (!compaction_suitable(zone, sc->order))
+       if (!compaction_suitable(zone, order))
                return false;
 
        return watermark_ok;
@@ -2342,10 +2358,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
  * If a zone is deemed to be full of pinned pages then just give it a light
  * scan then give up on it.
  *
- * This function returns true if a zone is being reclaimed for a costly
- * high-order allocation and compaction is ready to begin. This indicates to
- * the caller that it should consider retrying the allocation instead of
- * further reclaim.
+ * Returns true if a zone was reclaimable.
  */
 static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 {
@@ -2354,13 +2367,13 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
        unsigned long nr_soft_reclaimed;
        unsigned long nr_soft_scanned;
        unsigned long lru_pages = 0;
-       bool aborted_reclaim = false;
        struct reclaim_state *reclaim_state = current->reclaim_state;
        gfp_t orig_mask;
        struct shrink_control shrink = {
                .gfp_mask = sc->gfp_mask,
        };
        enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
+       bool reclaimable = false;
 
        /*
         * If the number of buffer_heads in the machine exceeds the maximum
@@ -2391,22 +2404,24 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                        if (sc->priority != DEF_PRIORITY &&
                            !zone_reclaimable(zone))
                                continue;       /* Let kswapd poll it */
-                       if (IS_ENABLED(CONFIG_COMPACTION)) {
-                               /*
-                                * If we already have plenty of memory free for
-                                * compaction in this zone, don't free any more.
-                                * Even though compaction is invoked for any
-                                * non-zero order, only frequent costly order
-                                * reclamation is disruptive enough to become a
-                                * noticeable problem, like transparent huge
-                                * page allocations.
-                                */
-                               if ((zonelist_zone_idx(z) <= requested_highidx)
-                                   && compaction_ready(zone, sc)) {
-                                       aborted_reclaim = true;
-                                       continue;
-                               }
+
+                       /*
+                        * If we already have plenty of memory free for
+                        * compaction in this zone, don't free any more.
+                        * Even though compaction is invoked for any
+                        * non-zero order, only frequent costly order
+                        * reclamation is disruptive enough to become a
+                        * noticeable problem, like transparent huge
+                        * page allocations.
+                        */
+                       if (IS_ENABLED(CONFIG_COMPACTION) &&
+                           sc->order > PAGE_ALLOC_COSTLY_ORDER &&
+                           zonelist_zone_idx(z) <= requested_highidx &&
+                           compaction_ready(zone, sc->order)) {
+                               sc->compaction_ready = true;
+                               continue;
                        }
+
                        /*
                         * This steals pages from memory cgroups over softlimit
                         * and returns the number of reclaimed pages and
@@ -2419,10 +2434,17 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                                                &nr_soft_scanned);
                        sc->nr_reclaimed += nr_soft_reclaimed;
                        sc->nr_scanned += nr_soft_scanned;
+                       if (nr_soft_reclaimed)
+                               reclaimable = true;
                        /* need some check for avoid more shrink_zone() */
                }
 
-               shrink_zone(zone, sc);
+               if (shrink_zone(zone, sc))
+                       reclaimable = true;
+
+               if (global_reclaim(sc) &&
+                   !reclaimable && zone_reclaimable(zone))
+                       reclaimable = true;
        }
 
        /*
@@ -2445,27 +2467,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
         */
        sc->gfp_mask = orig_mask;
 
-       return aborted_reclaim;
-}
-
-/* All zones in zonelist are unreclaimable? */
-static bool all_unreclaimable(struct zonelist *zonelist,
-               struct scan_control *sc)
-{
-       struct zoneref *z;
-       struct zone *zone;
-
-       for_each_zone_zonelist_nodemask(zone, z, zonelist,
-                       gfp_zone(sc->gfp_mask), sc->nodemask) {
-               if (!populated_zone(zone))
-                       continue;
-               if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
-                       continue;
-               if (zone_reclaimable(zone))
-                       return false;
-       }
-
-       return true;
+       return reclaimable;
 }
 
 /*
@@ -2489,7 +2491,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 {
        unsigned long total_scanned = 0;
        unsigned long writeback_threshold;
-       bool aborted_reclaim;
+       bool zones_reclaimable;
 
        delayacct_freepages_start();
 
@@ -2500,11 +2502,14 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
                                sc->priority);
                sc->nr_scanned = 0;
-               aborted_reclaim = shrink_zones(zonelist, sc);
+               zones_reclaimable = shrink_zones(zonelist, sc);
 
                total_scanned += sc->nr_scanned;
                if (sc->nr_reclaimed >= sc->nr_to_reclaim)
-                       goto out;
+                       break;
+
+               if (sc->compaction_ready)
+                       break;
 
                /*
                 * If we're getting trouble reclaiming, start doing
@@ -2526,28 +2531,19 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                                                WB_REASON_TRY_TO_FREE_PAGES);
                        sc->may_writepage = 1;
                }
-       } while (--sc->priority >= 0 && !aborted_reclaim);
+       } while (--sc->priority >= 0);
 
-out:
        delayacct_freepages_end();
 
        if (sc->nr_reclaimed)
                return sc->nr_reclaimed;
 
-       /*
-        * As hibernation is going on, kswapd is freezed so that it can't mark
-        * the zone into all_unreclaimable. Thus bypassing all_unreclaimable
-        * check.
-        */
-       if (oom_killer_disabled)
-               return 0;
-
        /* Aborted reclaim to try compaction? don't OOM, then */
-       if (aborted_reclaim)
+       if (sc->compaction_ready)
                return 1;
 
-       /* top priority shrink_zones still had more to do? don't OOM, then */
-       if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc))
+       /* Any of the zones still reclaimable?  Don't OOM. */
+       if (zones_reclaimable)
                return 1;
 
        return 0;
@@ -2684,15 +2680,14 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 {
        unsigned long nr_reclaimed;
        struct scan_control sc = {
+               .nr_to_reclaim = SWAP_CLUSTER_MAX,
                .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
+               .order = order,
+               .nodemask = nodemask,
+               .priority = DEF_PRIORITY,
                .may_writepage = !laptop_mode,
-               .nr_to_reclaim = SWAP_CLUSTER_MAX,
                .may_unmap = 1,
                .may_swap = 1,
-               .order = order,
-               .priority = DEF_PRIORITY,
-               .target_mem_cgroup = NULL,
-               .nodemask = nodemask,
        };
 
        /*
@@ -2722,17 +2717,14 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
                                                unsigned long *nr_scanned)
 {
        struct scan_control sc = {
-               .nr_scanned = 0,
                .nr_to_reclaim = SWAP_CLUSTER_MAX,
+               .target_mem_cgroup = memcg,
                .may_writepage = !laptop_mode,
                .may_unmap = 1,
                .may_swap = !noswap,
-               .order = 0,
-               .priority = 0,
-               .swappiness = mem_cgroup_swappiness(memcg),
-               .target_mem_cgroup = memcg,
        };
        struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+       int swappiness = mem_cgroup_swappiness(memcg);
 
        sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                        (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2748,7 +2740,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
         * will pick up pages from other mem cgroup's as well. We hack
         * the priority and make it zero.
         */
-       shrink_lruvec(lruvec, &sc);
+       shrink_lruvec(lruvec, swappiness, &sc);
 
        trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
 
@@ -2764,16 +2756,14 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
        unsigned long nr_reclaimed;
        int nid;
        struct scan_control sc = {
-               .may_writepage = !laptop_mode,
-               .may_unmap = 1,
-               .may_swap = !noswap,
                .nr_to_reclaim = SWAP_CLUSTER_MAX,
-               .order = 0,
-               .priority = DEF_PRIORITY,
-               .target_mem_cgroup = memcg,
-               .nodemask = NULL, /* we don't care the placement */
                .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                                (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
+               .target_mem_cgroup = memcg,
+               .priority = DEF_PRIORITY,
+               .may_writepage = !laptop_mode,
+               .may_unmap = 1,
+               .may_swap = !noswap,
        };
 
        /*
@@ -3031,12 +3021,11 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
        unsigned long nr_soft_scanned;
        struct scan_control sc = {
                .gfp_mask = GFP_KERNEL,
+               .order = order,
                .priority = DEF_PRIORITY,
+               .may_writepage = !laptop_mode,
                .may_unmap = 1,
                .may_swap = 1,
-               .may_writepage = !laptop_mode,
-               .order = order,
-               .target_mem_cgroup = NULL,
        };
        count_vm_event(PAGEOUTRUN);
 
@@ -3417,14 +3406,13 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 {
        struct reclaim_state reclaim_state;
        struct scan_control sc = {
+               .nr_to_reclaim = nr_to_reclaim,
                .gfp_mask = GFP_HIGHUSER_MOVABLE,
-               .may_swap = 1,
-               .may_unmap = 1,
+               .priority = DEF_PRIORITY,
                .may_writepage = 1,
-               .nr_to_reclaim = nr_to_reclaim,
+               .may_unmap = 1,
+               .may_swap = 1,
                .hibernation_mode = 1,
-               .order = 0,
-               .priority = DEF_PRIORITY,
        };
        struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
        struct task_struct *p = current;
@@ -3604,13 +3592,13 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
        struct task_struct *p = current;
        struct reclaim_state reclaim_state;
        struct scan_control sc = {
-               .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
-               .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
-               .may_swap = 1,
                .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
                .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
                .order = order,
                .priority = ZONE_RECLAIM_PRIORITY,
+               .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
+               .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
+               .may_swap = 1,
        };
        struct shrink_control shrink = {
                .gfp_mask = sc.gfp_mask,
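
The net effect of the vmscan changes above is a simpler exit protocol for direct reclaim: the priority loop stops once the reclaim target is met or compaction is ready, and the "don't OOM" decision keys off whether shrink_zones() saw any reclaimable zone instead of the removed all_unreclaimable() walk. The following is a minimal user-space model of that control flow only; all *_model names are invented for illustration and this is not kernel code.

#include <stdbool.h>
#include <stdio.h>

struct scan_control_model {
	unsigned long nr_reclaimed;
	unsigned long nr_to_reclaim;
	bool compaction_ready;
	int priority;
};

/* Stand-in for shrink_zones(): pretend each pass frees a few pages and
 * report whether any zone was reclaimable at all. */
static bool shrink_zones_model(struct scan_control_model *sc, bool zones_have_pages)
{
	if (zones_have_pages)
		sc->nr_reclaimed += 4;
	return zones_have_pages;
}

/* Mirrors the return contract of do_try_to_free_pages() after the rework:
 * pages reclaimed, or 1 ("don't OOM") when compaction is ready or some zone
 * is still reclaimable, or 0 when nothing is left. */
static unsigned long try_to_free_model(struct scan_control_model *sc, bool zones_have_pages)
{
	bool zones_reclaimable = false;

	do {
		zones_reclaimable = shrink_zones_model(sc, zones_have_pages);
		if (sc->nr_reclaimed >= sc->nr_to_reclaim)
			break;
		if (sc->compaction_ready)
			break;
	} while (--sc->priority >= 0);

	if (sc->nr_reclaimed)
		return sc->nr_reclaimed;
	if (sc->compaction_ready)
		return 1;
	if (zones_reclaimable)
		return 1;
	return 0;
}

int main(void)
{
	struct scan_control_model sc = { .nr_to_reclaim = 32, .priority = 12 };

	printf("reclaimed %lu pages\n", try_to_free_model(&sc, true));
	return 0;
}
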
index b37bd49bfd55e206e0845535bc42e7817502ce3b..e9ab104b956f127f79598231863bd62b7f559147 100644 (file)
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -200,7 +200,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat,
                        continue;
 
                threshold = (*calculate_pressure)(zone);
-               for_each_possible_cpu(cpu)
+               for_each_online_cpu(cpu)
                        per_cpu_ptr(zone->pageset, cpu)->stat_threshold
                                                        = threshold;
        }
@@ -763,6 +763,7 @@ const char * const vmstat_text[] = {
        "nr_shmem",
        "nr_dirtied",
        "nr_written",
+       "nr_pages_scanned",
 
 #ifdef CONFIG_NUMA
        "numa_hit",
@@ -1067,7 +1068,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
                   min_wmark_pages(zone),
                   low_wmark_pages(zone),
                   high_wmark_pages(zone),
-                  zone->pages_scanned,
+                  zone_page_state(zone, NR_PAGES_SCANNED),
                   zone->spanned_pages,
                   zone->present_pages,
                   zone->managed_pages);
@@ -1077,10 +1078,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
                                zone_page_state(zone, i));
 
        seq_printf(m,
-                  "\n        protection: (%lu",
+                  "\n        protection: (%ld",
                   zone->lowmem_reserve[0]);
        for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
-               seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
+               seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
        seq_printf(m,
                   ")"
                   "\n  pagesets");
index 01df13a7e2e1e51bdd0ea5c8d06e3aad1d65d066..a05790b1915eb4faba938cd0264c8a5ccfd2aa9e 100644 (file)
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -51,6 +51,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/zbud.h>
+#include <linux/zpool.h>
 
 /*****************
  * Structures
@@ -112,6 +113,90 @@ struct zbud_header {
        bool under_reclaim;
 };
 
+/*****************
+ * zpool
+ ****************/
+
+#ifdef CONFIG_ZPOOL
+
+static int zbud_zpool_evict(struct zbud_pool *pool, unsigned long handle)
+{
+       return zpool_evict(pool, handle);
+}
+
+static struct zbud_ops zbud_zpool_ops = {
+       .evict =        zbud_zpool_evict
+};
+
+static void *zbud_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops)
+{
+       return zbud_create_pool(gfp, &zbud_zpool_ops);
+}
+
+static void zbud_zpool_destroy(void *pool)
+{
+       zbud_destroy_pool(pool);
+}
+
+static int zbud_zpool_malloc(void *pool, size_t size, gfp_t gfp,
+                       unsigned long *handle)
+{
+       return zbud_alloc(pool, size, gfp, handle);
+}
+static void zbud_zpool_free(void *pool, unsigned long handle)
+{
+       zbud_free(pool, handle);
+}
+
+static int zbud_zpool_shrink(void *pool, unsigned int pages,
+                       unsigned int *reclaimed)
+{
+       unsigned int total = 0;
+       int ret = -EINVAL;
+
+       while (total < pages) {
+               ret = zbud_reclaim_page(pool, 8);
+               if (ret < 0)
+                       break;
+               total++;
+       }
+
+       if (reclaimed)
+               *reclaimed = total;
+
+       return ret;
+}
+
+static void *zbud_zpool_map(void *pool, unsigned long handle,
+                       enum zpool_mapmode mm)
+{
+       return zbud_map(pool, handle);
+}
+static void zbud_zpool_unmap(void *pool, unsigned long handle)
+{
+       zbud_unmap(pool, handle);
+}
+
+static u64 zbud_zpool_total_size(void *pool)
+{
+       return zbud_get_pool_size(pool) * PAGE_SIZE;
+}
+
+static struct zpool_driver zbud_zpool_driver = {
+       .type =         "zbud",
+       .owner =        THIS_MODULE,
+       .create =       zbud_zpool_create,
+       .destroy =      zbud_zpool_destroy,
+       .malloc =       zbud_zpool_malloc,
+       .free =         zbud_zpool_free,
+       .shrink =       zbud_zpool_shrink,
+       .map =          zbud_zpool_map,
+       .unmap =        zbud_zpool_unmap,
+       .total_size =   zbud_zpool_total_size,
+};
+
+#endif /* CONFIG_ZPOOL */
+
 /*****************
  * Helpers
 *****************/
@@ -122,7 +207,7 @@ enum buddy {
 };
 
 /* Converts an allocation size in bytes to size in zbud chunks */
-static int size_to_chunks(int size)
+static int size_to_chunks(size_t size)
 {
        return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
 }
@@ -247,7 +332,7 @@ void zbud_destroy_pool(struct zbud_pool *pool)
  * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
  * a new page.
  */
-int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp,
+int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
                        unsigned long *handle)
 {
        int chunks, i, freechunks;
@@ -511,11 +596,20 @@ static int __init init_zbud(void)
        /* Make sure the zbud header will fit in one chunk */
        BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED);
        pr_info("loaded\n");
+
+#ifdef CONFIG_ZPOOL
+       zpool_register_driver(&zbud_zpool_driver);
+#endif
+
        return 0;
 }
 
 static void __exit exit_zbud(void)
 {
+#ifdef CONFIG_ZPOOL
+       zpool_unregister_driver(&zbud_zpool_driver);
+#endif
+
        pr_info("unloaded\n");
 }
 
diff --git a/mm/zpool.c b/mm/zpool.c
new file mode 100644 (file)
index 0000000..e40612a
--- /dev/null
@@ -0,0 +1,364 @@
+/*
+ * zpool memory storage api
+ *
+ * Copyright (C) 2014 Dan Streetman
+ *
+ * This is a common frontend for memory storage pool implementations.
+ * Typically, this is used to store compressed memory.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/zpool.h>
+
+struct zpool {
+       char *type;
+
+       struct zpool_driver *driver;
+       void *pool;
+       struct zpool_ops *ops;
+
+       struct list_head list;
+};
+
+static LIST_HEAD(drivers_head);
+static DEFINE_SPINLOCK(drivers_lock);
+
+static LIST_HEAD(pools_head);
+static DEFINE_SPINLOCK(pools_lock);
+
+/**
+ * zpool_register_driver() - register a zpool implementation.
+ * @driver:    driver to register
+ */
+void zpool_register_driver(struct zpool_driver *driver)
+{
+       spin_lock(&drivers_lock);
+       atomic_set(&driver->refcount, 0);
+       list_add(&driver->list, &drivers_head);
+       spin_unlock(&drivers_lock);
+}
+EXPORT_SYMBOL(zpool_register_driver);
+
+/**
+ * zpool_unregister_driver() - unregister a zpool implementation.
+ * @driver:    driver to unregister.
+ *
+ * Module usage counting is used to prevent using a driver
+ * while/after unloading, so if this is called from module
+ * exit function, this should never fail; if called from
+ * other than the module exit function, and this returns
+ * failure, the driver is in use and must remain available.
+ */
+int zpool_unregister_driver(struct zpool_driver *driver)
+{
+       int ret = 0, refcount;
+
+       spin_lock(&drivers_lock);
+       refcount = atomic_read(&driver->refcount);
+       WARN_ON(refcount < 0);
+       if (refcount > 0)
+               ret = -EBUSY;
+       else
+               list_del(&driver->list);
+       spin_unlock(&drivers_lock);
+
+       return ret;
+}
+EXPORT_SYMBOL(zpool_unregister_driver);
+
+/**
+ * zpool_evict() - evict callback from a zpool implementation.
+ * @pool:      pool to evict from.
+ * @handle:    handle to evict.
+ *
+ * This can be used by zpool implementations to call the
+ * user's evict zpool_ops struct evict callback.
+ */
+int zpool_evict(void *pool, unsigned long handle)
+{
+       struct zpool *zpool;
+
+       spin_lock(&pools_lock);
+       list_for_each_entry(zpool, &pools_head, list) {
+               if (zpool->pool == pool) {
+                       spin_unlock(&pools_lock);
+                       if (!zpool->ops || !zpool->ops->evict)
+                               return -EINVAL;
+                       return zpool->ops->evict(zpool, handle);
+               }
+       }
+       spin_unlock(&pools_lock);
+
+       return -ENOENT;
+}
+EXPORT_SYMBOL(zpool_evict);
+
+static struct zpool_driver *zpool_get_driver(char *type)
+{
+       struct zpool_driver *driver;
+
+       spin_lock(&drivers_lock);
+       list_for_each_entry(driver, &drivers_head, list) {
+               if (!strcmp(driver->type, type)) {
+                       bool got = try_module_get(driver->owner);
+
+                       if (got)
+                               atomic_inc(&driver->refcount);
+                       spin_unlock(&drivers_lock);
+                       return got ? driver : NULL;
+               }
+       }
+
+       spin_unlock(&drivers_lock);
+       return NULL;
+}
+
+static void zpool_put_driver(struct zpool_driver *driver)
+{
+       atomic_dec(&driver->refcount);
+       module_put(driver->owner);
+}
+
+/**
+ * zpool_create_pool() - Create a new zpool
+ * @type       The type of the zpool to create (e.g. zbud, zsmalloc)
+ * @gfp                The GFP flags to use when allocating the pool.
+ * @ops                The optional ops callback.
+ *
+ * This creates a new zpool of the specified type.  The gfp flags will be
+ * used when allocating memory, if the implementation supports it.  If the
+ * ops param is NULL, then the created zpool will not be shrinkable.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: New zpool on success, NULL on failure.
+ */
+struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops)
+{
+       struct zpool_driver *driver;
+       struct zpool *zpool;
+
+       pr_info("creating pool type %s\n", type);
+
+       driver = zpool_get_driver(type);
+
+       if (!driver) {
+               request_module(type);
+               driver = zpool_get_driver(type);
+       }
+
+       if (!driver) {
+               pr_err("no driver for type %s\n", type);
+               return NULL;
+       }
+
+       zpool = kmalloc(sizeof(*zpool), gfp);
+       if (!zpool) {
+               pr_err("couldn't create zpool - out of memory\n");
+               zpool_put_driver(driver);
+               return NULL;
+       }
+
+       zpool->type = driver->type;
+       zpool->driver = driver;
+       zpool->pool = driver->create(gfp, ops);
+       zpool->ops = ops;
+
+       if (!zpool->pool) {
+               pr_err("couldn't create %s pool\n", type);
+               zpool_put_driver(driver);
+               kfree(zpool);
+               return NULL;
+       }
+
+       pr_info("created %s pool\n", type);
+
+       spin_lock(&pools_lock);
+       list_add(&zpool->list, &pools_head);
+       spin_unlock(&pools_lock);
+
+       return zpool;
+}
+
+/**
+ * zpool_destroy_pool() - Destroy a zpool
+ * @pool       The zpool to destroy.
+ *
+ * Implementations must guarantee this to be thread-safe,
+ * however only when destroying different pools.  The same
+ * pool should only be destroyed once, and should not be used
+ * after it is destroyed.
+ *
+ * This destroys an existing zpool.  The zpool should not be in use.
+ */
+void zpool_destroy_pool(struct zpool *zpool)
+{
+       pr_info("destroying pool type %s\n", zpool->type);
+
+       spin_lock(&pools_lock);
+       list_del(&zpool->list);
+       spin_unlock(&pools_lock);
+       zpool->driver->destroy(zpool->pool);
+       zpool_put_driver(zpool->driver);
+       kfree(zpool);
+}
+
+/**
+ * zpool_get_type() - Get the type of the zpool
+ * @pool       The zpool to check
+ *
+ * This returns the type of the pool.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: The type of zpool.
+ */
+char *zpool_get_type(struct zpool *zpool)
+{
+       return zpool->type;
+}
+
+/**
+ * zpool_malloc() - Allocate memory
+ * @pool       The zpool to allocate from.
+ * @size       The amount of memory to allocate.
+ * @gfp                The GFP flags to use when allocating memory.
+ * @handle     Pointer to the handle to set
+ *
+ * This allocates the requested amount of memory from the pool.
+ * The gfp flags will be used when allocating memory, if the
+ * implementation supports it.  The provided @handle will be
+ * set to the allocated object handle.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: 0 on success, negative value on error.
+ */
+int zpool_malloc(struct zpool *zpool, size_t size, gfp_t gfp,
+                       unsigned long *handle)
+{
+       return zpool->driver->malloc(zpool->pool, size, gfp, handle);
+}
+
+/**
+ * zpool_free() - Free previously allocated memory
+ * @pool       The zpool that allocated the memory.
+ * @handle     The handle to the memory to free.
+ *
+ * This frees previously allocated memory.  This does not guarantee
+ * that the pool will actually free memory, only that the memory
+ * in the pool will become available for use by the pool.
+ *
+ * Implementations must guarantee this to be thread-safe,
+ * however only when freeing different handles.  The same
+ * handle should only be freed once, and should not be used
+ * after freeing.
+ */
+void zpool_free(struct zpool *zpool, unsigned long handle)
+{
+       zpool->driver->free(zpool->pool, handle);
+}
+
+/**
+ * zpool_shrink() - Shrink the pool size
+ * @pool       The zpool to shrink.
+ * @pages      The number of pages to shrink the pool.
+ * @reclaimed  The number of pages successfully evicted.
+ *
+ * This attempts to shrink the actual memory size of the pool
+ * by evicting currently used handle(s).  If the pool was
+ * created with no zpool_ops, or the evict call fails for any
+ * of the handles, this will fail.  If non-NULL, the @reclaimed
+ * parameter will be set to the number of pages reclaimed,
+ * which may be more than the number of pages requested.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: 0 on success, negative value on error/failure.
+ */
+int zpool_shrink(struct zpool *zpool, unsigned int pages,
+                       unsigned int *reclaimed)
+{
+       return zpool->driver->shrink(zpool->pool, pages, reclaimed);
+}
+
+/**
+ * zpool_map_handle() - Map a previously allocated handle into memory
+ * @pool       The zpool that the handle was allocated from
+ * @handle     The handle to map
+ * @mm         How the memory should be mapped
+ *
+ * This maps a previously allocated handle into memory.  The @mm
+ * param indicates to the implementation how the memory will be
+ * used, i.e. read-only, write-only, read-write.  If the
+ * implementation does not support it, the memory will be treated
+ * as read-write.
+ *
+ * This may hold locks, disable interrupts, and/or preemption,
+ * and the zpool_unmap_handle() must be called to undo those
+ * actions.  The code that uses the mapped handle should complete
+ * its operations on the mapped handle memory quickly and unmap
+ * as soon as possible.  As the implementation may use per-cpu
+ * data, multiple handles should not be mapped concurrently on
+ * any cpu.
+ *
+ * Returns: A pointer to the handle's mapped memory area.
+ */
+void *zpool_map_handle(struct zpool *zpool, unsigned long handle,
+                       enum zpool_mapmode mapmode)
+{
+       return zpool->driver->map(zpool->pool, handle, mapmode);
+}
+
+/**
+ * zpool_unmap_handle() - Unmap a previously mapped handle
+ * @pool       The zpool that the handle was allocated from
+ * @handle     The handle to unmap
+ *
+ * This unmaps a previously mapped handle.  Any locks or other
+ * actions that the implementation took in zpool_map_handle()
+ * will be undone here.  The memory area returned from
+ * zpool_map_handle() should no longer be used after this.
+ */
+void zpool_unmap_handle(struct zpool *zpool, unsigned long handle)
+{
+       zpool->driver->unmap(zpool->pool, handle);
+}
+
+/**
+ * zpool_get_total_size() - The total size of the pool
+ * @pool       The zpool to check
+ *
+ * This returns the total size in bytes of the pool.
+ *
+ * Returns: Total size of the zpool in bytes.
+ */
+u64 zpool_get_total_size(struct zpool *zpool)
+{
+       return zpool->driver->total_size(zpool->pool);
+}
+
+static int __init init_zpool(void)
+{
+       pr_info("loaded\n");
+       return 0;
+}
+
+static void __exit exit_zpool(void)
+{
+       pr_info("unloaded\n");
+}
+
+module_init(init_zpool);
+module_exit(exit_zpool);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
+MODULE_DESCRIPTION("Common API for compressed memory storage");
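
Read end to end, the new API is used roughly as follows. This is a hypothetical consumer sketch, not code from this series: the pool type, the 64-byte allocation and the no-op evict callback are chosen purely for illustration, and error paths are kept minimal.

#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/string.h>
#include <linux/zpool.h>

/* Hypothetical evict callback; a real consumer would write the object back. */
static int example_evict(struct zpool *pool, unsigned long handle)
{
	return -EINVAL;
}

static struct zpool_ops example_ops = {
	.evict = example_evict,
};

static char example_type[] = "zbud";

static int __init example_init(void)
{
	struct zpool *pool;
	unsigned long handle;
	char *buf;

	pool = zpool_create_pool(example_type, GFP_KERNEL, &example_ops);
	if (!pool)
		return -ENOMEM;

	/* Allocate an object, map it for writing, then release it again. */
	if (zpool_malloc(pool, 64, GFP_KERNEL, &handle)) {
		zpool_destroy_pool(pool);
		return -ENOMEM;
	}

	buf = zpool_map_handle(pool, handle, ZPOOL_MM_WO);
	memset(buf, 0, 64);
	zpool_unmap_handle(pool, handle);

	pr_info("%s pool holds %llu bytes\n",
		zpool_get_type(pool), zpool_get_total_size(pool));

	zpool_free(pool, handle);
	zpool_destroy_pool(pool);
	return 0;
}
module_init(example_init);

MODULE_LICENSE("GPL");
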
index fe78189624cfce58d29a567b6c379b80367775de..4e2fc83cb394b9b53384fdc82288e7b6ab793b3a 100644 (file)
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -92,6 +92,7 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 #include <linux/zsmalloc.h>
+#include <linux/zpool.h>
 
 /*
  * This must be power of 2 and greater than or equal to sizeof(link_free).
@@ -240,6 +241,81 @@ struct mapping_area {
        enum zs_mapmode vm_mm; /* mapping mode */
 };
 
+/* zpool driver */
+
+#ifdef CONFIG_ZPOOL
+
+static void *zs_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops)
+{
+       return zs_create_pool(gfp);
+}
+
+static void zs_zpool_destroy(void *pool)
+{
+       zs_destroy_pool(pool);
+}
+
+static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp,
+                       unsigned long *handle)
+{
+       *handle = zs_malloc(pool, size);
+       return *handle ? 0 : -1;
+}
+static void zs_zpool_free(void *pool, unsigned long handle)
+{
+       zs_free(pool, handle);
+}
+
+static int zs_zpool_shrink(void *pool, unsigned int pages,
+                       unsigned int *reclaimed)
+{
+       return -EINVAL;
+}
+
+static void *zs_zpool_map(void *pool, unsigned long handle,
+                       enum zpool_mapmode mm)
+{
+       enum zs_mapmode zs_mm;
+
+       switch (mm) {
+       case ZPOOL_MM_RO:
+               zs_mm = ZS_MM_RO;
+               break;
+       case ZPOOL_MM_WO:
+               zs_mm = ZS_MM_WO;
+               break;
+       case ZPOOL_MM_RW: /* fallthru */
+       default:
+               zs_mm = ZS_MM_RW;
+               break;
+       }
+
+       return zs_map_object(pool, handle, zs_mm);
+}
+static void zs_zpool_unmap(void *pool, unsigned long handle)
+{
+       zs_unmap_object(pool, handle);
+}
+
+static u64 zs_zpool_total_size(void *pool)
+{
+       return zs_get_total_size_bytes(pool);
+}
+
+static struct zpool_driver zs_zpool_driver = {
+       .type =         "zsmalloc",
+       .owner =        THIS_MODULE,
+       .create =       zs_zpool_create,
+       .destroy =      zs_zpool_destroy,
+       .malloc =       zs_zpool_malloc,
+       .free =         zs_zpool_free,
+       .shrink =       zs_zpool_shrink,
+       .map =          zs_zpool_map,
+       .unmap =        zs_zpool_unmap,
+       .total_size =   zs_zpool_total_size,
+};
+
+#endif /* CONFIG_ZPOOL */
 
 /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
 static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
@@ -690,7 +766,7 @@ static inline void __zs_cpu_down(struct mapping_area *area)
 static inline void *__zs_map_object(struct mapping_area *area,
                                struct page *pages[2], int off, int size)
 {
-       BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages));
+       BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages));
        area->vm_addr = area->vm->addr;
        return area->vm_addr + off;
 }
@@ -814,6 +890,10 @@ static void zs_exit(void)
 {
        int cpu;
 
+#ifdef CONFIG_ZPOOL
+       zpool_unregister_driver(&zs_zpool_driver);
+#endif
+
        cpu_notifier_register_begin();
 
        for_each_online_cpu(cpu)
@@ -840,6 +920,10 @@ static int zs_init(void)
 
        cpu_notifier_register_done();
 
+#ifdef CONFIG_ZPOOL
+       zpool_register_driver(&zs_zpool_driver);
+#endif
+
        return 0;
 fail:
        zs_exit();
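
One consequence of the zsmalloc driver above is that its shrink op always returns -EINVAL, so a zpool consumer has to treat a failed zpool_shrink() as "cannot make room" rather than retrying, which is what zswap does further below. A small hypothetical helper (a sketch only, not code from this series) capturing that pattern:

#include <linux/errno.h>
#include <linux/zpool.h>

/* Hypothetical helper: try to evict one page, tolerating backends such as
 * zsmalloc whose zpool driver does not implement reclaim. */
static int example_make_room(struct zpool *pool)
{
	unsigned int reclaimed = 0;

	if (zpool_shrink(pool, 1, &reclaimed))
		return -ENOMEM;	/* no evict op, or eviction failed */

	return reclaimed ? 0 : -ENOMEM;
}
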
index 008388fe7b0ff923b2300504b3a7717869fe12f8..032c21eeab2b0f05ebe73a9a7d1fd820651f3cef 100644 (file)
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -34,7 +34,7 @@
 #include <linux/swap.h>
 #include <linux/crypto.h>
 #include <linux/mempool.h>
-#include <linux/zbud.h>
+#include <linux/zpool.h>
 
 #include <linux/mm_types.h>
 #include <linux/page-flags.h>
@@ -45,8 +45,8 @@
 /*********************************
 * statistics
 **********************************/
-/* Number of memory pages used by the compressed pool */
-static u64 zswap_pool_pages;
+/* Total bytes used by the compressed storage */
+static u64 zswap_pool_total_size;
 /* The number of compressed pages currently stored in zswap */
 static atomic_t zswap_stored_pages = ATOMIC_INIT(0);
 
@@ -89,8 +89,13 @@ static unsigned int zswap_max_pool_percent = 20;
 module_param_named(max_pool_percent,
                        zswap_max_pool_percent, uint, 0644);
 
-/* zbud_pool is shared by all of zswap backend  */
-static struct zbud_pool *zswap_pool;
+/* Compressed storage to use */
+#define ZSWAP_ZPOOL_DEFAULT "zbud"
+static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
+module_param_named(zpool, zswap_zpool_type, charp, 0444);
+
+/* zpool is shared by all of zswap backend  */
+static struct zpool *zswap_pool;
 
 /*********************************
 * compression functions
@@ -168,7 +173,7 @@ static void zswap_comp_exit(void)
  *            be held while changing the refcount.  Since the lock must
  *            be held, there is no reason to also make refcount atomic.
  * offset - the swap offset for the entry.  Index into the red-black tree.
- * handle - zbud allocation handle that stores the compressed page data
+ * handle - zpool allocation handle that stores the compressed page data
  * length - the length in bytes of the compressed page data.  Needed during
  *          decompression
  */
@@ -284,15 +289,15 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
 }
 
 /*
- * Carries out the common pattern of freeing and entry's zbud allocation,
+ * Carries out the common pattern of freeing an entry's zpool allocation,
  * freeing the entry itself, and decrementing the number of stored pages.
  */
 static void zswap_free_entry(struct zswap_entry *entry)
 {
-       zbud_free(zswap_pool, entry->handle);
+       zpool_free(zswap_pool, entry->handle);
        zswap_entry_cache_free(entry);
        atomic_dec(&zswap_stored_pages);
-       zswap_pool_pages = zbud_get_pool_size(zswap_pool);
+       zswap_pool_total_size = zpool_get_total_size(zswap_pool);
 }
 
 /* caller must hold the tree lock */
@@ -409,7 +414,7 @@ cleanup:
 static bool zswap_is_full(void)
 {
        return totalram_pages * zswap_max_pool_percent / 100 <
-               zswap_pool_pages;
+               DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
 }
 
 /*********************************
@@ -525,7 +530,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
  * the swap cache, the compressed version stored by zswap can be
  * freed.
  */
-static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
+static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
 {
        struct zswap_header *zhdr;
        swp_entry_t swpentry;
@@ -541,9 +546,9 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
        };
 
        /* extract swpentry from data */
-       zhdr = zbud_map(pool, handle);
+       zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
        swpentry = zhdr->swpentry; /* here */
-       zbud_unmap(pool, handle);
+       zpool_unmap_handle(pool, handle);
        tree = zswap_trees[swp_type(swpentry)];
        offset = swp_offset(swpentry);
 
@@ -573,13 +578,13 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
        case ZSWAP_SWAPCACHE_NEW: /* page is locked */
                /* decompress */
                dlen = PAGE_SIZE;
-               src = (u8 *)zbud_map(zswap_pool, entry->handle) +
-                       sizeof(struct zswap_header);
+               src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
+                               ZPOOL_MM_RO) + sizeof(struct zswap_header);
                dst = kmap_atomic(page);
                ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
                                entry->length, dst, &dlen);
                kunmap_atomic(dst);
-               zbud_unmap(zswap_pool, entry->handle);
+               zpool_unmap_handle(zswap_pool, entry->handle);
                BUG_ON(ret);
                BUG_ON(dlen != PAGE_SIZE);
 
@@ -652,7 +657,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
        /* reclaim space if needed */
        if (zswap_is_full()) {
                zswap_pool_limit_hit++;
-               if (zbud_reclaim_page(zswap_pool, 8)) {
+               if (zpool_shrink(zswap_pool, 1, NULL)) {
                        zswap_reject_reclaim_fail++;
                        ret = -ENOMEM;
                        goto reject;
@@ -679,7 +684,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 
        /* store */
        len = dlen + sizeof(struct zswap_header);
-       ret = zbud_alloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
+       ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
                &handle);
        if (ret == -ENOSPC) {
                zswap_reject_compress_poor++;
@@ -689,11 +694,11 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
                zswap_reject_alloc_fail++;
                goto freepage;
        }
-       zhdr = zbud_map(zswap_pool, handle);
+       zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW);
        zhdr->swpentry = swp_entry(type, offset);
        buf = (u8 *)(zhdr + 1);
        memcpy(buf, dst, dlen);
-       zbud_unmap(zswap_pool, handle);
+       zpool_unmap_handle(zswap_pool, handle);
        put_cpu_var(zswap_dstmem);
 
        /* populate entry */
@@ -716,7 +721,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 
        /* update stats */
        atomic_inc(&zswap_stored_pages);
-       zswap_pool_pages = zbud_get_pool_size(zswap_pool);
+       zswap_pool_total_size = zpool_get_total_size(zswap_pool);
 
        return 0;
 
@@ -752,13 +757,13 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
 
        /* decompress */
        dlen = PAGE_SIZE;
-       src = (u8 *)zbud_map(zswap_pool, entry->handle) +
-                       sizeof(struct zswap_header);
+       src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
+                       ZPOOL_MM_RO) + sizeof(struct zswap_header);
        dst = kmap_atomic(page);
        ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
                dst, &dlen);
        kunmap_atomic(dst);
-       zbud_unmap(zswap_pool, entry->handle);
+       zpool_unmap_handle(zswap_pool, entry->handle);
        BUG_ON(ret);
 
        spin_lock(&tree->lock);
@@ -811,7 +816,7 @@ static void zswap_frontswap_invalidate_area(unsigned type)
        zswap_trees[type] = NULL;
 }
 
-static struct zbud_ops zswap_zbud_ops = {
+static struct zpool_ops zswap_zpool_ops = {
        .evict = zswap_writeback_entry
 };
 
@@ -869,8 +874,8 @@ static int __init zswap_debugfs_init(void)
                        zswap_debugfs_root, &zswap_written_back_pages);
        debugfs_create_u64("duplicate_entry", S_IRUGO,
                        zswap_debugfs_root, &zswap_duplicate_entry);
-       debugfs_create_u64("pool_pages", S_IRUGO,
-                       zswap_debugfs_root, &zswap_pool_pages);
+       debugfs_create_u64("pool_total_size", S_IRUGO,
+                       zswap_debugfs_root, &zswap_pool_total_size);
        debugfs_create_atomic_t("stored_pages", S_IRUGO,
                        zswap_debugfs_root, &zswap_stored_pages);
 
@@ -895,16 +900,26 @@ static void __exit zswap_debugfs_exit(void) { }
 **********************************/
 static int __init init_zswap(void)
 {
+       gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
+
        if (!zswap_enabled)
                return 0;
 
        pr_info("loading zswap\n");
 
-       zswap_pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
+       zswap_pool = zpool_create_pool(zswap_zpool_type, gfp, &zswap_zpool_ops);
+       if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
+               pr_info("%s zpool not available\n", zswap_zpool_type);
+               zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
+               zswap_pool = zpool_create_pool(zswap_zpool_type, gfp,
+                                       &zswap_zpool_ops);
+       }
        if (!zswap_pool) {
-               pr_err("zbud pool creation failed\n");
+               pr_err("%s zpool not available\n", zswap_zpool_type);
+               pr_err("zpool creation failed\n");
                goto error;
        }
+       pr_info("using %s pool\n", zswap_zpool_type);
 
        if (zswap_entry_cache_create()) {
                pr_err("entry cache creation failed\n");
@@ -928,7 +943,7 @@ pcpufail:
 compfail:
        zswap_entry_cache_destory();
 cachefail:
-       zbud_destroy_pool(zswap_pool);
+       zpool_destroy_pool(zswap_pool);
 error:
        return -ENOMEM;
 }
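
A detail worth noting in the conversion above: zbud_get_pool_size() counted pages, while zpool_get_total_size() reports bytes, which is why zswap_is_full() now rounds the byte count up to pages before comparing against the percent-of-RAM cap. Restated as standalone arithmetic (a user-space sketch assuming a hard-coded 4 KiB page size; not kernel code):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Mirrors the comparison in zswap_is_full() after the zpool conversion. */
static bool pool_is_full(unsigned long long pool_total_bytes,
			 unsigned long totalram_pages,
			 unsigned int max_pool_percent)
{
	return totalram_pages * max_pool_percent / 100 <
	       DIV_ROUND_UP(pool_total_bytes, PAGE_SIZE);
}

int main(void)
{
	/* 1 GiB of RAM, 20% cap, pool currently holding 300 MiB: full. */
	printf("%d\n", pool_is_full(300ULL << 20, (1UL << 30) / PAGE_SIZE, 20));
	return 0;
}
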
index 022d18ab27a64dabc04ba8e71e1455c10904ae8a..52c43f9042209deaba0be22b549724ad28de1a77 100644 (file)
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -188,7 +188,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
 
        /* Reached the end of the list, so insert after 'frag_entry_last'. */
        if (likely(frag_entry_last)) {
-               hlist_add_after(&frag_entry_last->list, &frag_entry_new->list);
+               hlist_add_behind(&frag_entry_last->list, &frag_entry_new->list);
                chain->size += skb->len - hdr_size;
                chain->timestamp = jiffies;
                ret = true;
index b4845f4b2bb414c7403e225528add6840eb26f74..7751c92c8c57fc24b0c18e4d20a095bfa02e9ff0 100644 (file)
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1174,7 +1174,7 @@ static void br_multicast_add_router(struct net_bridge *br,
        }
 
        if (slot)
-               hlist_add_after_rcu(slot, &port->rlist);
+               hlist_add_behind_rcu(&port->rlist, slot);
        else
                hlist_add_head_rcu(&port->rlist, &br->router_list);
 }
index 5afeb5aa4c7cfd9b0f794a45840f6fbd79b90315..e9cb2588e4161c41613c0e50a17ea91b7416a514 100644 (file)
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -940,7 +940,7 @@ static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
                        last = li;
                }
                if (last)
-                       hlist_add_after_rcu(&last->hlist, &new->hlist);
+                       hlist_add_behind_rcu(&new->hlist, &last->hlist);
                else
                        hlist_add_before_rcu(&new->hlist, &li->hlist);
        }
index 731e1e1722d9b4322f3907e57e3a1fd1994230b5..fd0dc47f471dad23566248fecadd571a1a4c00d9 100644 (file)
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -277,7 +277,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
                last = p;
        }
        if (last)
-               hlist_add_after_rcu(&last->list, &newp->list);
+               hlist_add_behind_rcu(&newp->list, &last->list);
        else
                hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
 out:
index 0525d78ba32866c64c0b31bcf5b9d147855c308a..beeed602aeb379f2ddfbd74a61c798cd371636ba 100644 (file)
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -389,7 +389,7 @@ redo:
                        if (h != h0)
                                continue;
                        hlist_del(&pol->bydst);
-                       hlist_add_after(entry0, &pol->bydst);
+                       hlist_add_behind(&pol->bydst, entry0);
                }
                entry0 = &pol->bydst;
        }
@@ -654,7 +654,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
                        break;
        }
        if (newpos)
-               hlist_add_after(newpos, &policy->bydst);
+               hlist_add_behind(&policy->bydst, newpos);
        else
                hlist_add_head(&policy->bydst, chain);
        xfrm_pol_hold(policy);
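
The conversions above are mechanical, but the argument order deserves a second look: the old hlist_add_after(prev, new) took the existing list node first, whereas hlist_add_behind(new, prev) takes the node being inserted first, matching the rest of the hlist_add_* helpers. Below is a self-contained user-space model of the helper's semantics (the hnode names are invented; this is not the kernel implementation itself):

#include <stdio.h>
#include <stddef.h>

struct hnode {
	struct hnode *next;
	struct hnode **pprev;
	int val;
};

/* Minimal re-implementation of the hlist_add_behind() semantics:
 * insert n after prev, keeping pprev back-pointers consistent. */
static void hnode_add_behind(struct hnode *n, struct hnode *prev)
{
	n->next = prev->next;
	prev->next = n;
	n->pprev = &prev->next;
	if (n->next)
		n->next->pprev = &n->next;
}

int main(void)
{
	struct hnode a = { .val = 1 }, b = { .val = 2 }, c = { .val = 3 };
	struct hnode *head = &a;

	a.pprev = &head;
	hnode_add_behind(&c, &a);	/* list: a, c */
	hnode_add_behind(&b, &a);	/* list: a, b, c */

	for (struct hnode *n = head; n; n = n->next)
		printf("%d ", n->val);
	printf("\n");
	return 0;
}
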
index 182be0f124074c6d508d8a97e498ea7925665a1f..31a731e06f5022afd7c040d5688840491b228e9d 100755 (executable)
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -309,9 +309,12 @@ our $Operators     = qr{
 our $c90_Keywords = qr{do|for|while|if|else|return|goto|continue|switch|default|case|break}x;
 
 our $NonptrType;
+our $NonptrTypeMisordered;
 our $NonptrTypeWithAttr;
 our $Type;
+our $TypeMisordered;
 our $Declare;
+our $DeclareMisordered;
 
 our $NON_ASCII_UTF8    = qr{
        [\xC2-\xDF][\x80-\xBF]               # non-overlong 2-byte
@@ -353,16 +356,36 @@ our $signature_tags = qr{(?xi:
        Cc:
 )};
 
+our @typeListMisordered = (
+       qr{char\s+(?:un)?signed},
+       qr{int\s+(?:(?:un)?signed\s+)?short\s},
+       qr{int\s+short(?:\s+(?:un)?signed)},
+       qr{short\s+int(?:\s+(?:un)?signed)},
+       qr{(?:un)?signed\s+int\s+short},
+       qr{short\s+(?:un)?signed},
+       qr{long\s+int\s+(?:un)?signed},
+       qr{int\s+long\s+(?:un)?signed},
+       qr{long\s+(?:un)?signed\s+int},
+       qr{int\s+(?:un)?signed\s+long},
+       qr{int\s+(?:un)?signed},
+       qr{int\s+long\s+long\s+(?:un)?signed},
+       qr{long\s+long\s+int\s+(?:un)?signed},
+       qr{long\s+long\s+(?:un)?signed\s+int},
+       qr{long\s+long\s+(?:un)?signed},
+       qr{long\s+(?:un)?signed},
+);
+
 our @typeList = (
        qr{void},
-       qr{(?:unsigned\s+)?char},
-       qr{(?:unsigned\s+)?short},
-       qr{(?:unsigned\s+)?int},
-       qr{(?:unsigned\s+)?long},
-       qr{(?:unsigned\s+)?long\s+int},
-       qr{(?:unsigned\s+)?long\s+long},
-       qr{(?:unsigned\s+)?long\s+long\s+int},
-       qr{unsigned},
+       qr{(?:(?:un)?signed\s+)?char},
+       qr{(?:(?:un)?signed\s+)?short\s+int},
+       qr{(?:(?:un)?signed\s+)?short},
+       qr{(?:(?:un)?signed\s+)?int},
+       qr{(?:(?:un)?signed\s+)?long\s+int},
+       qr{(?:(?:un)?signed\s+)?long\s+long\s+int},
+       qr{(?:(?:un)?signed\s+)?long\s+long},
+       qr{(?:(?:un)?signed\s+)?long},
+       qr{(?:un)?signed},
        qr{float},
        qr{double},
        qr{bool},
@@ -372,6 +395,7 @@ our @typeList = (
        qr{${Ident}_t},
        qr{${Ident}_handler},
        qr{${Ident}_handler_fn},
+       @typeListMisordered,
 );
 our @typeListWithAttr = (
        @typeList,
@@ -399,11 +423,6 @@ foreach my $entry (@mode_permission_funcs) {
        $mode_perms_search .= $entry->[0];
 }
 
-our $declaration_macros = qr{(?x:
-       (?:$Storage\s+)?(?:DECLARE|DEFINE)_[A-Z]+\s*\(|
-       (?:$Storage\s+)?LIST_HEAD\s*\(
-)};
-
 our $allowed_asm_includes = qr{(?x:
        irq|
        memory
@@ -413,6 +432,7 @@ our $allowed_asm_includes = qr{(?x:
 sub build_types {
        my $mods = "(?x:  \n" . join("|\n  ", @modifierList) . "\n)";
        my $all = "(?x:  \n" . join("|\n  ", @typeList) . "\n)";
+       my $Misordered = "(?x:  \n" . join("|\n  ", @typeListMisordered) . "\n)";
        my $allWithAttr = "(?x:  \n" . join("|\n  ", @typeListWithAttr) . "\n)";
        $Modifier       = qr{(?:$Attribute|$Sparse|$mods)};
        $NonptrType     = qr{
@@ -424,6 +444,13 @@ sub build_types {
                        )
                        (?:\s+$Modifier|\s+const)*
                  }x;
+       $NonptrTypeMisordered   = qr{
+                       (?:$Modifier\s+|const\s+)*
+                       (?:
+                               (?:${Misordered}\b)
+                       )
+                       (?:\s+$Modifier|\s+const)*
+                 }x;
        $NonptrTypeWithAttr     = qr{
                        (?:$Modifier\s+|const\s+)*
                        (?:
@@ -435,10 +462,16 @@ sub build_types {
                  }x;
        $Type   = qr{
                        $NonptrType
-                       (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*|\[\])+|(?:\s*\[\s*\])+)?
+                       (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*\s*(?:const\s*)?|\[\])+|(?:\s*\[\s*\])+)?
+                       (?:\s+$Inline|\s+$Modifier)*
+                 }x;
+       $TypeMisordered = qr{
+                       $NonptrTypeMisordered
+                       (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*\s*(?:const\s*)?|\[\])+|(?:\s*\[\s*\])+)?
                        (?:\s+$Inline|\s+$Modifier)*
                  }x;
        $Declare        = qr{(?:$Storage\s+(?:$Inline\s+)?)?$Type};
+       $DeclareMisordered      = qr{(?:$Storage\s+(?:$Inline\s+)?)?$TypeMisordered};
 }
 build_types();
 
@@ -452,6 +485,12 @@ our $balanced_parens = qr/(\((?:[^\(\)]++|(?-1))*\))/;
 our $LvalOrFunc        = qr{((?:[\&\*]\s*)?$Lval)\s*($balanced_parens{0,1})\s*};
 our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant)};
 
+our $declaration_macros = qr{(?x:
+       (?:$Storage\s+)?(?:[A-Z_][A-Z0-9]*_){0,2}(?:DEFINE|DECLARE)(?:_[A-Z0-9]+){1,2}\s*\(|
+       (?:$Storage\s+)?LIST_HEAD\s*\(|
+       (?:$Storage\s+)?${Type}\s+uninitialized_var\s*\(
+)};
+
 sub deparenthesize {
        my ($string) = @_;
        return "" if (!defined($string));
@@ -550,11 +589,43 @@ sub seed_camelcase_includes {
        }
 }
 
+sub git_commit_info {
+       my ($commit, $id, $desc) = @_;
+
+       return ($id, $desc) if ((which("git") eq "") || !(-e ".git"));
+
+       my $output = `git log --no-color --format='%H %s' -1 $commit 2>&1`;
+       $output =~ s/^\s*//gm;
+       my @lines = split("\n", $output);
+
+       if ($lines[0] =~ /^error: short SHA1 $commit is ambiguous\./) {
+# Maybe one day convert this block of bash into something that returns
+# all matching commit ids, but it's very slow...
+#
+#              echo "checking commits $1..."
+#              git rev-list --remotes | grep -i "^$1" |
+#              while read line ; do
+#                  git log --format='%H %s' -1 $line |
+#                  echo "commit $(cut -c 1-12,41-)"
+#              done
+       } elsif ($lines[0] =~ /^fatal: ambiguous argument '$commit': unknown revision or path not in the working tree\./) {
+       } else {
+               $id = substr($lines[0], 0, 12);
+               $desc = substr($lines[0], 41);
+       }
+
+       return ($id, $desc);
+}
+
 $chk_signoff = 0 if ($file);
 
 my @rawlines = ();
 my @lines = ();
 my @fixed = ();
+my @fixed_inserted = ();
+my @fixed_deleted = ();
+my $fixlinenr = -1;
+
 my $vname;
 for my $filename (@ARGV) {
        my $FILE;
@@ -583,6 +654,9 @@ for my $filename (@ARGV) {
        @rawlines = ();
        @lines = ();
        @fixed = ();
+       @fixed_inserted = ();
+       @fixed_deleted = ();
+       $fixlinenr = -1;
 }
 
 exit($exit);
@@ -674,6 +748,18 @@ sub format_email {
        return $formatted_email;
 }
 
+sub which {
+       my ($bin) = @_;
+
+       foreach my $path (split(/:/, $ENV{PATH})) {
+               if (-e "$path/$bin") {
+                       return "$path/$bin";
+               }
+       }
+
+       return "";
+}
+
 sub which_conf {
        my ($conf) = @_;
 
@@ -1483,6 +1569,90 @@ sub report_dump {
        our @report;
 }
 
+sub fixup_current_range {
+       my ($lineRef, $offset, $length) = @_;
+
+       if ($$lineRef =~ /^\@\@ -\d+,\d+ \+(\d+),(\d+) \@\@/) {
+               my $o = $1;
+               my $l = $2;
+               my $no = $o + $offset;
+               my $nl = $l + $length;
+               $$lineRef =~ s/\+$o,$l \@\@/\+$no,$nl \@\@/;
+       }
+}
+
+sub fix_inserted_deleted_lines {
+       my ($linesRef, $insertedRef, $deletedRef) = @_;
+
+       my $range_last_linenr = 0;
+       my $delta_offset = 0;
+
+       my $old_linenr = 0;
+       my $new_linenr = 0;
+
+       my $next_insert = 0;
+       my $next_delete = 0;
+
+       my @lines = ();
+
+       my $inserted = @{$insertedRef}[$next_insert++];
+       my $deleted = @{$deletedRef}[$next_delete++];
+
+       foreach my $old_line (@{$linesRef}) {
+               my $save_line = 1;
+               my $line = $old_line;   #don't modify the array
+               if ($line =~ /^(?:\+\+\+|\-\-\-)\s+\S+/) {      #new filename
+                       $delta_offset = 0;
+               } elsif ($line =~ /^\@\@ -\d+,\d+ \+\d+,\d+ \@\@/) {    #new hunk
+                       $range_last_linenr = $new_linenr;
+                       fixup_current_range(\$line, $delta_offset, 0);
+               }
+
+               while (defined($deleted) && ${$deleted}{'LINENR'} == $old_linenr) {
+                       $deleted = @{$deletedRef}[$next_delete++];
+                       $save_line = 0;
+                       fixup_current_range(\$lines[$range_last_linenr], $delta_offset--, -1);
+               }
+
+               while (defined($inserted) && ${$inserted}{'LINENR'} == $old_linenr) {
+                       push(@lines, ${$inserted}{'LINE'});
+                       $inserted = @{$insertedRef}[$next_insert++];
+                       $new_linenr++;
+                       fixup_current_range(\$lines[$range_last_linenr], $delta_offset++, 1);
+               }
+
+               if ($save_line) {
+                       push(@lines, $line);
+                       $new_linenr++;
+               }
+
+               $old_linenr++;
+       }
+
+       return @lines;
+}
+
+sub fix_insert_line {
+       my ($linenr, $line) = @_;
+
+       my $inserted = {
+               LINENR => $linenr,
+               LINE => $line,
+       };
+       push(@fixed_inserted, $inserted);
+}
+
+sub fix_delete_line {
+       my ($linenr, $line) = @_;
+
+       my $deleted = {
+               LINENR => $linenr,
+               LINE => $line,
+       };
+
+       push(@fixed_deleted, $deleted);
+}
+
 sub ERROR {
        my ($type, $msg) = @_;
 
@@ -1637,11 +1807,13 @@ sub process {
        my $signoff = 0;
        my $is_patch = 0;
 
-       my $in_header_lines = 1;
+       my $in_header_lines = $file ? 0 : 1;
        my $in_commit_log = 0;          #Scanning lines before patch
-
+       my $reported_maintainer_file = 0;
        my $non_utf8_charset = 0;
 
+       my $last_blank_line = 0;
+
        our @report = ();
        our $cnt_lines = 0;
        our $cnt_error = 0;
@@ -1759,8 +1931,10 @@ sub process {
 
        $realcnt = 0;
        $linenr = 0;
+       $fixlinenr = -1;
        foreach my $line (@lines) {
                $linenr++;
+               $fixlinenr++;
                my $sline = $line;      #copy of $line
                $sline =~ s/$;/ /g;     #with comments as spaces
 
@@ -1891,7 +2065,7 @@ sub process {
                                if (WARN("BAD_SIGN_OFF",
                                         "Do not use whitespace before $ucfirst_sign_off\n" . $herecurr) &&
                                    $fix) {
-                                       $fixed[$linenr - 1] =
+                                       $fixed[$fixlinenr] =
                                            "$ucfirst_sign_off $email";
                                }
                        }
@@ -1899,7 +2073,7 @@ sub process {
                                if (WARN("BAD_SIGN_OFF",
                                         "'$ucfirst_sign_off' is the preferred signature form\n" . $herecurr) &&
                                    $fix) {
-                                       $fixed[$linenr - 1] =
+                                       $fixed[$fixlinenr] =
                                            "$ucfirst_sign_off $email";
                                }
 
@@ -1908,7 +2082,7 @@ sub process {
                                if (WARN("BAD_SIGN_OFF",
                                         "Use a single space after $ucfirst_sign_off\n" . $herecurr) &&
                                    $fix) {
-                                       $fixed[$linenr - 1] =
+                                       $fixed[$fixlinenr] =
                                            "$ucfirst_sign_off $email";
                                }
                        }
@@ -1956,6 +2130,31 @@ sub process {
                              "Remove Gerrit Change-Id's before submitting upstream.\n" . $herecurr);
                }
 
+# Check for improperly formed commit descriptions
+               if ($in_commit_log &&
+                   $line =~ /\bcommit\s+[0-9a-f]{5,}/i &&
+                   $line !~ /\b[Cc]ommit [0-9a-f]{12,16} \("/) {
+                       $line =~ /\b(c)ommit\s+([0-9a-f]{5,})/i;
+                       my $init_char = $1;
+                       my $orig_commit = lc($2);
+                       my $id = '01234567890ab';
+                       my $desc = 'commit description';
+                       ($id, $desc) = git_commit_info($orig_commit, $id, $desc);
+                       ERROR("GIT_COMMIT_ID",
+                             "Please use 12 to 16 chars for the git commit ID like: '${init_char}ommit $id (\"$desc\")'\n" . $herecurr);
+               }
+
+# Check for added, moved or deleted files
+               if (!$reported_maintainer_file && !$in_commit_log &&
+                   ($line =~ /^(?:new|deleted) file mode\s*\d+\s*$/ ||
+                    $line =~ /^rename (?:from|to) [\w\/\.\-]+\s*$/ ||
+                    ($line =~ /\{\s*([\w\/\.\-]*)\s*\=\>\s*([\w\/\.\-]*)\s*\}/ &&
+                     (defined($1) || defined($2))))) {
+                       $reported_maintainer_file = 1;
+                       WARN("FILE_PATH_CHANGES",
+                            "added, moved or deleted file(s), does MAINTAINERS need updating?\n" . $herecurr);
+               }
+
 # Check for wrappage within a valid hunk of the file
                if ($realcnt != 0 && $line !~ m{^(?:\+|-| |\\ No newline|$)}) {
                        ERROR("CORRUPTED_PATCH",
@@ -1993,7 +2192,8 @@ sub process {
 # Check if it's the start of a commit log
 # (not a header line and we haven't seen the patch filename)
                if ($in_header_lines && $realfile =~ /^$/ &&
-                   $rawline !~ /^(commit\b|from\b|[\w-]+:).+$/i) {
+                   !($rawline =~ /^\s+\S/ ||
+                     $rawline =~ /^(commit\b|from\b|[\w-]+:).*$/i)) {
                        $in_header_lines = 0;
                        $in_commit_log = 1;
                }
@@ -2021,14 +2221,14 @@ sub process {
                        if (ERROR("DOS_LINE_ENDINGS",
                                  "DOS line endings\n" . $herevet) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/[\s\015]+$//;
+                               $fixed[$fixlinenr] =~ s/[\s\015]+$//;
                        }
                } elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) {
                        my $herevet = "$here\n" . cat_vet($rawline) . "\n";
                        if (ERROR("TRAILING_WHITESPACE",
                                  "trailing whitespace\n" . $herevet) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\s+$//;
+                               $fixed[$fixlinenr] =~ s/\s+$//;
                        }
 
                        $rpt_cleaners = 1;
@@ -2049,7 +2249,7 @@ sub process {
 # Only applies when adding the entry originally, after that we do not have
 # sufficient context to determine whether it is indeed long enough.
                if ($realfile =~ /Kconfig/ &&
-                   $line =~ /.\s*config\s+/) {
+                   $line =~ /^\+\s*config\s+/) {
                        my $length = 0;
                        my $cnt = $realcnt;
                        my $ln = $linenr + 1;
@@ -2062,10 +2262,11 @@ sub process {
                                $is_end = $lines[$ln - 1] =~ /^\+/;
 
                                next if ($f =~ /^-/);
+                               last if (!$file && $f =~ /^\@\@/);
 
-                               if ($lines[$ln - 1] =~ /.\s*(?:bool|tristate)\s*\"/) {
+                               if ($lines[$ln - 1] =~ /^\+\s*(?:bool|tristate)\s*\"/) {
                                        $is_start = 1;
-                               } elsif ($lines[$ln - 1] =~ /.\s*(?:---)?help(?:---)?$/) {
+                               } elsif ($lines[$ln - 1] =~ /^\+\s*(?:---)?help(?:---)?$/) {
                                        $length = -1;
                                }
 
@@ -2161,12 +2362,18 @@ sub process {
                             "quoted string split across lines\n" . $hereprev);
                }
 
+# check for a missing space in a string concatenation
+               if ($prevrawline =~ /[^\\]\w"$/ && $rawline =~ /^\+[\t ]+"\w/) {
+                       WARN('MISSING_SPACE',
+                            "break quoted strings at a space character\n" . $hereprev);
+               }
+
 # check for spaces before a quoted newline
                if ($rawline =~ /^.*\".*\s\\n/) {
                        if (WARN("QUOTED_WHITESPACE_BEFORE_NEWLINE",
                                 "unnecessary whitespace before a quoted newline\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/^(\+.*\".*)\s+\\n/$1\\n/;
+                               $fixed[$fixlinenr] =~ s/^(\+.*\".*)\s+\\n/$1\\n/;
                        }
 
                }
@@ -2203,7 +2410,7 @@ sub process {
                        if (ERROR("CODE_INDENT",
                                  "code indent should use tabs where possible\n" . $herevet) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
+                               $fixed[$fixlinenr] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
                        }
                }
 
@@ -2213,9 +2420,9 @@ sub process {
                        if (WARN("SPACE_BEFORE_TAB",
                                "please, no space before tabs\n" . $herevet) &&
                            $fix) {
-                               while ($fixed[$linenr - 1] =~
+                               while ($fixed[$fixlinenr] =~
                                           s/(^\+.*) {8,8}+\t/$1\t\t/) {}
-                               while ($fixed[$linenr - 1] =~
+                               while ($fixed[$fixlinenr] =~
                                           s/(^\+.*) +\t/$1\t/) {}
                        }
                }
@@ -2249,19 +2456,19 @@ sub process {
                                        if (CHK("PARENTHESIS_ALIGNMENT",
                                                "Alignment should match open parenthesis\n" . $hereprev) &&
                                            $fix && $line =~ /^\+/) {
-                                               $fixed[$linenr - 1] =~
+                                               $fixed[$fixlinenr] =~
                                                    s/^\+[ \t]*/\+$goodtabindent/;
                                        }
                                }
                        }
                }
 
-               if ($line =~ /^\+.*\*[ \t]*\)[ \t]+(?!$Assignment|$Arithmetic)/) {
+               if ($line =~ /^\+.*\(\s*$Type\s*\)[ \t]+(?!$Assignment|$Arithmetic|{)/) {
                        if (CHK("SPACING",
-                               "No space is necessary after a cast\n" . $hereprev) &&
+                               "No space is necessary after a cast\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
-                                   s/^(\+.*\*[ \t]*\))[ \t]+/$1/;
+                               $fixed[$fixlinenr] =~
+                                   s/(\(\s*$Type\s*\))[ \t]+/$1/;
                        }
                }
 
@@ -2291,10 +2498,44 @@ sub process {
                             "networking block comments put the trailing */ on a separate line\n" . $herecurr);
                }
 
+# check for missing blank lines after struct/union declarations
+# with exceptions for various attributes and macros
+               if ($prevline =~ /^[\+ ]};?\s*$/ &&
+                   $line =~ /^\+/ &&
+                   !($line =~ /^\+\s*$/ ||
+                     $line =~ /^\+\s*EXPORT_SYMBOL/ ||
+                     $line =~ /^\+\s*MODULE_/i ||
+                     $line =~ /^\+\s*\#\s*(?:end|elif|else)/ ||
+                     $line =~ /^\+[a-z_]*init/ ||
+                     $line =~ /^\+\s*(?:static\s+)?[A-Z_]*ATTR/ ||
+                     $line =~ /^\+\s*DECLARE/ ||
+                     $line =~ /^\+\s*__setup/)) {
+                       if (CHK("LINE_SPACING",
+                               "Please use a blank line after function/struct/union/enum declarations\n" . $hereprev) &&
+                           $fix) {
+                               fix_insert_line($fixlinenr, "\+");
+                       }
+               }
+
+# check for multiple consecutive blank lines
+               if ($prevline =~ /^[\+ ]\s*$/ &&
+                   $line =~ /^\+\s*$/ &&
+                   $last_blank_line != ($linenr - 1)) {
+                       if (CHK("LINE_SPACING",
+                               "Please don't use multiple blank lines\n" . $hereprev) &&
+                           $fix) {
+                               fix_delete_line($fixlinenr, $rawline);
+                       }
+
+                       $last_blank_line = $linenr;
+               }
+
 # check for missing blank lines after declarations
                if ($sline =~ /^\+\s+\S/ &&                     #Not at char 1
                        # actual declarations
                    ($prevline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+                       # function pointer declarations
+                    $prevline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
                        # foo bar; where foo is some local typedef or #define
                     $prevline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
                        # known declaration macros
@@ -2307,6 +2548,8 @@ sub process {
                      $prevline =~ /(?:\{\s*|\\)$/) &&
                        # looks like a declaration
                    !($sline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+                       # function pointer declarations
+                     $sline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
                        # foo bar; where foo is some local typedef or #define
                      $sline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
                        # known declaration macros
@@ -2321,8 +2564,11 @@ sub process {
                      $sline =~ /^\+\s+\(?\s*(?:$Compare|$Assignment|$Operators)/) &&
                        # indentation of previous and current line are the same
                    (($prevline =~ /\+(\s+)\S/) && $sline =~ /^\+$1\S/)) {
-                       WARN("SPACING",
-                            "Missing a blank line after declarations\n" . $hereprev);
+                       if (WARN("LINE_SPACING",
+                                "Missing a blank line after declarations\n" . $hereprev) &&
+                           $fix) {
+                               fix_insert_line($fixlinenr, "\+");
+                       }
                }
 
 # check for spaces at the beginning of a line.
@@ -2335,13 +2581,33 @@ sub process {
                        if (WARN("LEADING_SPACE",
                                 "please, no spaces at the start of a line\n" . $herevet) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
+                               $fixed[$fixlinenr] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
                        }
                }
 
 # check we are in a valid C source file if not then ignore this hunk
                next if ($realfile !~ /\.(h|c)$/);
 
+# check indentation of any line with a bare else
+# if the previous line is a break or return and is indented 1 tab more...
+               if ($sline =~ /^\+([\t]+)(?:}[ \t]*)?else(?:[ \t]*{)?\s*$/) {
+                       my $tabs = length($1) + 1;
+                       if ($prevline =~ /^\+\t{$tabs,$tabs}(?:break|return)\b/) {
+                               WARN("UNNECESSARY_ELSE",
+                                    "else is not generally useful after a break or return\n" . $hereprev);
+                       }
+               }
+
+# check indentation of a line with a break;
+# if the previous line is a goto or return and is indented the same # of tabs
+               if ($sline =~ /^\+([\t]+)break\s*;\s*$/) {
+                       my $tabs = $1;
+                       if ($prevline =~ /^\+$tabs(?:goto|return)\b/) {
+                               WARN("UNNECESSARY_BREAK",
+                                    "break is not useful after a goto or return\n" . $hereprev);
+                       }
+               }
+
 # discourage the addition of CONFIG_EXPERIMENTAL in #if(def).
                if ($line =~ /^\+\s*\#\s*if.*\bCONFIG_EXPERIMENTAL\b/) {
                        WARN("CONFIG_EXPERIMENTAL",
@@ -2477,7 +2743,7 @@ sub process {
 
 # if/while/etc brace do not go on next line, unless defining a do while loop,
 # or if that brace on the next line is for something else
-               if ($line =~ /(.*)\b((?:if|while|for|switch)\s*\(|do\b|else\b)/ && $line !~ /^.\s*\#/) {
+               if ($line =~ /(.*)\b((?:if|while|for|switch|(?:[a-z_]+|)for_each[a-z_]+)\s*\(|do\b|else\b)/ && $line !~ /^.\s*\#/) {
                        my $pre_ctx = "$1$2";
 
                        my ($level, @ctx) = ctx_statement_level($linenr, $realcnt, 0);
@@ -2504,7 +2770,7 @@ sub process {
                        #print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n";
                        #print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n";
 
-                       if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln -1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) {
+                       if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln - 1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) {
                                ERROR("OPEN_BRACE",
                                      "that open brace { should be on the previous line\n" .
                                        "$here\n$ctx\n$rawlines[$ctx_ln - 1]\n");
@@ -2523,7 +2789,7 @@ sub process {
                }
 
 # Check relative indent for conditionals and blocks.
-               if ($line =~ /\b(?:(?:if|while|for)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) {
+               if ($line =~ /\b(?:(?:if|while|for|(?:[a-z_]+|)for_each[a-z_]+)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) {
                        ($stat, $cond, $line_nr_next, $remain_next, $off_next) =
                                ctx_statement_block($linenr, $realcnt, 0)
                                        if (!defined $stat);
@@ -2654,8 +2920,18 @@ sub process {
 # check for initialisation to aggregates open brace on the next line
                if ($line =~ /^.\s*{/ &&
                    $prevline =~ /(?:^|[^=])=\s*$/) {
-                       ERROR("OPEN_BRACE",
-                             "that open brace { should be on the previous line\n" . $hereprev);
+                       if (ERROR("OPEN_BRACE",
+                                 "that open brace { should be on the previous line\n" . $hereprev) &&
+                           $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
+                               fix_delete_line($fixlinenr - 1, $prevrawline);
+                               fix_delete_line($fixlinenr, $rawline);
+                               my $fixedline = $prevrawline;
+                               $fixedline =~ s/\s*=\s*$/ = {/;
+                               fix_insert_line($fixlinenr, $fixedline);
+                               $fixedline = $line;
+                               $fixedline =~ s/^(.\s*){\s*/$1/;
+                               fix_insert_line($fixlinenr, $fixedline);
+                       }
                }
 
 #
@@ -2680,10 +2956,10 @@ sub process {
                        if (ERROR("C99_COMMENTS",
                                  "do not use C99 // comments\n" . $herecurr) &&
                            $fix) {
-                               my $line = $fixed[$linenr - 1];
+                               my $line = $fixed[$fixlinenr];
                                if ($line =~ /\/\/(.*)$/) {
                                        my $comment = trim($1);
-                                       $fixed[$linenr - 1] =~ s@\/\/(.*)$@/\* $comment \*/@;
+                                       $fixed[$fixlinenr] =~ s@\/\/(.*)$@/\* $comment \*/@;
                                }
                        }
                }
@@ -2742,7 +3018,7 @@ sub process {
                                  "do not initialise globals to 0 or NULL\n" .
                                      $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/;
+                               $fixed[$fixlinenr] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/;
                        }
                }
 # check for static initialisers.
@@ -2751,10 +3027,17 @@ sub process {
                                  "do not initialise statics to 0 or NULL\n" .
                                      $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/;
+                               $fixed[$fixlinenr] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/;
                        }
                }
 
+# check for misordered declarations of char/short/int/long with signed/unsigned
+               while ($sline =~ m{(\b$TypeMisordered\b)}g) {
+                       my $tmp = trim($1);
+                       WARN("MISORDERED_TYPE",
+                            "type '$tmp' should be specified in [[un]signed] [short|int|long|long long] order\n" . $herecurr);
+               }
+
 # check for static const char * arrays.
                if ($line =~ /\bstatic\s+const\s+char\s*\*\s*(\w+)\s*\[\s*\]\s*=\s*/) {
                        WARN("STATIC_CONST_CHAR_ARRAY",
@@ -2781,7 +3064,7 @@ sub process {
                        if (ERROR("FUNCTION_WITHOUT_ARGS",
                                  "Bad function definition - $1() should probably be $1(void)\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/(\b($Type)\s+($Ident))\s*\(\s*\)/$2 $3(void)/;
+                               $fixed[$fixlinenr] =~ s/(\b($Type)\s+($Ident))\s*\(\s*\)/$2 $3(void)/;
                        }
                }
 
@@ -2790,7 +3073,7 @@ sub process {
                        if (WARN("DEFINE_PCI_DEVICE_TABLE",
                                 "Prefer struct pci_device_id over deprecated DEFINE_PCI_DEVICE_TABLE\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\b(?:static\s+|)DEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=\s*/static const struct pci_device_id $1\[\] = /;
+                               $fixed[$fixlinenr] =~ s/\b(?:static\s+|)DEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=\s*/static const struct pci_device_id $1\[\] = /;
                        }
                }
 
@@ -2827,7 +3110,7 @@ sub process {
                                        my $sub_from = $ident;
                                        my $sub_to = $ident;
                                        $sub_to =~ s/\Q$from\E/$to/;
-                                       $fixed[$linenr - 1] =~
+                                       $fixed[$fixlinenr] =~
                                            s@\Q$sub_from\E@$sub_to@;
                                }
                        }
@@ -2855,7 +3138,7 @@ sub process {
                                        my $sub_from = $match;
                                        my $sub_to = $match;
                                        $sub_to =~ s/\Q$from\E/$to/;
-                                       $fixed[$linenr - 1] =~
+                                       $fixed[$fixlinenr] =~
                                            s@\Q$sub_from\E@$sub_to@;
                                }
                        }
@@ -2917,7 +3200,7 @@ sub process {
                        if (WARN("PREFER_PR_LEVEL",
                                 "Prefer pr_warn(... to pr_warning(...\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/\bpr_warning\b/pr_warn/;
                        }
                }
@@ -2933,17 +3216,40 @@ sub process {
 
 # function brace can't be on same line, except for #defines of do while,
 # or if closed on same line
-               if (($line=~/$Type\s*$Ident\(.*\).*\s{/) and
+               if (($line=~/$Type\s*$Ident\(.*\).*\s*{/) and
                    !($line=~/\#\s*define.*do\s{/) and !($line=~/}/)) {
-                       ERROR("OPEN_BRACE",
-                             "open brace '{' following function declarations go on the next line\n" . $herecurr);
+                       if (ERROR("OPEN_BRACE",
+                                 "open brace '{' following function declarations go on the next line\n" . $herecurr) &&
+                           $fix) {
+                               fix_delete_line($fixlinenr, $rawline);
+                               my $fixed_line = $rawline;
+                               $fixed_line =~ /(^..*$Type\s*$Ident\(.*\)\s*){(.*)$/;
+                               my $line1 = $1;
+                               my $line2 = $2;
+                               fix_insert_line($fixlinenr, ltrim($line1));
+                               fix_insert_line($fixlinenr, "\+{");
+                               if ($line2 !~ /^\s*$/) {
+                                       fix_insert_line($fixlinenr, "\+\t" . trim($line2));
+                               }
+                       }
                }
 
 # open braces for enum, union and struct go on the same line.
                if ($line =~ /^.\s*{/ &&
                    $prevline =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?\s*$/) {
-                       ERROR("OPEN_BRACE",
-                             "open brace '{' following $1 go on the same line\n" . $hereprev);
+                       if (ERROR("OPEN_BRACE",
+                                 "open brace '{' following $1 go on the same line\n" . $hereprev) &&
+                           $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
+                               fix_delete_line($fixlinenr - 1, $prevrawline);
+                               fix_delete_line($fixlinenr, $rawline);
+                               my $fixedline = rtrim($prevrawline) . " {";
+                               fix_insert_line($fixlinenr, $fixedline);
+                               $fixedline = $rawline;
+                               $fixedline =~ s/^(.\s*){\s*/$1\t/;
+                               if ($fixedline !~ /^\+\s*$/) {
+                                       fix_insert_line($fixlinenr, $fixedline);
+                               }
+                       }
                }
 
 # missing space after union, struct or enum definition
@@ -2951,7 +3257,7 @@ sub process {
                        if (WARN("SPACING",
                                 "missing space after $1 definition\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/^(.\s*(?:typedef\s+)?(?:enum|union|struct)(?:\s+$Ident){1,2})([=\{])/$1 $2/;
                        }
                }
@@ -3021,7 +3327,7 @@ sub process {
                        }
 
                        if (show_type("SPACING") && $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/^(.\s*)$Declare\s*\(\s*\*\s*$Ident\s*\)\s*\(/$1 . $declare . $post_declare_space . '(*' . $funcname . ')('/ex;
                        }
                }
@@ -3038,7 +3344,7 @@ sub process {
                                if (ERROR("BRACKET_SPACE",
                                          "space prohibited before open square bracket '['\n" . $herecurr) &&
                                    $fix) {
-                                   $fixed[$linenr - 1] =~
+                                   $fixed[$fixlinenr] =~
                                        s/^(\+.*?)\s+\[/$1\[/;
                                }
                        }
@@ -3073,7 +3379,7 @@ sub process {
                                if (WARN("SPACING",
                                         "space prohibited between function name and open parenthesis '('\n" . $herecurr) &&
                                             $fix) {
-                                       $fixed[$linenr - 1] =~
+                                       $fixed[$fixlinenr] =~
                                            s/\b$name\s+\(/$name\(/;
                                }
                        }
@@ -3341,8 +3647,8 @@ sub process {
                                $fixed_line = $fixed_line . $fix_elements[$#elements];
                        }
 
-                       if ($fix && $line_fixed && $fixed_line ne $fixed[$linenr - 1]) {
-                               $fixed[$linenr - 1] = $fixed_line;
+                       if ($fix && $line_fixed && $fixed_line ne $fixed[$fixlinenr]) {
+                               $fixed[$fixlinenr] = $fixed_line;
                        }
 
 
@@ -3353,7 +3659,7 @@ sub process {
                        if (WARN("SPACING",
                                 "space prohibited before semicolon\n" . $herecurr) &&
                            $fix) {
-                               1 while $fixed[$linenr - 1] =~
+                               1 while $fixed[$fixlinenr] =~
                                    s/^(\+.*\S)\s+;/$1;/;
                        }
                }
@@ -3386,7 +3692,7 @@ sub process {
                        if (ERROR("SPACING",
                                  "space required before the open brace '{'\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/^(\+.*(?:do|\))){/$1 {/;
+                               $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\))){/$1 {/;
                        }
                }
 
@@ -3404,7 +3710,7 @@ sub process {
                        if (ERROR("SPACING",
                                  "space required after that close brace '}'\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/}((?!(?:,|;|\)))\S)/} $1/;
                        }
                }
@@ -3414,7 +3720,7 @@ sub process {
                        if (ERROR("SPACING",
                                  "space prohibited after that open square bracket '['\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/\[\s+/\[/;
                        }
                }
@@ -3422,7 +3728,7 @@ sub process {
                        if (ERROR("SPACING",
                                  "space prohibited before that close square bracket ']'\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/\s+\]/\]/;
                        }
                }
@@ -3433,7 +3739,7 @@ sub process {
                        if (ERROR("SPACING",
                                  "space prohibited after that open parenthesis '('\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/\(\s+/\(/;
                        }
                }
@@ -3443,18 +3749,27 @@ sub process {
                        if (ERROR("SPACING",
                                  "space prohibited before that close parenthesis ')'\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/\s+\)/\)/;
                        }
                }
 
+# check unnecessary parentheses around addressof/dereference single $Lvals
+# ie: &(foo->bar) should be &foo->bar and *(foo->bar) should be *foo->bar
+
+               while ($line =~ /(?:[^&]&\s*|\*)\(\s*($Ident\s*(?:$Member\s*)+)\s*\)/g) {
+                       CHK("UNNECESSARY_PARENTHESES",
+                           "Unnecessary parentheses around $1\n" . $herecurr);
+                       }
+
 #goto labels aren't indented, allow a single space however
                if ($line=~/^.\s+[A-Za-z\d_]+:(?![0-9]+)/ and
                   !($line=~/^. [A-Za-z\d_]+:/) and !($line=~/^.\s+default:/)) {
                        if (WARN("INDENTED_LABEL",
                                 "labels should not be indented\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/^(.)\s+/$1/;
                        }
                }
@@ -3516,7 +3831,7 @@ sub process {
                        if (ERROR("SPACING",
                                  "space required before the open parenthesis '('\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/\b(if|while|for|switch)\(/$1 \(/;
                        }
                }
@@ -3606,7 +3921,7 @@ sub process {
 # if should not continue a brace
                if ($line =~ /}\s*if\b/) {
                        ERROR("TRAILING_STATEMENTS",
-                             "trailing statements should be on next line\n" .
+                             "trailing statements should be on next line (or did you mean 'else if'?)\n" .
                                $herecurr);
                }
 # case and default should not have general statements after them
@@ -3622,14 +3937,26 @@ sub process {
 
                # Check for }<nl>else {, these must be at the same
                # indent level to be relevant to each other.
-               if ($prevline=~/}\s*$/ and $line=~/^.\s*else\s*/ and
-                                               $previndent == $indent) {
-                       ERROR("ELSE_AFTER_BRACE",
-                             "else should follow close brace '}'\n" . $hereprev);
+               if ($prevline=~/}\s*$/ and $line=~/^.\s*else\s*/ &&
+                   $previndent == $indent) {
+                       if (ERROR("ELSE_AFTER_BRACE",
+                                 "else should follow close brace '}'\n" . $hereprev) &&
+                           $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
+                               fix_delete_line($fixlinenr - 1, $prevrawline);
+                               fix_delete_line($fixlinenr, $rawline);
+                               my $fixedline = $prevrawline;
+                               $fixedline =~ s/}\s*$//;
+                               if ($fixedline !~ /^\+\s*$/) {
+                                       fix_insert_line($fixlinenr, $fixedline);
+                               }
+                               $fixedline = $rawline;
+                               $fixedline =~ s/^(.\s*)else/$1} else/;
+                               fix_insert_line($fixlinenr, $fixedline);
+                       }
                }
 
-               if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ and
-                                               $previndent == $indent) {
+               if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ &&
+                   $previndent == $indent) {
                        my ($s, $c) = ctx_statement_block($linenr, $realcnt, 0);
 
                        # Find out what is on the end of the line after the
@@ -3638,8 +3965,18 @@ sub process {
                        $s =~ s/\n.*//g;
 
                        if ($s =~ /^\s*;/) {
-                               ERROR("WHILE_AFTER_BRACE",
-                                     "while should follow close brace '}'\n" . $hereprev);
+                               if (ERROR("WHILE_AFTER_BRACE",
+                                         "while should follow close brace '}'\n" . $hereprev) &&
+                                   $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
+                                       fix_delete_line($fixlinenr - 1, $prevrawline);
+                                       fix_delete_line($fixlinenr, $rawline);
+                                       my $fixedline = $prevrawline;
+                                       my $trailing = $rawline;
+                                       $trailing =~ s/^\+//;
+                                       $trailing = trim($trailing);
+                                       $fixedline =~ s/}\s*$/} $trailing/;
+                                       fix_insert_line($fixlinenr, $fixedline);
+                               }
                        }
                }
 
@@ -3653,7 +3990,7 @@ sub process {
                                         "Avoid gcc v4.3+ binary constant extension: <$var>\n" . $herecurr) &&
                                    $fix) {
                                        my $hexval = sprintf("0x%x", oct($var));
-                                       $fixed[$linenr - 1] =~
+                                       $fixed[$fixlinenr] =~
                                            s/\b$var\b/$hexval/;
                                }
                        }
@@ -3689,7 +4026,7 @@ sub process {
                        if (WARN("WHITESPACE_AFTER_LINE_CONTINUATION",
                                 "Whitespace after \\ makes next lines useless\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\s+$//;
+                               $fixed[$fixlinenr] =~ s/\s+$//;
                        }
                }
 
@@ -3762,7 +4099,7 @@ sub process {
                            $dstat !~ /^(?:$Ident|-?$Constant),$/ &&                    # 10, // foo(),
                            $dstat !~ /^(?:$Ident|-?$Constant);$/ &&                    # foo();
                            $dstat !~ /^[!~-]?(?:$Lval|$Constant)$/ &&          # 10 // foo() // !foo // ~foo // -foo // foo->bar // foo.bar->baz
-                           $dstat !~ /^'X'$/ &&                                        # character constants
+                           $dstat !~ /^'X'$/ && $dstat !~ /^'XX'$/ &&                  # character constants
                            $dstat !~ /$exceptions/ &&
                            $dstat !~ /^\.$Ident\s*=/ &&                                # .foo =
                            $dstat !~ /^(?:\#\s*$Ident|\#\s*$Constant)\s*$/ &&          # stringification #foo
@@ -4014,6 +4351,23 @@ sub process {
                        }
                }
 
+# check for unnecessary "Out of Memory" messages
+               if ($line =~ /^\+.*\b$logFunctions\s*\(/ &&
+                   $prevline =~ /^[ \+]\s*if\s*\(\s*(\!\s*|NULL\s*==\s*)?($Lval)(\s*==\s*NULL\s*)?\s*\)/ &&
+                   (defined $1 || defined $3) &&
+                   $linenr > 3) {
+                       my $testval = $2;
+                       my $testline = $lines[$linenr - 3];
+
+                       my ($s, $c) = ctx_statement_block($linenr - 3, $realcnt, 0);
+#                      print("line: <$line>\nprevline: <$prevline>\ns: <$s>\nc: <$c>\n\n\n");
+
+                       if ($c =~ /(?:^|\n)[ \+]\s*(?:$Type\s*)?\Q$testval\E\s*=\s*(?:\([^\)]*\)\s*)?\s*(?:devm_)?(?:[kv][czm]alloc(?:_node|_array)?\b|kstrdup|(?:dev_)?alloc_skb)/) {
+                               WARN("OOM_MESSAGE",
+                                    "Possible unnecessary 'out of memory' message\n" . $hereprev);
+                       }
+               }
+
 # check for bad placement of section $InitAttribute (e.g.: __initdata)
                if ($line =~ /(\b$InitAttribute\b)/) {
                        my $attr = $1;
@@ -4027,7 +4381,7 @@ sub process {
                                      WARN("MISPLACED_INIT",
                                           "$attr should be placed after $var\n" . $herecurr))) &&
                                    $fix) {
-                                       $fixed[$linenr - 1] =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*([=;])\s*/"$1" . trim(string_find_replace($2, "\\s*$attr\\s*", " ")) . " " . trim(string_find_replace($3, "\\s*$attr\\s*", "")) . " $attr" . ("$4" eq ";" ? ";" : " = ")/e;
+                                       $fixed[$fixlinenr] =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*([=;])\s*/"$1" . trim(string_find_replace($2, "\\s*$attr\\s*", " ")) . " " . trim(string_find_replace($3, "\\s*$attr\\s*", "")) . " $attr" . ("$4" eq ";" ? ";" : " = ")/e;
                                }
                        }
                }
@@ -4041,7 +4395,7 @@ sub process {
                        if (ERROR("INIT_ATTRIBUTE",
                                  "Use of const init definition must use ${attr_prefix}initconst\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/$InitAttributeData/${attr_prefix}initconst/;
                        }
                }
@@ -4052,12 +4406,12 @@ sub process {
                        if (ERROR("INIT_ATTRIBUTE",
                                  "Use of $attr requires a separate use of const\n" . $herecurr) &&
                            $fix) {
-                               my $lead = $fixed[$linenr - 1] =~
+                               my $lead = $fixed[$fixlinenr] =~
                                    /(^\+\s*(?:static\s+))/;
                                $lead = rtrim($1);
                                $lead = "$lead " if ($lead !~ /^\+$/);
                                $lead = "${lead}const ";
-                               $fixed[$linenr - 1] =~ s/(^\+\s*(?:static\s+))/$lead/;
+                               $fixed[$fixlinenr] =~ s/(^\+\s*(?:static\s+))/$lead/;
                        }
                }
 
@@ -4070,7 +4424,7 @@ sub process {
                        if (WARN("CONSTANT_CONVERSION",
                                 "$constant_func should be $func\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\b$constant_func\b/$func/g;
+                               $fixed[$fixlinenr] =~ s/\b$constant_func\b/$func/g;
                        }
                }
 
@@ -4120,7 +4474,7 @@ sub process {
                        if (ERROR("SPACING",
                                  "exactly one space required after that #$1\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/^(.\s*\#\s*(ifdef|ifndef|elif))\s{2,}/$1 /;
                        }
 
@@ -4168,7 +4522,7 @@ sub process {
                        if (WARN("INLINE",
                                 "plain inline is preferred over $1\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\b(__inline__|__inline)\b/inline/;
+                               $fixed[$fixlinenr] =~ s/\b(__inline__|__inline)\b/inline/;
 
                        }
                }
@@ -4193,7 +4547,7 @@ sub process {
                        if (WARN("PREFER_PRINTF",
                                 "__printf(string-index, first-to-check) is preferred over __attribute__((format(printf, string-index, first-to-check)))\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)\)\s*\)\s*\)/"__printf(" . trim($1) . ")"/ex;
+                               $fixed[$fixlinenr] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)\)\s*\)\s*\)/"__printf(" . trim($1) . ")"/ex;
 
                        }
                }
@@ -4204,7 +4558,7 @@ sub process {
                        if (WARN("PREFER_SCANF",
                                 "__scanf(string-index, first-to-check) is preferred over __attribute__((format(scanf, string-index, first-to-check)))\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\s*,\s*(.*)\)\s*\)\s*\)/"__scanf(" . trim($1) . ")"/ex;
+                               $fixed[$fixlinenr] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\s*,\s*(.*)\)\s*\)\s*\)/"__scanf(" . trim($1) . ")"/ex;
                        }
                }
 
@@ -4219,7 +4573,7 @@ sub process {
                        if (WARN("SIZEOF_PARENTHESIS",
                                 "sizeof $1 should be sizeof($1)\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/"sizeof(" . trim($1) . ")"/ex;
+                               $fixed[$fixlinenr] =~ s/\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/"sizeof(" . trim($1) . ")"/ex;
                        }
                }
 
@@ -4242,7 +4596,7 @@ sub process {
                                if (WARN("PREFER_SEQ_PUTS",
                                         "Prefer seq_puts to seq_printf\n" . $herecurr) &&
                                    $fix) {
-                                       $fixed[$linenr - 1] =~ s/\bseq_printf\b/seq_puts/;
+                                       $fixed[$fixlinenr] =~ s/\bseq_printf\b/seq_puts/;
                                }
                        }
                }
@@ -4271,7 +4625,7 @@ sub process {
                        if (WARN("PREFER_ETHER_ADDR_COPY",
                                 "Prefer ether_addr_copy() over memcpy() if the Ethernet addresses are __aligned(2)\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/ether_addr_copy($2, $7)/;
+                               $fixed[$fixlinenr] =~ s/\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/ether_addr_copy($2, $7)/;
                        }
                }
 
@@ -4359,7 +4713,7 @@ sub process {
                        if (CHK("AVOID_EXTERNS",
                                "extern prototypes should be avoided in .h files\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/(.*)\bextern\b\s*(.*)/$1$2/;
+                               $fixed[$fixlinenr] =~ s/(.*)\bextern\b\s*(.*)/$1$2/;
                        }
                }
 
@@ -4419,23 +4773,24 @@ sub process {
 
 # check for k[mz]alloc with multiplies that could be kmalloc_array/kcalloc
                if ($^V && $^V ge 5.10.0 &&
-                   $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/) {
+                   $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)\s*,/) {
                        my $oldfunc = $3;
                        my $a1 = $4;
                        my $a2 = $10;
                        my $newfunc = "kmalloc_array";
                        $newfunc = "kcalloc" if ($oldfunc eq "kzalloc");
-                       if ($a1 =~ /^sizeof\s*\S/ || $a2 =~ /^sizeof\s*\S/) {
+                       my $r1 = $a1;
+                       my $r2 = $a2;
+                       if ($a1 =~ /^sizeof\s*\S/) {
+                               $r1 = $a2;
+                               $r2 = $a1;
+                       }
+                       if ($r1 !~ /^sizeof\b/ && $r2 =~ /^sizeof\s*\S/ &&
+                           !($r1 =~ /^$Constant$/ || $r1 =~ /^[A-Z_][A-Z0-9_]*$/)) {
                                if (WARN("ALLOC_WITH_MULTIPLY",
                                         "Prefer $newfunc over $oldfunc with multiply\n" . $herecurr) &&
                                    $fix) {
-                                       my $r1 = $a1;
-                                       my $r2 = $a2;
-                                       if ($a1 =~ /^sizeof\s*\S/) {
-                                               $r1 = $a2;
-                                               $r2 = $a1;
-                                       }
-                                       $fixed[$linenr - 1] =~ s/\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/$1 . ' = ' . "$newfunc(" . trim($r1) . ', ' . trim($r2)/e;
+                                       $fixed[$fixlinenr] =~ s/\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/$1 . ' = ' . "$newfunc(" . trim($r1) . ', ' . trim($r2)/e;
 
                                }
                        }
@@ -4459,17 +4814,17 @@ sub process {
                        if (WARN("ONE_SEMICOLON",
                                 "Statements terminations use 1 semicolon\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/(\s*;\s*){2,}$/;/g;
+                               $fixed[$fixlinenr] =~ s/(\s*;\s*){2,}$/;/g;
                        }
                }
 
-# check for case / default statements not preceeded by break/fallthrough/switch
+# check for case / default statements not preceded by break/fallthrough/switch
                if ($line =~ /^.\s*(?:case\s+(?:$Ident|$Constant)\s*|default):/) {
                        my $has_break = 0;
                        my $has_statement = 0;
                        my $count = 0;
                        my $prevline = $linenr;
-                       while ($prevline > 1 && $count < 3 && !$has_break) {
+                       while ($prevline > 1 && ($file || $count < 3) && !$has_break) {
                                $prevline--;
                                my $rline = $rawlines[$prevline - 1];
                                my $fline = $lines[$prevline - 1];
@@ -4507,7 +4862,7 @@ sub process {
                        if (WARN("USE_FUNC",
                                 "__func__ should be used instead of gcc specific __FUNCTION__\n"  . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\b__FUNCTION__\b/__func__/g;
+                               $fixed[$fixlinenr] =~ s/\b__FUNCTION__\b/__func__/g;
                        }
                }
 
@@ -4750,12 +5105,16 @@ sub process {
        hash_show_words(\%use_type, "Used");
        hash_show_words(\%ignore_type, "Ignored");
 
-       if ($clean == 0 && $fix && "@rawlines" ne "@fixed") {
+       if ($clean == 0 && $fix &&
+           ("@rawlines" ne "@fixed" ||
+            $#fixed_inserted >= 0 || $#fixed_deleted >= 0)) {
                my $newfile = $filename;
                $newfile .= ".EXPERIMENTAL-checkpatch-fixes" if (!$fix_inplace);
                my $linecount = 0;
                my $f;
 
+               @fixed = fix_inserted_deleted_lines(\@fixed, \@fixed_inserted, \@fixed_deleted);
+
                open($f, '>', $newfile)
                    or die "$P: Can't open $newfile for write\n";
                foreach my $fixed_line (@fixed) {
@@ -4763,7 +5122,7 @@ sub process {
                        if ($file) {
                                if ($linecount > 3) {
                                        $fixed_line =~ s/^\+//;
-                                       print $f $fixed_line. "\n";
+                                       print $f $fixed_line . "\n";
                                }
                        } else {
                                print $f $fixed_line . "\n";