Merge branch 'akpm' (patchbomb from Andrew Morton)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 7 Aug 2014 04:14:42 +0000 (21:14 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 7 Aug 2014 04:14:42 +0000 (21:14 -0700)
Merge incoming from Andrew Morton:
 - Various misc things.
 - arch/sh updates.
 - Part of ocfs2.  Review is slow.
 - Slab updates.
 - Most of -mm.
 - printk updates.
 - lib/ updates.
 - checkpatch updates.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (226 commits)
  checkpatch: update $declaration_macros, add uninitialized_var
  checkpatch: warn on missing spaces in broken up quoted
  checkpatch: fix false positives for --strict "space after cast" test
  checkpatch: fix false positive MISSING_BREAK warnings with --file
  checkpatch: add test for native c90 types in unusual order
  checkpatch: add signed generic types
  checkpatch: add short int to c variable types
  checkpatch: add for_each tests to indentation and brace tests
  checkpatch: fix brace style misuses of else and while
  checkpatch: add --fix option for a couple OPEN_BRACE misuses
  checkpatch: use the correct indentation for which()
  checkpatch: add fix_insert_line and fix_delete_line helpers
  checkpatch: add ability to insert and delete lines to patch/file
  checkpatch: add an index variable for fixed lines
  checkpatch: warn on break after goto or return with same tab indentation
  checkpatch: emit a warning on file add/move/delete
  checkpatch: add test for commit id formatting style in commit log
  checkpatch: emit fewer kmalloc_array/kcalloc conversion warnings
  checkpatch: improve "no space after cast" test
  checkpatch: allow multiple const * types
  ...

156 files changed:
Documentation/RCU/whatisRCU.txt
Documentation/kernel-parameters.txt
Documentation/trace/postprocess/trace-vmscan-postprocess.pl
Makefile
arch/arm/mm/dma-mapping.c
arch/ia64/mm/init.c
arch/powerpc/kvm/Makefile
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/kvm/book3s_hv_builtin.c
arch/powerpc/kvm/book3s_hv_cma.c [deleted file]
arch/powerpc/kvm/book3s_hv_cma.h [deleted file]
arch/powerpc/mm/mem.c
arch/score/include/uapi/asm/ptrace.h
arch/sh/drivers/dma/Kconfig
arch/sh/include/asm/io_noioport.h
arch/sh/include/cpu-sh4/cpu/dma-register.h
arch/sh/include/cpu-sh4a/cpu/dma.h
arch/sh/kernel/cpu/sh4a/clock-sh7724.c
arch/sh/kernel/time.c
arch/sh/mm/asids-debugfs.c
arch/sh/mm/init.c
arch/tile/kernel/module.c
arch/x86/mm/fault.c
arch/x86/mm/init_32.c
arch/x86/mm/init_64.c
drivers/ata/Kconfig
drivers/ata/libata-core.c
drivers/base/Kconfig
drivers/base/dma-contiguous.c
drivers/base/memory.c
drivers/base/node.c
drivers/block/zram/zram_drv.c
drivers/block/zram/zram_drv.h
drivers/firmware/memmap.c
drivers/gpu/drm/drm_hashtab.c
drivers/hwmon/asus_atk0110.c
drivers/lguest/core.c
drivers/net/ethernet/intel/i40e/i40e_ethtool.c
drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
drivers/staging/android/binder.c
drivers/staging/lustre/lustre/libcfs/hash.c
drivers/tty/sysrq.c
fs/fscache/main.c
fs/logfs/readwrite.c
fs/namespace.c
fs/notify/fanotify/fanotify.c
fs/notify/fanotify/fanotify_user.c
fs/notify/inode_mark.c
fs/notify/inotify/inotify_fsnotify.c
fs/notify/inotify/inotify_user.c
fs/notify/notification.c
fs/notify/vfsmount_mark.c
fs/ntfs/file.c
fs/ocfs2/alloc.c
fs/ocfs2/dlm/dlmdomain.c
fs/ocfs2/dlm/dlmmaster.c
fs/ocfs2/move_extents.c
fs/ocfs2/refcounttree.c
fs/ocfs2/slot_map.c
fs/proc/meminfo.c
fs/proc/task_mmu.c
fs/squashfs/file_direct.c
fs/squashfs/super.c
include/linux/bitmap.h
include/linux/byteorder/generic.h
include/linux/cma.h [new file with mode: 0644]
include/linux/dma-contiguous.h
include/linux/fs.h
include/linux/fsnotify_backend.h
include/linux/gfp.h
include/linux/glob.h [new file with mode: 0644]
include/linux/highmem.h
include/linux/huge_mm.h
include/linux/hugetlb.h
include/linux/kernel.h
include/linux/klist.h
include/linux/list.h
include/linux/memblock.h
include/linux/memory_hotplug.h
include/linux/mmdebug.h
include/linux/mmu_notifier.h
include/linux/mmzone.h
include/linux/nodemask.h
include/linux/oom.h
include/linux/page-flags.h
include/linux/pagemap.h
include/linux/printk.h
include/linux/rculist.h
include/linux/swap.h
include/linux/vmalloc.h
include/linux/zbud.h
include/linux/zlib.h
include/linux/zpool.h [new file with mode: 0644]
include/trace/events/migrate.h
include/trace/events/pagemap.h
init/Kconfig
kernel/auditfilter.c
kernel/exit.c
kernel/printk/printk.c
kernel/smp.c
kernel/sysctl.c
kernel/watchdog.c
lib/Kconfig
lib/Kconfig.debug
lib/Makefile
lib/bitmap.c
lib/cmdline.c
lib/glob.c [new file with mode: 0644]
lib/klist.c
lib/list_sort.c
lib/string_helpers.c
lib/test-kstrtox.c
lib/zlib_deflate/deflate.c
lib/zlib_inflate/inflate.c
mm/Kconfig
mm/Makefile
mm/cma.c [new file with mode: 0644]
mm/filemap.c
mm/gup.c
mm/highmem.c
mm/huge_memory.c
mm/hugetlb.c
mm/hwpoison-inject.c
mm/internal.h
mm/madvise.c
mm/memcontrol.c
mm/memory-failure.c
mm/memory.c
mm/memory_hotplug.c
mm/mlock.c
mm/mmap.c
mm/mmu_notifier.c
mm/oom_kill.c
mm/page-writeback.c
mm/page_alloc.c
mm/readahead.c
mm/shmem.c
mm/slab.c
mm/slab.h
mm/slab_common.c
mm/slub.c
mm/swap.c
mm/util.c
mm/vmalloc.c
mm/vmscan.c
mm/vmstat.c
mm/zbud.c
mm/zpool.c [new file with mode: 0644]
mm/zsmalloc.c
mm/zswap.c
net/batman-adv/fragmentation.c
net/bridge/br_multicast.c
net/ipv4/fib_trie.c
net/ipv6/addrlabel.c
net/xfrm/xfrm_policy.c
scripts/checkpatch.pl

index 49b8551a3b68e00de3e9819f4a3b20c6cf4b3fa3..e48c57f1943bab162530416afb35f3acdff1729d 100644 (file)
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -818,7 +818,7 @@ RCU pointer/list update:
        list_add_tail_rcu
        list_del_rcu
        list_replace_rcu
-       hlist_add_after_rcu
+       hlist_add_behind_rcu
        hlist_add_before_rcu
        hlist_add_head_rcu
        hlist_del_rcu
index 883901b9ac4f11d0ea47d42769c8ee3c6e3e3360..9344d833b7ea231d42ecf7731851f71f9602033e 100644 (file)
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1716,8 +1716,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        7 (KERN_DEBUG)          debug-level messages
 
        log_buf_len=n[KMG]      Sets the size of the printk ring buffer,
-                       in bytes.  n must be a power of two.  The default
-                       size is set in the kernel config file.
+                       in bytes.  n must be a power of two and greater
+                       than the minimal size. The minimal size is defined
+                       by LOG_BUF_SHIFT kernel config parameter. There is
+                       also CONFIG_LOG_CPU_MAX_BUF_SHIFT config parameter
+                       that allows to increase the default size depending on
+                       the number of CPUs. See init/Kconfig for more details.
 
        logo.nologo     [FB] Disables display of the built-in Linux logo.
                        This may be used to provide more screen space for
index 78c9a7b2b58fdb0a55f72c23890e07dcf8f912b2..8f961ef2b4577d4b7485e84bd379dc5ebd022f47 100644 (file)
--- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
+++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
@@ -47,6 +47,10 @@ use constant HIGH_KSWAPD_REWAKEUP            => 21;
 use constant HIGH_NR_SCANNED                   => 22;
 use constant HIGH_NR_TAKEN                     => 23;
 use constant HIGH_NR_RECLAIMED                 => 24;
+use constant HIGH_NR_FILE_SCANNED              => 25;
+use constant HIGH_NR_ANON_SCANNED              => 26;
+use constant HIGH_NR_FILE_RECLAIMED            => 27;
+use constant HIGH_NR_ANON_RECLAIMED            => 28;
 
 my %perprocesspid;
 my %perprocess;
@@ -56,14 +60,18 @@ my $opt_read_procstat;
 
 my $total_wakeup_kswapd;
 my ($total_direct_reclaim, $total_direct_nr_scanned);
+my ($total_direct_nr_file_scanned, $total_direct_nr_anon_scanned);
 my ($total_direct_latency, $total_kswapd_latency);
 my ($total_direct_nr_reclaimed);
+my ($total_direct_nr_file_reclaimed, $total_direct_nr_anon_reclaimed);
 my ($total_direct_writepage_file_sync, $total_direct_writepage_file_async);
 my ($total_direct_writepage_anon_sync, $total_direct_writepage_anon_async);
 my ($total_kswapd_nr_scanned, $total_kswapd_wake);
+my ($total_kswapd_nr_file_scanned, $total_kswapd_nr_anon_scanned);
 my ($total_kswapd_writepage_file_sync, $total_kswapd_writepage_file_async);
 my ($total_kswapd_writepage_anon_sync, $total_kswapd_writepage_anon_async);
 my ($total_kswapd_nr_reclaimed);
+my ($total_kswapd_nr_file_reclaimed, $total_kswapd_nr_anon_reclaimed);
 
 # Catch sigint and exit on request
 my $sigint_report = 0;
@@ -374,6 +382,7 @@ EVENT_PROCESS:
                        }
                        my $isolate_mode = $1;
                        my $nr_scanned = $4;
+                       my $file = $6;
 
                        # To closer match vmstat scanning statistics, only count isolate_both
                        # and isolate_inactive as scanning. isolate_active is rotation
@@ -382,6 +391,11 @@ EVENT_PROCESS:
                        # isolate_both     == 3
                        if ($isolate_mode != 2) {
                                $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned;
+                               if ($file == 1) {
+                                       $perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED} += $nr_scanned;
+                               } else {
+                                       $perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED} += $nr_scanned;
+                               }
                        }
                } elsif ($tracepoint eq "mm_vmscan_lru_shrink_inactive") {
                        $details = $6;
@@ -391,8 +405,19 @@ EVENT_PROCESS:
                                print "         $regex_lru_shrink_inactive/o\n";
                                next;
                        }
+
                        my $nr_reclaimed = $4;
+                       my $flags = $6;
+                       my $file = 0;
+                       if ($flags =~ /RECLAIM_WB_FILE/) {
+                               $file = 1;
+                       }
                        $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED} += $nr_reclaimed;
+                       if ($file) {
+                               $perprocesspid{$process_pid}->{HIGH_NR_FILE_RECLAIMED} += $nr_reclaimed;
+                       } else {
+                               $perprocesspid{$process_pid}->{HIGH_NR_ANON_RECLAIMED} += $nr_reclaimed;
+                       }
                } elsif ($tracepoint eq "mm_vmscan_writepage") {
                        $details = $6;
                        if ($details !~ /$regex_writepage/o) {
@@ -493,7 +518,11 @@ sub dump_stats {
                $total_direct_reclaim += $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN};
                $total_wakeup_kswapd += $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD};
                $total_direct_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED};
+               $total_direct_nr_file_scanned += $stats{$process_pid}->{HIGH_NR_FILE_SCANNED};
+               $total_direct_nr_anon_scanned += $stats{$process_pid}->{HIGH_NR_ANON_SCANNED};
                $total_direct_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED};
+               $total_direct_nr_file_reclaimed += $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
+               $total_direct_nr_anon_reclaimed += $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
                $total_direct_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
                $total_direct_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
                $total_direct_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
@@ -513,7 +542,11 @@ sub dump_stats {
                        $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN},
                        $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD},
                        $stats{$process_pid}->{HIGH_NR_SCANNED},
+                       $stats{$process_pid}->{HIGH_NR_FILE_SCANNED},
+                       $stats{$process_pid}->{HIGH_NR_ANON_SCANNED},
                        $stats{$process_pid}->{HIGH_NR_RECLAIMED},
+                       $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED},
+                       $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED},
                        $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC},
                        $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC},
                        $this_reclaim_delay / 1000);
@@ -552,7 +585,11 @@ sub dump_stats {
 
                $total_kswapd_wake += $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE};
                $total_kswapd_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED};
+               $total_kswapd_nr_file_scanned += $stats{$process_pid}->{HIGH_NR_FILE_SCANNED};
+               $total_kswapd_nr_anon_scanned += $stats{$process_pid}->{HIGH_NR_ANON_SCANNED};
                $total_kswapd_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED};
+               $total_kswapd_nr_file_reclaimed += $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
+               $total_kswapd_nr_anon_reclaimed += $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
                $total_kswapd_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
                $total_kswapd_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
                $total_kswapd_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
@@ -563,7 +600,11 @@ sub dump_stats {
                        $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE},
                        $stats{$process_pid}->{HIGH_KSWAPD_REWAKEUP},
                        $stats{$process_pid}->{HIGH_NR_SCANNED},
+                       $stats{$process_pid}->{HIGH_NR_FILE_SCANNED},
+                       $stats{$process_pid}->{HIGH_NR_ANON_SCANNED},
                        $stats{$process_pid}->{HIGH_NR_RECLAIMED},
+                       $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED},
+                       $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED},
                        $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC},
                        $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC});
 
@@ -594,7 +635,11 @@ sub dump_stats {
        print "\nSummary\n";
        print "Direct reclaims:                         $total_direct_reclaim\n";
        print "Direct reclaim pages scanned:            $total_direct_nr_scanned\n";
+       print "Direct reclaim file pages scanned:       $total_direct_nr_file_scanned\n";
+       print "Direct reclaim anon pages scanned:       $total_direct_nr_anon_scanned\n";
        print "Direct reclaim pages reclaimed:          $total_direct_nr_reclaimed\n";
+       print "Direct reclaim file pages reclaimed:     $total_direct_nr_file_reclaimed\n";
+       print "Direct reclaim anon pages reclaimed:     $total_direct_nr_anon_reclaimed\n";
        print "Direct reclaim write file sync I/O:      $total_direct_writepage_file_sync\n";
        print "Direct reclaim write anon sync I/O:      $total_direct_writepage_anon_sync\n";
        print "Direct reclaim write file async I/O:     $total_direct_writepage_file_async\n";
@@ -604,7 +649,11 @@ sub dump_stats {
        print "\n";
        print "Kswapd wakeups:                          $total_kswapd_wake\n";
        print "Kswapd pages scanned:                    $total_kswapd_nr_scanned\n";
+       print "Kswapd file pages scanned:               $total_kswapd_nr_file_scanned\n";
+       print "Kswapd anon pages scanned:               $total_kswapd_nr_anon_scanned\n";
        print "Kswapd pages reclaimed:                  $total_kswapd_nr_reclaimed\n";
+       print "Kswapd file pages reclaimed:             $total_kswapd_nr_file_reclaimed\n";
+       print "Kswapd anon pages reclaimed:             $total_kswapd_nr_anon_reclaimed\n";
        print "Kswapd reclaim write file sync I/O:      $total_kswapd_writepage_file_sync\n";
        print "Kswapd reclaim write anon sync I/O:      $total_kswapd_writepage_anon_sync\n";
        print "Kswapd reclaim write file async I/O:     $total_kswapd_writepage_file_async\n";
@@ -629,7 +678,11 @@ sub aggregate_perprocesspid() {
                $perprocess{$process}->{MM_VMSCAN_WAKEUP_KSWAPD} += $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD};
                $perprocess{$process}->{HIGH_KSWAPD_REWAKEUP} += $perprocesspid{$process_pid}->{HIGH_KSWAPD_REWAKEUP};
                $perprocess{$process}->{HIGH_NR_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_SCANNED};
+               $perprocess{$process}->{HIGH_NR_FILE_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED};
+               $perprocess{$process}->{HIGH_NR_ANON_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED};
                $perprocess{$process}->{HIGH_NR_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED};
+               $perprocess{$process}->{HIGH_NR_FILE_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
+               $perprocess{$process}->{HIGH_NR_ANON_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
                $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
                $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
                $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
index d0901b46b4bfd15aedfd3a0ccfa2d3c3255ff8fc..a897c50db515d7adf8dec34f7c3298c9988be953 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -621,6 +621,9 @@ else
 KBUILD_CFLAGS  += -O2
 endif
 
+# Tell gcc to never replace conditional load with a non-conditional one
+KBUILD_CFLAGS  += $(call cc-option,--param=allow-store-data-races=0)
+
 ifdef CONFIG_READABLE_ASM
 # Disable optimizations that make assembler listings hard to read.
 # reorder blocks reorders the control in the function
@@ -636,6 +639,22 @@ KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN})
 endif
 
 # Handle stack protector mode.
+#
+# Since kbuild can potentially perform two passes (first with the old
+# .config values and then with updated .config values), we cannot error out
+# if a desired compiler option is unsupported. If we were to error, kbuild
+# could never get to the second pass and actually notice that we changed
+# the option to something that was supported.
+#
+# Additionally, we don't want to fallback and/or silently change which compiler
+# flags will be used, since that leads to producing kernels with different
+# security feature characteristics depending on the compiler used. ("But I
+# selected CC_STACKPROTECTOR_STRONG! Why did it build with _REGULAR?!")
+#
+# The middle ground is to warn here so that the failed option is obvious, but
+# to let the build fail with bad compiler flags so that we can't produce a
+# kernel when there is a CONFIG and compiler mismatch.
+#
 ifdef CONFIG_CC_STACKPROTECTOR_REGULAR
   stackp-flag := -fstack-protector
   ifeq ($(call cc-option, $(stackp-flag)),)
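
(Not part of the patch: a userspace illustration of the kind of "store data race" that the new --param=allow-store-data-races=0 option in the first Makefile hunk forbids GCC from introducing. The names shared/update/flag are invented for the example.)

#include <stdbool.h>

int shared;	/* assume another thread/CPU reads this concurrently */

/* Source semantics: write 'shared' only when 'flag' is true. */
void update(bool flag, int val)
{
	if (flag)
		shared = val;
}

/*
 * Without the parameter, GCC's optimizers could rewrite the body as roughly:
 *
 *	int tmp = shared;
 *	shared = val;
 *	if (!flag)
 *		shared = tmp;
 *
 * which is equivalent for single-threaded code but briefly stores 'val'
 * even when 'flag' is false -- a store another CPU can observe.
 * allow-store-data-races=0 tells GCC not to invent such stores.
 */
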
index 1f88db06b133c1f253cfc6ced41483cf672ce4a0..7a996aaa061e99fb5d5f3875da51890b0862b1c1 100644 (file)
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -26,6 +26,7 @@
 #include <linux/io.h>
 #include <linux/vmalloc.h>
 #include <linux/sizes.h>
+#include <linux/cma.h>
 
 #include <asm/memory.h>
 #include <asm/highmem.h>
index 25c350264a41012bba72d3d992674aea17d32fba..892d43e32f3b5995936a24d91b4229cb38e78afb 100644 (file)
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -631,7 +631,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
        pgdat = NODE_DATA(nid);
 
-       zone = pgdat->node_zones + ZONE_NORMAL;
+       zone = pgdat->node_zones +
+               zone_for_memory(nid, start, size, ZONE_NORMAL);
        ret = __add_pages(nid, zone, start_pfn, nr_pages);
 
        if (ret)
index ce569b6bf4d8c0f7ad7f87b76a9b1effd9e573de..72905c30082e65a025398a69561d96728ae2714b 100644 (file)
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -90,7 +90,6 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
        book3s_hv_rm_mmu.o \
        book3s_hv_ras.o \
        book3s_hv_builtin.o \
-       book3s_hv_cma.o \
        $(kvm-book3s_64-builtin-xics-objs-y)
 endif
 
index 68468d695f12ab864281f19a3cfd872a6976fc8d..a01744fc3483160092d45c26764daeeaf2bfb041 100644 (file)
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,8 +37,6 @@
 #include <asm/ppc-opcode.h>
 #include <asm/cputable.h>
 
-#include "book3s_hv_cma.h"
-
 /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
 #define MAX_LPID_970   63
 
@@ -64,10 +62,10 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
        }
 
        kvm->arch.hpt_cma_alloc = 0;
-       VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
        page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
        if (page) {
                hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+               memset((void *)hpt, 0, (1 << order));
                kvm->arch.hpt_cma_alloc = 1;
        }
 
index 7cde8a6652056c26f4e05343439c90ca43328188..6cf498a9bc987d62ba335c2e10aabe00dc3f4384 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
 #include <linux/init.h>
 #include <linux/memblock.h>
 #include <linux/sizes.h>
+#include <linux/cma.h>
 
 #include <asm/cputable.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 
-#include "book3s_hv_cma.h"
+#define KVM_CMA_CHUNK_ORDER    18
+
 /*
  * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
  * should be power of 2.
@@ -43,6 +45,8 @@ static unsigned long kvm_cma_resv_ratio = 5;
 unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */
 EXPORT_SYMBOL_GPL(kvm_rma_pages);
 
+static struct cma *kvm_cma;
+
 /* Work out RMLS (real mode limit selector) field value for a given RMA size.
    Assumes POWER7 or PPC970. */
 static inline int lpcr_rmls(unsigned long rma_size)
@@ -97,7 +101,7 @@ struct kvm_rma_info *kvm_alloc_rma()
        ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
        if (!ri)
                return NULL;
-       page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
+       page = cma_alloc(kvm_cma, kvm_rma_pages, get_order(kvm_rma_pages));
        if (!page)
                goto err_out;
        atomic_set(&ri->use_count, 1);
@@ -112,7 +116,7 @@ EXPORT_SYMBOL_GPL(kvm_alloc_rma);
 void kvm_release_rma(struct kvm_rma_info *ri)
 {
        if (atomic_dec_and_test(&ri->use_count)) {
-               kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
+               cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages);
                kfree(ri);
        }
 }
@@ -131,16 +135,18 @@ struct page *kvm_alloc_hpt(unsigned long nr_pages)
 {
        unsigned long align_pages = HPT_ALIGN_PAGES;
 
+       VM_BUG_ON(get_order(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+
        /* Old CPUs require HPT aligned on a multiple of its size */
        if (!cpu_has_feature(CPU_FTR_ARCH_206))
                align_pages = nr_pages;
-       return kvm_alloc_cma(nr_pages, align_pages);
+       return cma_alloc(kvm_cma, nr_pages, get_order(align_pages));
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
 
 void kvm_release_hpt(struct page *page, unsigned long nr_pages)
 {
-       kvm_release_cma(page, nr_pages);
+       cma_release(kvm_cma, page, nr_pages);
 }
 EXPORT_SYMBOL_GPL(kvm_release_hpt);
 
@@ -179,7 +185,8 @@ void __init kvm_cma_reserve(void)
                        align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
 
                align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
-               kvm_cma_declare_contiguous(selected_size, align_size);
+               cma_declare_contiguous(0, selected_size, 0, align_size,
+                       KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma);
        }
 }
 
diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c
deleted file mode 100644 (file)
index d9d3d85..0000000
--- a/arch/powerpc/kvm/book3s_hv_cma.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Contiguous Memory Allocator for ppc KVM hash pagetable  based on CMA
- * for DMA mapping framework
- *
- * Copyright IBM Corporation, 2013
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License or (at your optional) any later version of the license.
- *
- */
-#define pr_fmt(fmt) "kvm_cma: " fmt
-
-#ifdef CONFIG_CMA_DEBUG
-#ifndef DEBUG
-#  define DEBUG
-#endif
-#endif
-
-#include <linux/memblock.h>
-#include <linux/mutex.h>
-#include <linux/sizes.h>
-#include <linux/slab.h>
-
-#include "book3s_hv_cma.h"
-
-struct kvm_cma {
-       unsigned long   base_pfn;
-       unsigned long   count;
-       unsigned long   *bitmap;
-};
-
-static DEFINE_MUTEX(kvm_cma_mutex);
-static struct kvm_cma kvm_cma_area;
-
-/**
- * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling
- *                               for kvm hash pagetable
- * @size:  Size of the reserved memory.
- * @alignment:  Alignment for the contiguous memory area
- *
- * This function reserves memory for kvm cma area. It should be
- * called by arch code when early allocator (memblock or bootmem)
- * is still activate.
- */
-long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment)
-{
-       long base_pfn;
-       phys_addr_t addr;
-       struct kvm_cma *cma = &kvm_cma_area;
-
-       pr_debug("%s(size %lx)\n", __func__, (unsigned long)size);
-
-       if (!size)
-               return -EINVAL;
-       /*
-        * Sanitise input arguments.
-        * We should be pageblock aligned for CMA.
-        */
-       alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order));
-       size = ALIGN(size, alignment);
-       /*
-        * Reserve memory
-        * Use __memblock_alloc_base() since
-        * memblock_alloc_base() panic()s.
-        */
-       addr = __memblock_alloc_base(size, alignment, 0);
-       if (!addr) {
-               base_pfn = -ENOMEM;
-               goto err;
-       } else
-               base_pfn = PFN_DOWN(addr);
-
-       /*
-        * Each reserved area must be initialised later, when more kernel
-        * subsystems (like slab allocator) are available.
-        */
-       cma->base_pfn = base_pfn;
-       cma->count    = size >> PAGE_SHIFT;
-       pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M);
-       return 0;
-err:
-       pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
-       return base_pfn;
-}
-
-/**
- * kvm_alloc_cma() - allocate pages from contiguous area
- * @nr_pages: Requested number of pages.
- * @align_pages: Requested alignment in number of pages
- *
- * This function allocates memory buffer for hash pagetable.
- */
-struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
-{
-       int ret;
-       struct page *page = NULL;
-       struct kvm_cma *cma = &kvm_cma_area;
-       unsigned long chunk_count, nr_chunk;
-       unsigned long mask, pfn, pageno, start = 0;
-
-
-       if (!cma || !cma->count)
-               return NULL;
-
-       pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__,
-                (void *)cma, nr_pages, align_pages);
-
-       if (!nr_pages)
-               return NULL;
-       /*
-        * align mask with chunk size. The bit tracks pages in chunk size
-        */
-       VM_BUG_ON(!is_power_of_2(align_pages));
-       mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1;
-       BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER);
-
-       chunk_count = cma->count >>  (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
-       nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
-
-       mutex_lock(&kvm_cma_mutex);
-       for (;;) {
-               pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count,
-                                                   start, nr_chunk, mask);
-               if (pageno >= chunk_count)
-                       break;
-
-               pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT));
-               ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA);
-               if (ret == 0) {
-                       bitmap_set(cma->bitmap, pageno, nr_chunk);
-                       page = pfn_to_page(pfn);
-                       memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT);
-                       break;
-               } else if (ret != -EBUSY) {
-                       break;
-               }
-               pr_debug("%s(): memory range at %p is busy, retrying\n",
-                        __func__, pfn_to_page(pfn));
-               /* try again with a bit different memory target */
-               start = pageno + mask + 1;
-       }
-       mutex_unlock(&kvm_cma_mutex);
-       pr_debug("%s(): returned %p\n", __func__, page);
-       return page;
-}
-
-/**
- * kvm_release_cma() - release allocated pages for hash pagetable
- * @pages: Allocated pages.
- * @nr_pages: Number of allocated pages.
- *
- * This function releases memory allocated by kvm_alloc_cma().
- * It returns false when provided pages do not belong to contiguous area and
- * true otherwise.
- */
-bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
-{
-       unsigned long pfn;
-       unsigned long nr_chunk;
-       struct kvm_cma *cma = &kvm_cma_area;
-
-       if (!cma || !pages)
-               return false;
-
-       pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages);
-
-       pfn = page_to_pfn(pages);
-
-       if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
-               return false;
-
-       VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count);
-       nr_chunk = nr_pages >>  (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
-
-       mutex_lock(&kvm_cma_mutex);
-       bitmap_clear(cma->bitmap,
-                    (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT),
-                    nr_chunk);
-       free_contig_range(pfn, nr_pages);
-       mutex_unlock(&kvm_cma_mutex);
-
-       return true;
-}
-
-static int __init kvm_cma_activate_area(unsigned long base_pfn,
-                                       unsigned long count)
-{
-       unsigned long pfn = base_pfn;
-       unsigned i = count >> pageblock_order;
-       struct zone *zone;
-
-       WARN_ON_ONCE(!pfn_valid(pfn));
-       zone = page_zone(pfn_to_page(pfn));
-       do {
-               unsigned j;
-               base_pfn = pfn;
-               for (j = pageblock_nr_pages; j; --j, pfn++) {
-                       WARN_ON_ONCE(!pfn_valid(pfn));
-                       /*
-                        * alloc_contig_range requires the pfn range
-                        * specified to be in the same zone. Make this
-                        * simple by forcing the entire CMA resv range
-                        * to be in the same zone.
-                        */
-                       if (page_zone(pfn_to_page(pfn)) != zone)
-                               return -EINVAL;
-               }
-               init_cma_reserved_pageblock(pfn_to_page(base_pfn));
-       } while (--i);
-       return 0;
-}
-
-static int __init kvm_cma_init_reserved_areas(void)
-{
-       int bitmap_size, ret;
-       unsigned long chunk_count;
-       struct kvm_cma *cma = &kvm_cma_area;
-
-       pr_debug("%s()\n", __func__);
-       if (!cma->count)
-               return 0;
-       chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
-       bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long);
-       cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
-       if (!cma->bitmap)
-               return -ENOMEM;
-
-       ret = kvm_cma_activate_area(cma->base_pfn, cma->count);
-       if (ret)
-               goto error;
-       return 0;
-
-error:
-       kfree(cma->bitmap);
-       return ret;
-}
-core_initcall(kvm_cma_init_reserved_areas);
diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h
deleted file mode 100644 (file)
index 655144f..0000000
--- a/arch/powerpc/kvm/book3s_hv_cma.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Contiguous Memory Allocator for ppc KVM hash pagetable  based on CMA
- * for DMA mapping framework
- *
- * Copyright IBM Corporation, 2013
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License or (at your optional) any later version of the license.
- *
- */
-
-#ifndef __POWERPC_KVM_CMA_ALLOC_H__
-#define __POWERPC_KVM_CMA_ALLOC_H__
-/*
- * Both RMA and Hash page allocation will be multiple of 256K.
- */
-#define KVM_CMA_CHUNK_ORDER    18
-
-extern struct page *kvm_alloc_cma(unsigned long nr_pages,
-                                 unsigned long align_pages);
-extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages);
-extern long kvm_cma_declare_contiguous(phys_addr_t size,
-                                      phys_addr_t alignment) __init;
-#endif
index 2c8e90f5789e4a72feb2e5a019a63a9e1dded518..e0f7a189c48ea440dc2e05ac659eb46895490590 100644 (file)
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -128,7 +128,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
                return -EINVAL;
 
        /* this should work for most non-highmem platforms */
-       zone = pgdata->node_zones;
+       zone = pgdata->node_zones +
+               zone_for_memory(nid, start, size, 0);
 
        return __add_pages(nid, zone, start_pfn, nr_pages);
 }
index f59771a3f127749c9f642e8881f2f5ef477fc10f..5c5e794058be1b234c601a11ef099df400e3df6a 100644 (file)
--- a/arch/score/include/uapi/asm/ptrace.h
+++ b/arch/score/include/uapi/asm/ptrace.h
@@ -4,17 +4,6 @@
 #define PTRACE_GETREGS         12
 #define PTRACE_SETREGS         13
 
-#define PC             32
-#define CONDITION      33
-#define ECR            34
-#define EMA            35
-#define CEH            36
-#define CEL            37
-#define COUNTER                38
-#define LDCR           39
-#define STCR           40
-#define PSR            41
-
 #define SINGLESTEP16_INSN      0x7006
 #define SINGLESTEP32_INSN      0x840C8000
 #define BREAKPOINT16_INSN      0x7002          /* work on SPG300 */
index cfd5b90a862820c5a55cf69e2f9612ed1ba765b5..78bc97b1d0270d8e5aabe0e22d49d58041e68abd 100644 (file)
--- a/arch/sh/drivers/dma/Kconfig
+++ b/arch/sh/drivers/dma/Kconfig
@@ -12,9 +12,8 @@ config SH_DMA_IRQ_MULTI
        default y if CPU_SUBTYPE_SH7750  || CPU_SUBTYPE_SH7751  || \
                     CPU_SUBTYPE_SH7750S || CPU_SUBTYPE_SH7750R || \
                     CPU_SUBTYPE_SH7751R || CPU_SUBTYPE_SH7091  || \
-                    CPU_SUBTYPE_SH7763  || CPU_SUBTYPE_SH7764  || \
-                    CPU_SUBTYPE_SH7780  || CPU_SUBTYPE_SH7785  || \
-                    CPU_SUBTYPE_SH7760
+                    CPU_SUBTYPE_SH7763  || CPU_SUBTYPE_SH7780  || \
+                    CPU_SUBTYPE_SH7785  || CPU_SUBTYPE_SH7760
 
 config SH_DMA_API
        depends on SH_DMA
index 4d48f1436a63b72a34f8201ad8cb88416bc1a37d..c727e6ddf69e180f4c01147e371fb722733af7b2 100644 (file)
--- a/arch/sh/include/asm/io_noioport.h
+++ b/arch/sh/include/asm/io_noioport.h
@@ -34,6 +34,17 @@ static inline void outl(unsigned int x, unsigned long port)
        BUG();
 }
 
+static inline void __iomem *ioport_map(unsigned long port, unsigned int size)
+{
+       BUG();
+       return NULL;
+}
+
+static inline void ioport_unmap(void __iomem *addr)
+{
+       BUG();
+}
+
 #define inb_p(addr)    inb(addr)
 #define inw_p(addr)    inw(addr)
 #define inl_p(addr)    inl(addr)
index 02788b6a03b7c9654711b9010ec6cedc1a5edc65..9cd81e54056ac477f1a8091dafd5b736605f7578 100644 (file)
--- a/arch/sh/include/cpu-sh4/cpu/dma-register.h
+++ b/arch/sh/include/cpu-sh4/cpu/dma-register.h
@@ -32,7 +32,6 @@
 #define CHCR_TS_HIGH_SHIFT     (20 - 2)        /* 2 bits for shifted low TS */
 #elif defined(CONFIG_CPU_SUBTYPE_SH7757) || \
        defined(CONFIG_CPU_SUBTYPE_SH7763) || \
-       defined(CONFIG_CPU_SUBTYPE_SH7764) || \
        defined(CONFIG_CPU_SUBTYPE_SH7780) || \
        defined(CONFIG_CPU_SUBTYPE_SH7785)
 #define CHCR_TS_LOW_MASK       0x00000018
index 89afb650ce2524b94fa292fbea1bd59b17f6fb22..8ceccceae84419d3a3733128d13e7321a4908266 100644 (file)
--- a/arch/sh/include/cpu-sh4a/cpu/dma.h
+++ b/arch/sh/include/cpu-sh4a/cpu/dma.h
@@ -14,8 +14,7 @@
 #define DMTE4_IRQ      evt2irq(0xb80)
 #define DMAE0_IRQ      evt2irq(0xbc0)  /* DMA Error IRQ*/
 #define SH_DMAC_BASE0  0xFE008020
-#elif defined(CONFIG_CPU_SUBTYPE_SH7763) || \
-       defined(CONFIG_CPU_SUBTYPE_SH7764)
+#elif defined(CONFIG_CPU_SUBTYPE_SH7763)
 #define DMTE0_IRQ      evt2irq(0x640)
 #define DMTE4_IRQ      evt2irq(0x780)
 #define DMAE0_IRQ      evt2irq(0x6c0)
index f579dd528198919914d9433984b02df7084d90f0..c187b9579c212bdd45c5ead12410775f0598bd48 100644 (file)
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
@@ -307,7 +307,7 @@ static struct clk_lookup lookups[] = {
        CLKDEV_ICK_ID("fck", "sh-tmu.0", &mstp_clks[HWBLK_TMU0]),
        CLKDEV_ICK_ID("fck", "sh-tmu.1", &mstp_clks[HWBLK_TMU1]),
 
-       CLKDEV_ICK_ID("fck", "sh-cmt-16.0", &mstp_clks[HWBLK_CMT]),
+       CLKDEV_ICK_ID("fck", "sh-cmt-32.0", &mstp_clks[HWBLK_CMT]),
        CLKDEV_DEV_ID("sh-wdt.0", &mstp_clks[HWBLK_RWDT]),
        CLKDEV_DEV_ID("sh-dma-engine.1", &mstp_clks[HWBLK_DMAC1]),
 
@@ -332,6 +332,8 @@ static struct clk_lookup lookups[] = {
        CLKDEV_CON_ID("tsif0", &mstp_clks[HWBLK_TSIF]),
        CLKDEV_DEV_ID("renesas_usbhs.1", &mstp_clks[HWBLK_USB1]),
        CLKDEV_DEV_ID("renesas_usbhs.0", &mstp_clks[HWBLK_USB0]),
+       CLKDEV_CON_ID("usb1", &mstp_clks[HWBLK_USB1]),
+       CLKDEV_CON_ID("usb0", &mstp_clks[HWBLK_USB0]),
        CLKDEV_CON_ID("2dg0", &mstp_clks[HWBLK_2DG]),
        CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[HWBLK_SDHI0]),
        CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[HWBLK_SDHI1]),
index 552c8fcf9416413f3b8ebf606371d88cb85eebeb..d6d0a986c6e937680186a32bbfd602c5ff40d10e 100644 (file)
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -80,10 +80,8 @@ static int __init rtc_generic_init(void)
                return -ENODEV;
 
        pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
-       if (IS_ERR(pdev))
-               return PTR_ERR(pdev);
 
-       return 0;
+       return PTR_ERR_OR_ZERO(pdev);
 }
 module_init(rtc_generic_init);
 
index 74c03ecc48718c6e41abb7b59f50f715eb992681..ecfc6b0c1da1061d781b49d9a6e9cb2713d09bfa 100644 (file)
--- a/arch/sh/mm/asids-debugfs.c
+++ b/arch/sh/mm/asids-debugfs.c
@@ -67,10 +67,8 @@ static int __init asids_debugfs_init(void)
                                           NULL, &asids_debugfs_fops);
        if (!asids_dentry)
                return -ENOMEM;
-       if (IS_ERR(asids_dentry))
-               return PTR_ERR(asids_dentry);
 
-       return 0;
+       return PTR_ERR_OR_ZERO(asids_dentry);
 }
 module_init(asids_debugfs_init);
 
index 2d089fe2cba910153e8d9bf1db927fd18ca9a83d..2790b6a64157f79663fe5232afe9f857e6d81cb7 100644 (file)
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -495,8 +495,9 @@ int arch_add_memory(int nid, u64 start, u64 size)
        pgdat = NODE_DATA(nid);
 
        /* We only have ZONE_NORMAL, so this is easy.. */
-       ret = __add_pages(nid, pgdat->node_zones + ZONE_NORMAL,
-                               start_pfn, nr_pages);
+       ret = __add_pages(nid, pgdat->node_zones +
+                       zone_for_memory(nid, start, size, ZONE_NORMAL),
+                       start_pfn, nr_pages);
        if (unlikely(ret))
                printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
 
index 4918d91bc3a660942a6aec8585be2bb41425d3df..d19b13e3a59fc967e175a5d51e829f98aac763dd 100644 (file)
--- a/arch/tile/kernel/module.c
+++ b/arch/tile/kernel/module.c
@@ -58,7 +58,7 @@ void *module_alloc(unsigned long size)
        area->nr_pages = npages;
        area->pages = pages;
 
-       if (map_vm_area(area, prot_rwx, &pages)) {
+       if (map_vm_area(area, prot_rwx, pages)) {
                vunmap(area->addr);
                goto error;
        }
index 1dbade870f90dd8b27140e10275df6d0610506be..a241946815131904498ae6ecd95d87c4e28bf2f9 100644 (file)
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1218,7 +1218,8 @@ good_area:
        /*
         * If for any reason at all we couldn't handle the fault,
         * make sure we exit gracefully rather than endlessly redo
-        * the fault:
+        * the fault.  Since we never set FAULT_FLAG_RETRY_NOWAIT, if
+        * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked.
         */
        fault = handle_mm_fault(mm, vma, address, flags);
 
index e39504878aecd22f6688073bd3fca80f39bfd9c1..7d05565ba7813047cfc4f4d96d339f9af0e5c3d3 100644 (file)
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -825,7 +825,8 @@ void __init mem_init(void)
 int arch_add_memory(int nid, u64 start, u64 size)
 {
        struct pglist_data *pgdata = NODE_DATA(nid);
-       struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
+       struct zone *zone = pgdata->node_zones +
+               zone_for_memory(nid, start, size, ZONE_HIGHMEM);
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
 
index df1a9927ad29ef9aa727851e775badb0f43a5416..5621c47d7a1a0e7274c7fd49b6aa7602d952bd74 100644 (file)
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -691,7 +691,8 @@ static void  update_end_of_memory_vars(u64 start, u64 size)
 int arch_add_memory(int nid, u64 start, u64 size)
 {
        struct pglist_data *pgdat = NODE_DATA(nid);
-       struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
+       struct zone *zone = pgdat->node_zones +
+               zone_for_memory(nid, start, size, ZONE_NORMAL);
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        int ret;
index e65d400efd449a9030f5ccbef1a27d68587aa797..e1b92788c225875efb978f21f8cd25f817f68a52 100644 (file)
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -16,6 +16,7 @@ menuconfig ATA
        depends on BLOCK
        depends on !(M32R || M68K || S390) || BROKEN
        select SCSI
+       select GLOB
        ---help---
          If you want to use an ATA hard disk, ATA tape drive, ATA CD-ROM or
          any other ATA device under Linux, say Y and make sure that you know
index 677c0c1b03bd658322cad2faf5becd86ce1db3cd..dbdc5d32343f53f7ed96a94cf2d6d69258a54237 100644 (file)
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -59,6 +59,7 @@
 #include <linux/async.h>
 #include <linux/log2.h>
 #include <linux/slab.h>
+#include <linux/glob.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_host.h>
@@ -4250,73 +4251,6 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
        { }
 };
 
-/**
- *     glob_match - match a text string against a glob-style pattern
- *     @text: the string to be examined
- *     @pattern: the glob-style pattern to be matched against
- *
- *     Either/both of text and pattern can be empty strings.
- *
- *     Match text against a glob-style pattern, with wildcards and simple sets:
- *
- *             ?       matches any single character.
- *             *       matches any run of characters.
- *             [xyz]   matches a single character from the set: x, y, or z.
- *             [a-d]   matches a single character from the range: a, b, c, or d.
- *             [a-d0-9] matches a single character from either range.
- *
- *     The special characters ?, [, -, or *, can be matched using a set, eg. [*]
- *     Behaviour with malformed patterns is undefined, though generally reasonable.
- *
- *     Sample patterns:  "SD1?",  "SD1[0-5]",  "*R0",  "SD*1?[012]*xx"
- *
- *     This function uses one level of recursion per '*' in pattern.
- *     Since it calls _nothing_ else, and has _no_ explicit local variables,
- *     this will not cause stack problems for any reasonable use here.
- *
- *     RETURNS:
- *     0 on match, 1 otherwise.
- */
-static int glob_match (const char *text, const char *pattern)
-{
-       do {
-               /* Match single character or a '?' wildcard */
-               if (*text == *pattern || *pattern == '?') {
-                       if (!*pattern++)
-                               return 0;  /* End of both strings: match */
-               } else {
-                       /* Match single char against a '[' bracketed ']' pattern set */
-                       if (!*text || *pattern != '[')
-                               break;  /* Not a pattern set */
-                       while (*++pattern && *pattern != ']' && *text != *pattern) {
-                               if (*pattern == '-' && *(pattern - 1) != '[')
-                                       if (*text > *(pattern - 1) && *text < *(pattern + 1)) {
-                                               ++pattern;
-                                               break;
-                                       }
-                       }
-                       if (!*pattern || *pattern == ']')
-                               return 1;  /* No match */
-                       while (*pattern && *pattern++ != ']');
-               }
-       } while (*++text && *pattern);
-
-       /* Match any run of chars against a '*' wildcard */
-       if (*pattern == '*') {
-               if (!*++pattern)
-                       return 0;  /* Match: avoid recursion at end of pattern */
-               /* Loop to handle additional pattern chars after the wildcard */
-               while (*text) {
-                       if (glob_match(text, pattern) == 0)
-                               return 0;  /* Remainder matched */
-                       ++text;  /* Absorb (match) this char and try again */
-               }
-       }
-       if (!*text && !*pattern)
-               return 0;  /* End of both strings: match */
-       return 1;  /* No match */
-}
-
 static unsigned long ata_dev_blacklisted(const struct ata_device *dev)
 {
        unsigned char model_num[ATA_ID_PROD_LEN + 1];
@@ -4327,10 +4261,10 @@ static unsigned long ata_dev_blacklisted(const struct ata_device *dev)
        ata_id_c_string(dev->id, model_rev, ATA_ID_FW_REV, sizeof(model_rev));
 
        while (ad->model_num) {
-               if (!glob_match(model_num, ad->model_num)) {
+               if (glob_match(model_num, ad->model_num)) {
                        if (ad->model_rev == NULL)
                                return ad->horkage;
-                       if (!glob_match(model_rev, ad->model_rev))
+                       if (glob_match(model_rev, ad->model_rev))
                                return ad->horkage;
                }
                ad++;
index 88500fed3c7a41f13c3cea282d7241c13d2f940c..4e7f0ff83ae7b162c43355b6727019c7edfbe897 100644 (file)
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -289,16 +289,6 @@ config CMA_ALIGNMENT
 
          If unsure, leave the default value "8".
 
-config CMA_AREAS
-       int "Maximum count of the CMA device-private areas"
-       default 7
-       help
-         CMA allows to create CMA areas for particular devices. This parameter
-         sets the maximum number of such device private CMA areas in the
-         system.
-
-         If unsure, leave the default value "7".
-
 endif
 
 endmenu
index 6467c919c50993ebfc32e14b2ac2860c0b3beae8..6606abdf880c816ce407c486fbabcc27d602cef9 100644 (file)
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
 
 #include <linux/memblock.h>
 #include <linux/err.h>
-#include <linux/mm.h>
-#include <linux/mutex.h>
-#include <linux/page-isolation.h>
 #include <linux/sizes.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/mm_types.h>
 #include <linux/dma-contiguous.h>
-
-struct cma {
-       unsigned long   base_pfn;
-       unsigned long   count;
-       unsigned long   *bitmap;
-       struct mutex    lock;
-};
-
-struct cma *dma_contiguous_default_area;
+#include <linux/cma.h>
 
 #ifdef CONFIG_CMA_SIZE_MBYTES
 #define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
@@ -48,6 +34,8 @@ struct cma *dma_contiguous_default_area;
 #define CMA_SIZE_MBYTES 0
 #endif
 
+struct cma *dma_contiguous_default_area;
+
 /*
  * Default global CMA area size can be defined in kernel's .config.
  * This is useful mainly for distro maintainers to create a kernel
@@ -154,65 +142,6 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
        }
 }
 
-static DEFINE_MUTEX(cma_mutex);
-
-static int __init cma_activate_area(struct cma *cma)
-{
-       int bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long);
-       unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
-       unsigned i = cma->count >> pageblock_order;
-       struct zone *zone;
-
-       cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
-
-       if (!cma->bitmap)
-               return -ENOMEM;
-
-       WARN_ON_ONCE(!pfn_valid(pfn));
-       zone = page_zone(pfn_to_page(pfn));
-
-       do {
-               unsigned j;
-               base_pfn = pfn;
-               for (j = pageblock_nr_pages; j; --j, pfn++) {
-                       WARN_ON_ONCE(!pfn_valid(pfn));
-                       /*
-                        * alloc_contig_range requires the pfn range
-                        * specified to be in the same zone. Make this
-                        * simple by forcing the entire CMA resv range
-                        * to be in the same zone.
-                        */
-                       if (page_zone(pfn_to_page(pfn)) != zone)
-                               goto err;
-               }
-               init_cma_reserved_pageblock(pfn_to_page(base_pfn));
-       } while (--i);
-
-       mutex_init(&cma->lock);
-       return 0;
-
-err:
-       kfree(cma->bitmap);
-       return -EINVAL;
-}
-
-static struct cma cma_areas[MAX_CMA_AREAS];
-static unsigned cma_area_count;
-
-static int __init cma_init_reserved_areas(void)
-{
-       int i;
-
-       for (i = 0; i < cma_area_count; i++) {
-               int ret = cma_activate_area(&cma_areas[i]);
-               if (ret)
-                       return ret;
-       }
-
-       return 0;
-}
-core_initcall(cma_init_reserved_areas);
-
 /**
  * dma_contiguous_reserve_area() - reserve custom contiguous area
  * @size: Size of the reserved area (in bytes),
@@ -234,72 +163,17 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
                                       phys_addr_t limit, struct cma **res_cma,
                                       bool fixed)
 {
-       struct cma *cma = &cma_areas[cma_area_count];
-       phys_addr_t alignment;
-       int ret = 0;
-
-       pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__,
-                (unsigned long)size, (unsigned long)base,
-                (unsigned long)limit);
-
-       /* Sanity checks */
-       if (cma_area_count == ARRAY_SIZE(cma_areas)) {
-               pr_err("Not enough slots for CMA reserved regions!\n");
-               return -ENOSPC;
-       }
-
-       if (!size)
-               return -EINVAL;
-
-       /* Sanitise input arguments */
-       alignment = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
-       base = ALIGN(base, alignment);
-       size = ALIGN(size, alignment);
-       limit &= ~(alignment - 1);
-
-       /* Reserve memory */
-       if (base && fixed) {
-               if (memblock_is_region_reserved(base, size) ||
-                   memblock_reserve(base, size) < 0) {
-                       ret = -EBUSY;
-                       goto err;
-               }
-       } else {
-               phys_addr_t addr = memblock_alloc_range(size, alignment, base,
-                                                       limit);
-               if (!addr) {
-                       ret = -ENOMEM;
-                       goto err;
-               } else {
-                       base = addr;
-               }
-       }
-
-       /*
-        * Each reserved area must be initialised later, when more kernel
-        * subsystems (like slab allocator) are available.
-        */
-       cma->base_pfn = PFN_DOWN(base);
-       cma->count = size >> PAGE_SHIFT;
-       *res_cma = cma;
-       cma_area_count++;
+       int ret;
 
-       pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
-               (unsigned long)base);
+       ret = cma_declare_contiguous(base, size, limit, 0, 0, fixed, res_cma);
+       if (ret)
+               return ret;
 
        /* Architecture specific contiguous memory fixup. */
-       dma_contiguous_early_fixup(base, size);
-       return 0;
-err:
-       pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
-       return ret;
-}
+       dma_contiguous_early_fixup(cma_get_base(*res_cma),
+                               cma_get_size(*res_cma));
 
-static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count)
-{
-       mutex_lock(&cma->lock);
-       bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count);
-       mutex_unlock(&cma->lock);
+       return 0;
 }
 
 /**
@@ -316,62 +190,10 @@ static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count)
 struct page *dma_alloc_from_contiguous(struct device *dev, int count,
                                       unsigned int align)
 {
-       unsigned long mask, pfn, pageno, start = 0;
-       struct cma *cma = dev_get_cma_area(dev);
-       struct page *page = NULL;
-       int ret;
-
-       if (!cma || !cma->count)
-               return NULL;
-
        if (align > CONFIG_CMA_ALIGNMENT)
                align = CONFIG_CMA_ALIGNMENT;
 
-       pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
-                count, align);
-
-       if (!count)
-               return NULL;
-
-       mask = (1 << align) - 1;
-
-
-       for (;;) {
-               mutex_lock(&cma->lock);
-               pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
-                                                   start, count, mask);
-               if (pageno >= cma->count) {
-                       mutex_unlock(&cma->lock);
-                       break;
-               }
-               bitmap_set(cma->bitmap, pageno, count);
-               /*
-                * It's safe to drop the lock here. We've marked this region for
-                * our exclusive use. If the migration fails we will take the
-                * lock again and unmark it.
-                */
-               mutex_unlock(&cma->lock);
-
-               pfn = cma->base_pfn + pageno;
-               mutex_lock(&cma_mutex);
-               ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
-               mutex_unlock(&cma_mutex);
-               if (ret == 0) {
-                       page = pfn_to_page(pfn);
-                       break;
-               } else if (ret != -EBUSY) {
-                       clear_cma_bitmap(cma, pfn, count);
-                       break;
-               }
-               clear_cma_bitmap(cma, pfn, count);
-               pr_debug("%s(): memory range at %p is busy, retrying\n",
-                        __func__, pfn_to_page(pfn));
-               /* try again with a bit different memory target */
-               start = pageno + mask + 1;
-       }
-
-       pr_debug("%s(): returned %p\n", __func__, page);
-       return page;
+       return cma_alloc(dev_get_cma_area(dev), count, align);
 }
 
 /**
@@ -387,23 +209,5 @@ struct page *dma_alloc_from_contiguous(struct device *dev, int count,
 bool dma_release_from_contiguous(struct device *dev, struct page *pages,
                                 int count)
 {
-       struct cma *cma = dev_get_cma_area(dev);
-       unsigned long pfn;
-
-       if (!cma || !pages)
-               return false;
-
-       pr_debug("%s(page %p)\n", __func__, (void *)pages);
-
-       pfn = page_to_pfn(pages);
-
-       if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
-               return false;
-
-       VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
-
-       free_contig_range(pfn, count);
-       clear_cma_bitmap(cma, pfn, count);
-
-       return true;
+       return cma_release(dev_get_cma_area(dev), pages, count);
 }
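
(Not part of the patch: a rough sketch of the consolidated CMA API that dma-contiguous.c and the powerpc KVM code now share, using only the signatures visible in this diff -- cma_declare_contiguous(), cma_alloc(), cma_release(). The my_cma/my_cma_reserve names and the 16 MiB size are invented for illustration.)

#include <linux/cma.h>
#include <linux/init.h>
#include <linux/mm_types.h>
#include <linux/sizes.h>

static struct cma *my_cma;	/* hypothetical area, analogous to kvm_cma above */

/* Early boot: reserve 16 MiB; base=0 and limit=0 let the allocator place it. */
static int __init my_cma_reserve(void)
{
	return cma_declare_contiguous(0, SZ_16M, 0, 0, 0, false, &my_cma);
}

/* Runtime: hand out and take back physically contiguous pages. */
static struct page *my_cma_get(int count, unsigned int align)
{
	return cma_alloc(my_cma, count, align);
}

static bool my_cma_put(struct page *pages, int count)
{
	return cma_release(my_cma, pages, count);
}
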
index 89f752dd8465acdd1f9a7696c34f8dfc572c31e3..a2e13e250bba2f54eea93e72e9692340d3298d66 100644 (file)
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -284,7 +284,7 @@ static int memory_subsys_online(struct device *dev)
         * attribute and need to set the online_type.
         */
        if (mem->online_type < 0)
-               mem->online_type = ONLINE_KEEP;
+               mem->online_type = MMOP_ONLINE_KEEP;
 
        ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
 
@@ -315,23 +315,23 @@ store_mem_state(struct device *dev,
        if (ret)
                return ret;
 
-       if (!strncmp(buf, "online_kernel", min_t(int, count, 13)))
-               online_type = ONLINE_KERNEL;
-       else if (!strncmp(buf, "online_movable", min_t(int, count, 14)))
-               online_type = ONLINE_MOVABLE;
-       else if (!strncmp(buf, "online", min_t(int, count, 6)))
-               online_type = ONLINE_KEEP;
-       else if (!strncmp(buf, "offline", min_t(int, count, 7)))
-               online_type = -1;
+       if (sysfs_streq(buf, "online_kernel"))
+               online_type = MMOP_ONLINE_KERNEL;
+       else if (sysfs_streq(buf, "online_movable"))
+               online_type = MMOP_ONLINE_MOVABLE;
+       else if (sysfs_streq(buf, "online"))
+               online_type = MMOP_ONLINE_KEEP;
+       else if (sysfs_streq(buf, "offline"))
+               online_type = MMOP_OFFLINE;
        else {
                ret = -EINVAL;
                goto err;
        }
 
        switch (online_type) {
-       case ONLINE_KERNEL:
-       case ONLINE_MOVABLE:
-       case ONLINE_KEEP:
+       case MMOP_ONLINE_KERNEL:
+       case MMOP_ONLINE_MOVABLE:
+       case MMOP_ONLINE_KEEP:
                /*
                 * mem->online_type is not protected so there can be a
                 * race here.  However, when racing online, the first
@@ -342,7 +342,7 @@ store_mem_state(struct device *dev,
                mem->online_type = online_type;
                ret = device_online(&mem->dev);
                break;
-       case -1:
+       case MMOP_OFFLINE:
                ret = device_offline(&mem->dev);
                break;
        default:
@@ -406,7 +406,9 @@ memory_probe_store(struct device *dev, struct device_attribute *attr,
        int i, ret;
        unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;
 
-       phys_addr = simple_strtoull(buf, NULL, 0);
+       ret = kstrtoull(buf, 0, &phys_addr);
+       if (ret)
+               return ret;
 
        if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
                return -EINVAL;
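The two conversions above are worth spelling out: sysfs_streq() treats a single trailing newline as part of the string terminator and compares the whole token, so the old length-limited strncmp(), which also accepted anything that merely begins with "online", is no longer needed; and kstrtoull(), unlike simple_strtoull(), returns an error on malformed input instead of silently stopping at the first bad character. A hedged sketch showing both helpers together; the function is illustrative only, not part of this patch.

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>

/* Accept either the keyword "online" (with or without the trailing
 * newline that sysfs store callbacks usually receive) or a physical
 * address in any base understood by kstrtoull(). */
static int example_parse(const char *buf, unsigned long long *phys_addr)
{
        if (sysfs_streq(buf, "online"))
                return 1;

        return kstrtoull(buf, 0, phys_addr);
}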
index 8f7ed9933a7c6939d7703badeeacb97fcfc803aa..c6d3ae05f1ca4d98667490d7db0b6375f61b9c26 100644 (file)
@@ -126,7 +126,7 @@ static ssize_t node_read_meminfo(struct device *dev,
                       nid, K(node_page_state(nid, NR_FILE_PAGES)),
                       nid, K(node_page_state(nid, NR_FILE_MAPPED)),
                       nid, K(node_page_state(nid, NR_ANON_PAGES)),
-                      nid, K(node_page_state(nid, NR_SHMEM)),
+                      nid, K(i.sharedram),
                       nid, node_page_state(nid, NR_KERNEL_STACK) *
                                THREAD_SIZE / 1024,
                       nid, K(node_page_state(nid, NR_PAGETABLE)),
index 36e54be402df30b68c579e09c588193f5f015e00..dfa4024c448a6222d8d12ffb2f05e1976652fca0 100644 (file)
@@ -183,19 +183,32 @@ static ssize_t comp_algorithm_store(struct device *dev,
 static int zram_test_flag(struct zram_meta *meta, u32 index,
                        enum zram_pageflags flag)
 {
-       return meta->table[index].flags & BIT(flag);
+       return meta->table[index].value & BIT(flag);
 }
 
 static void zram_set_flag(struct zram_meta *meta, u32 index,
                        enum zram_pageflags flag)
 {
-       meta->table[index].flags |= BIT(flag);
+       meta->table[index].value |= BIT(flag);
 }
 
 static void zram_clear_flag(struct zram_meta *meta, u32 index,
                        enum zram_pageflags flag)
 {
-       meta->table[index].flags &= ~BIT(flag);
+       meta->table[index].value &= ~BIT(flag);
+}
+
+static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
+{
+       return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
+}
+
+static void zram_set_obj_size(struct zram_meta *meta,
+                                       u32 index, size_t size)
+{
+       unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
+
+       meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
 }
 
 static inline int is_partial_io(struct bio_vec *bvec)
@@ -255,7 +268,6 @@ static struct zram_meta *zram_meta_alloc(u64 disksize)
                goto free_table;
        }
 
-       rwlock_init(&meta->tb_lock);
        return meta;
 
 free_table:
@@ -304,7 +316,12 @@ static void handle_zero_page(struct bio_vec *bvec)
        flush_dcache_page(page);
 }
 
-/* NOTE: caller should hold meta->tb_lock with write-side */
+
+/*
+ * To protect concurrent access to the same index entry, the
+ * caller should hold this table entry's bit_spinlock to
+ * indicate that the entry is being accessed.
+ */
 static void zram_free_page(struct zram *zram, size_t index)
 {
        struct zram_meta *meta = zram->meta;
@@ -324,11 +341,12 @@ static void zram_free_page(struct zram *zram, size_t index)
 
        zs_free(meta->mem_pool, handle);
 
-       atomic64_sub(meta->table[index].size, &zram->stats.compr_data_size);
+       atomic64_sub(zram_get_obj_size(meta, index),
+                       &zram->stats.compr_data_size);
        atomic64_dec(&zram->stats.pages_stored);
 
        meta->table[index].handle = 0;
-       meta->table[index].size = 0;
+       zram_set_obj_size(meta, index, 0);
 }
 
 static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
@@ -337,14 +355,14 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
        unsigned char *cmem;
        struct zram_meta *meta = zram->meta;
        unsigned long handle;
-       u16 size;
+       size_t size;
 
-       read_lock(&meta->tb_lock);
+       bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
        handle = meta->table[index].handle;
-       size = meta->table[index].size;
+       size = zram_get_obj_size(meta, index);
 
        if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
-               read_unlock(&meta->tb_lock);
+               bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
                clear_page(mem);
                return 0;
        }
@@ -355,7 +373,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
        else
                ret = zcomp_decompress(zram->comp, cmem, size, mem);
        zs_unmap_object(meta->mem_pool, handle);
-       read_unlock(&meta->tb_lock);
+       bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
 
        /* Should NEVER happen. Return bio error if it does. */
        if (unlikely(ret)) {
@@ -376,14 +394,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
        struct zram_meta *meta = zram->meta;
        page = bvec->bv_page;
 
-       read_lock(&meta->tb_lock);
+       bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
        if (unlikely(!meta->table[index].handle) ||
                        zram_test_flag(meta, index, ZRAM_ZERO)) {
-               read_unlock(&meta->tb_lock);
+               bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
                handle_zero_page(bvec);
                return 0;
        }
-       read_unlock(&meta->tb_lock);
+       bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
 
        if (is_partial_io(bvec))
                /* Use  a temporary buffer to decompress the page */
@@ -461,10 +479,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
        if (page_zero_filled(uncmem)) {
                kunmap_atomic(user_mem);
                /* Free memory associated with this sector now. */
-               write_lock(&zram->meta->tb_lock);
+               bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
                zram_free_page(zram, index);
                zram_set_flag(meta, index, ZRAM_ZERO);
-               write_unlock(&zram->meta->tb_lock);
+               bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
 
                atomic64_inc(&zram->stats.zero_pages);
                ret = 0;
@@ -514,12 +532,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
         * Free memory associated with this sector
         * before overwriting unused sectors.
         */
-       write_lock(&zram->meta->tb_lock);
+       bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
        zram_free_page(zram, index);
 
        meta->table[index].handle = handle;
-       meta->table[index].size = clen;
-       write_unlock(&zram->meta->tb_lock);
+       zram_set_obj_size(meta, index, clen);
+       bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
 
        /* Update stats */
        atomic64_add(clen, &zram->stats.compr_data_size);
@@ -560,6 +578,7 @@ static void zram_bio_discard(struct zram *zram, u32 index,
                             int offset, struct bio *bio)
 {
        size_t n = bio->bi_iter.bi_size;
+       struct zram_meta *meta = zram->meta;
 
        /*
         * zram manages data in physical block size units. Because logical block
@@ -580,13 +599,9 @@ static void zram_bio_discard(struct zram *zram, u32 index,
        }
 
        while (n >= PAGE_SIZE) {
-               /*
-                * Discard request can be large so the lock hold times could be
-                * lengthy.  So take the lock once per page.
-                */
-               write_lock(&zram->meta->tb_lock);
+               bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
                zram_free_page(zram, index);
-               write_unlock(&zram->meta->tb_lock);
+               bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
                index++;
                n -= PAGE_SIZE;
        }
@@ -821,9 +836,9 @@ static void zram_slot_free_notify(struct block_device *bdev,
        zram = bdev->bd_disk->private_data;
        meta = zram->meta;
 
-       write_lock(&meta->tb_lock);
+       bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
        zram_free_page(zram, index);
-       write_unlock(&meta->tb_lock);
+       bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
        atomic64_inc(&zram->stats.notify_free);
 }
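The rule documented above zram_free_page() (hold the entry's bit_spinlock while touching it) replaces the single meta->tb_lock rwlock with a lock bit embedded in each table entry. A minimal sketch of the pattern, written as if inside zram_drv.c; the helper name is made up.

static void example_with_entry_locked(struct zram *zram, u32 index)
{
        struct zram_meta *meta = zram->meta;

        /* ZRAM_ACCESS is a bit number inside table[index].value, so the
         * lock needs no extra field and serializes only this entry. */
        bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
        /* handle, size bits and flag bits of this entry are stable here */
        bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
}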
 
index 7f21c145e317f49e785eafeb1aea78097109e158..5b0afde729cd885286f48d31991f7bc98f44328b 100644 (file)
@@ -43,7 +43,6 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
 /*-- End of configurable params */
 
 #define SECTOR_SHIFT           9
-#define SECTOR_SIZE            (1 << SECTOR_SHIFT)
 #define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
 #define SECTORS_PER_PAGE       (1 << SECTORS_PER_PAGE_SHIFT)
 #define ZRAM_LOGICAL_BLOCK_SHIFT 12
@@ -51,10 +50,24 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
 #define ZRAM_SECTOR_PER_LOGICAL_BLOCK  \
        (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT))
 
-/* Flags for zram pages (table[page_no].flags) */
+
+/*
+ * zram is mainly used for memory efficiency, so we want to keep the
+ * per-entry memory footprint small and squeeze the object size and
+ * the page flags into a single field.
+ *
+ * The lower ZRAM_FLAG_SHIFT bits of table.value hold the object size
+ * (excluding header); the higher bits hold zram_pageflags.
+ */
+#define ZRAM_FLAG_SHIFT 24
+
+/* Flags for zram pages (table[page_no].value) */
 enum zram_pageflags {
        /* Page consists entirely of zeros */
-       ZRAM_ZERO,
+       ZRAM_ZERO = ZRAM_FLAG_SHIFT + 1,
+       ZRAM_ACCESS,    /* page is now accessed */
 
        __NR_ZRAM_PAGEFLAGS,
 };
@@ -62,11 +75,10 @@ enum zram_pageflags {
 /*-- Data structures */
 
 /* Allocated for each disk page */
-struct table {
+struct zram_table_entry {
        unsigned long handle;
-       u16 size;       /* object size (excluding header) */
-       u8 flags;
-} __aligned(4);
+       unsigned long value;
+};
 
 struct zram_stats {
        atomic64_t compr_data_size;     /* compressed size of pages stored */
@@ -81,8 +93,7 @@ struct zram_stats {
 };
 
 struct zram_meta {
-       rwlock_t tb_lock;       /* protect table */
-       struct table *table;
+       struct zram_table_entry *table;
        struct zs_pool *mem_pool;
 };
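To make the packing concrete: with ZRAM_FLAG_SHIFT == 24 the low 24 bits of table.value carry the compressed object size, and ZRAM_ZERO and ZRAM_ACCESS land on bits 25 and 26, mirroring the zram_get_obj_size()/zram_set_obj_size() accessors added to zram_drv.c earlier in this patch. A small illustrative sketch under those assumptions; the helper names are hypothetical.

#include <linux/bitops.h>       /* for BIT() */

static inline size_t example_obj_size(unsigned long value)
{
        /* the size occupies bits 0..23 */
        return value & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static inline int example_is_zero_page(unsigned long value)
{
        /* ZRAM_ZERO == 25, so this tests bit 25 */
        return !!(value & BIT(ZRAM_ZERO));
}

For example, value == (BIT(ZRAM_ZERO) | 1234) decodes to an object size of 1234 bytes with the zero-page flag set.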
 
index 17cf96c45f2b07eaeaa31f7e7bdba63b3c98f7b7..79f18e6d9c4f346e5f04430a67cf7b1cb7dbd97c 100644 (file)
@@ -286,7 +286,11 @@ int __meminit firmware_map_add_hotplug(u64 start, u64 end, const char *type)
 {
        struct firmware_map_entry *entry;
 
-       entry = firmware_map_find_entry_bootmem(start, end, type);
+       entry = firmware_map_find_entry(start, end - 1, type);
+       if (entry)
+               return 0;
+
+       entry = firmware_map_find_entry_bootmem(start, end - 1, type);
        if (!entry) {
                entry = kzalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC);
                if (!entry)
index 7e4bae760e2743532c39bd0d49cae176e2c93b2f..c3b80fd65d6254e89caf3381529f673764286115 100644 (file)
@@ -125,7 +125,7 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item)
                parent = &entry->head;
        }
        if (parent) {
-               hlist_add_after_rcu(parent, &item->head);
+               hlist_add_behind_rcu(&item->head, parent);
        } else {
                hlist_add_head_rcu(&item->head, h_list);
        }
index ae208f61219804cbac02bc850582c1ad7cfc5f84..cccef87963e050afb99181d63d3ed5dcf401d5a1 100644 (file)
@@ -688,7 +688,7 @@ static int atk_debugfs_gitm_get(void *p, u64 *val)
 DEFINE_SIMPLE_ATTRIBUTE(atk_debugfs_gitm,
                        atk_debugfs_gitm_get,
                        NULL,
-                       "0x%08llx\n")
+                       "0x%08llx\n");
 
 static int atk_acpi_print(char *buf, size_t sz, union acpi_object *obj)
 {
index 0bf1e4edf04d04a838c9d4cdc8f96aa17dfaa896..6590558d1d31c600b23c8d50f48b4e3b1c461326 100644 (file)
@@ -42,7 +42,6 @@ DEFINE_MUTEX(lguest_lock);
 static __init int map_switcher(void)
 {
        int i, err;
-       struct page **pagep;
 
        /*
         * Map the Switcher in to high memory.
@@ -110,11 +109,9 @@ static __init int map_switcher(void)
         * This code actually sets up the pages we've allocated to appear at
         * switcher_addr.  map_vm_area() takes the vma we allocated above, the
         * kind of pages we're mapping (kernel pages), and a pointer to our
-        * array of struct pages.  It increments that pointer, but we don't
-        * care.
+        * array of struct pages.
         */
-       pagep = lg_switcher_pages;
-       err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep);
+       err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, lg_switcher_pages);
        if (err) {
                printk("lguest: map_vm_area failed: %i\n", err);
                goto free_vma;
index 681a9e81ff512aa874406d6e549485815c29a55d..e8ba7470700af1abaaf33826c7bffc8409666b9e 100644 (file)
@@ -1948,7 +1948,7 @@ static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi,
 
        /* add filter to the list */
        if (parent)
-               hlist_add_after(&parent->fdir_node, &input->fdir_node);
+               hlist_add_behind(&input->fdir_node, &parent->fdir_node);
        else
                hlist_add_head(&input->fdir_node,
                               &pf->fdir_filter_list);
index 94a1c07efeb0b8b6b1915084bdb7d52c0a5aaacb..e4100b5737b67a3cb862428e120e67d51bd1b7fd 100644 (file)
@@ -2517,7 +2517,7 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
 
        /* add filter to the list */
        if (parent)
-               hlist_add_after(&parent->fdir_node, &input->fdir_node);
+               hlist_add_behind(&input->fdir_node, &parent->fdir_node);
        else
                hlist_add_head(&input->fdir_node,
                               &adapter->fdir_filter_list);
index 02b0379ae5501ec563cff1647ac817b9b61440d8..4f34dc0095b579fef086508ab27d634d9f410660 100644 (file)
@@ -585,7 +585,6 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
 
        for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
                int ret;
-               struct page **page_array_ptr;
 
                page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE];
 
@@ -598,8 +597,7 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
                }
                tmp_area.addr = page_addr;
                tmp_area.size = PAGE_SIZE + PAGE_SIZE /* guard page? */;
-               page_array_ptr = page;
-               ret = map_vm_area(&tmp_area, PAGE_KERNEL, &page_array_ptr);
+               ret = map_vm_area(&tmp_area, PAGE_KERNEL, page);
                if (ret) {
                        pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n",
                               proc->pid, page_addr);
index 5dde79418297acf6ddc0a2ec2b8e41ead89b5bd6..8ef1deb59d4a3d3abe6f70862e43a307604d4133 100644 (file)
@@ -351,7 +351,7 @@ cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
                                            cfs_hash_dhead_t, dh_head);
 
        if (dh->dh_tail != NULL) /* not empty */
-               hlist_add_after(dh->dh_tail, hnode);
+               hlist_add_behind(hnode, dh->dh_tail);
        else /* empty list */
                hlist_add_head(hnode, &dh->dh_head);
        dh->dh_tail = hnode;
@@ -406,7 +406,7 @@ cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
                                                cfs_hash_dhead_dep_t, dd_head);
 
        if (dh->dd_tail != NULL) /* not empty */
-               hlist_add_after(dh->dd_tail, hnode);
+               hlist_add_behind(hnode, dh->dd_tail);
        else /* empty list */
                hlist_add_head(hnode, &dh->dd_head);
        dh->dd_tail = hnode;
index 454b65898e2c6eaa9434b1df84a0958926a0e06d..42bad18c66c938be9cf3c0de786be18c77692454 100644 (file)
@@ -355,7 +355,7 @@ static struct sysrq_key_op sysrq_term_op = {
 
 static void moom_callback(struct work_struct *ignored)
 {
-       out_of_memory(node_zonelist(first_online_node, GFP_KERNEL), GFP_KERNEL,
+       out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL), GFP_KERNEL,
                      0, NULL, true);
 }
 
index a31b83c5cbd9ad80e7b5967b48d0c8d1181cbd00..b39d487ccfb0e314096f47515d16ea165d215cc5 100644 (file)
@@ -67,7 +67,7 @@ static int fscache_max_active_sysctl(struct ctl_table *table, int write,
        return ret;
 }
 
-struct ctl_table fscache_sysctls[] = {
+static struct ctl_table fscache_sysctls[] = {
        {
                .procname       = "object_max_active",
                .data           = &fscache_object_max_active,
@@ -87,7 +87,7 @@ struct ctl_table fscache_sysctls[] = {
        {}
 };
 
-struct ctl_table fscache_sysctls_root[] = {
+static struct ctl_table fscache_sysctls_root[] = {
        {
                .procname       = "fscache",
                .mode           = 0555,
index 48140315f62770d622c628f906dede95a6c6bda0..380d86e1ab450b2ed02c9011ec9843ba0e2d2f6c 100644 (file)
@@ -1019,11 +1019,11 @@ static int __logfs_is_valid_block(struct inode *inode, u64 bix, u64 ofs)
 /**
  * logfs_is_valid_block - check whether this block is still valid
  *
- * @sb - superblock
- * @ofs        - block physical offset
- * @ino        - block inode number
- * @bix        - block index
- * @level - block level
+ * @sb:                superblock
+ * @ofs:       block physical offset
+ * @ino:       block inode number
+ * @bix:       block index
+ * @gc_level:  block level
  *
  * Returns 0 if the block is invalid, 1 if it is valid and 2 if it will
  * become invalid once the journal is written.
@@ -2226,10 +2226,9 @@ void btree_write_block(struct logfs_block *block)
  *
  * @inode:             parent inode (ifile or directory)
  * @buf:               object to write (inode or dentry)
- * @n:                 object size
- * @_pos:              object number (file position in blocks/objects)
+ * @count:             object size
+ * @bix:               block index
  * @flags:             write flags
- * @lock:              0 if write lock is already taken, 1 otherwise
  * @shadow_tree:       shadow below this inode
  *
  * FIXME: All caller of this put a 200-300 byte variable on the stack,
index 182bc41cd88711d593c4d997171c6ad483a87577..2a1447c946e7c212071eac5243a5538a499cd0fa 100644 (file)
@@ -798,7 +798,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
        list_splice(&head, n->list.prev);
 
        if (shadows)
-               hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash);
+               hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
        else
                hlist_add_head_rcu(&mnt->mnt_hash,
                                m_hash(&parent->mnt, mnt->mnt_mountpoint));
index ee9cb3795c2b14c53179d10f485e33ed613d0c16..30d3addfad7583c722b281675c4e14af76baadd3 100644 (file)
@@ -70,8 +70,15 @@ static int fanotify_get_response(struct fsnotify_group *group,
        wait_event(group->fanotify_data.access_waitq, event->response ||
                                atomic_read(&group->fanotify_data.bypass_perm));
 
-       if (!event->response) /* bypass_perm set */
+       if (!event->response) { /* bypass_perm set */
+               /*
+                * Event was canceled because group is being destroyed. Remove
+                * it from group's event list because we are responsible for
+                * freeing the permission event.
+                */
+               fsnotify_remove_event(group, &event->fae.fse);
                return 0;
+       }
 
        /* userspace responded, convert to something usable */
        switch (event->response) {
@@ -210,7 +217,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
                return -ENOMEM;
 
        fsn_event = &event->fse;
-       ret = fsnotify_add_notify_event(group, fsn_event, fanotify_merge);
+       ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
        if (ret) {
                /* Permission events shouldn't be merged */
                BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS);
index 3fdc8a3e113464b71dde78cd43ad6357e037df36..b13992a41bd94312eeecd59b4e0841a58d1d85c0 100644 (file)
@@ -66,7 +66,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 
        /* held the notification_mutex the whole time, so this is the
         * same event we peeked above */
-       return fsnotify_remove_notify_event(group);
+       return fsnotify_remove_first_event(group);
 }
 
 static int create_fd(struct fsnotify_group *group,
@@ -359,6 +359,11 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
        struct fanotify_perm_event_info *event, *next;
 
+       /*
+        * There may still be new events arriving in the notification queue
+        * but since userspace cannot use the fanotify fd anymore, no event
+        * can enter or leave access_list at this point.
+        */
        spin_lock(&group->fanotify_data.access_lock);
 
        atomic_inc(&group->fanotify_data.bypass_perm);
@@ -373,6 +378,13 @@ static int fanotify_release(struct inode *ignored, struct file *file)
        }
        spin_unlock(&group->fanotify_data.access_lock);
 
+       /*
+        * Since bypass_perm is set, newly queued events will not wait for
+        * access response. Wake up the already sleeping ones now.
+        * synchronize_srcu() in fsnotify_destroy_group() will wait for all
+        * processes sleeping in fanotify_handle_event() waiting for access
+        * response and thus also for all permission events to be freed.
+        */
        wake_up(&group->fanotify_data.access_waitq);
 #endif
 
index 74825be65b7bbec7eec8df37dda142479c00eb3d..9ce062218de9cf2559c02b92a7a201224d1d67fc 100644 (file)
@@ -232,7 +232,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 
        BUG_ON(last == NULL);
        /* mark should be the last entry.  last is the current last entry */
-       hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list);
+       hlist_add_behind_rcu(&mark->i.i_list, &last->i.i_list);
 out:
        fsnotify_recalc_inode_mask_locked(inode);
        spin_unlock(&inode->i_lock);
index 43ab1e1a07a20acaca5a4741487ac5121e00bbd4..0f88bc0b4e6cfd31fbcd030c5256384fa844b265 100644 (file)
@@ -108,7 +108,7 @@ int inotify_handle_event(struct fsnotify_group *group,
        if (len)
                strcpy(event->name, file_name);
 
-       ret = fsnotify_add_notify_event(group, fsn_event, inotify_merge);
+       ret = fsnotify_add_event(group, fsn_event, inotify_merge);
        if (ret) {
                /* Our event wasn't used in the end. Free it. */
                fsnotify_destroy_event(group, fsn_event);
index cc423a30a0c804321d4fb6c62558a1dc4b786e73..daf76652fe58c0534f355fd59f11ffbddcc43097 100644 (file)
@@ -149,7 +149,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
        if (fsnotify_notify_queue_is_empty(group))
                return NULL;
 
-       event = fsnotify_peek_notify_event(group);
+       event = fsnotify_peek_first_event(group);
 
        pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
@@ -159,7 +159,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 
        /* held the notification_mutex the whole time, so this is the
         * same event we peeked above */
-       fsnotify_remove_notify_event(group);
+       fsnotify_remove_first_event(group);
 
        return event;
 }
index 1e58402171a56cd9d078ab62307951689a5630df..a95d8e037aebe24ba36421861d3abaad6f7dfc89 100644 (file)
@@ -73,7 +73,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
        /* Overflow events are per-group and we don't want to free them */
        if (!event || event->mask == FS_Q_OVERFLOW)
                return;
-
+       /* If the event is still queued, we have a problem... */
+       WARN_ON(!list_empty(&event->list));
        group->ops->free_event(event);
 }
 
@@ -83,10 +84,10 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
  * added to the queue, 1 if the event was merged with some other queued event,
  * 2 if the queue of events has overflown.
  */
-int fsnotify_add_notify_event(struct fsnotify_group *group,
-                             struct fsnotify_event *event,
-                             int (*merge)(struct list_head *,
-                                          struct fsnotify_event *))
+int fsnotify_add_event(struct fsnotify_group *group,
+                      struct fsnotify_event *event,
+                      int (*merge)(struct list_head *,
+                                   struct fsnotify_event *))
 {
        int ret = 0;
        struct list_head *list = &group->notification_list;
@@ -124,11 +125,26 @@ queue:
        return ret;
 }
 
+/*
+ * Remove @event from group's notification queue. It is the responsibility of
+ * the caller to destroy the event.
+ */
+void fsnotify_remove_event(struct fsnotify_group *group,
+                          struct fsnotify_event *event)
+{
+       mutex_lock(&group->notification_mutex);
+       if (!list_empty(&event->list)) {
+               list_del_init(&event->list);
+               group->q_len--;
+       }
+       mutex_unlock(&group->notification_mutex);
+}
+
 /*
  * Remove and return the first event from the notification list.  It is the
  * responsibility of the caller to destroy the obtained event
  */
-struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group)
+struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group)
 {
        struct fsnotify_event *event;
 
@@ -140,7 +156,7 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
                                 struct fsnotify_event, list);
        /*
         * We need to init list head for the case of overflow event so that
-        * check in fsnotify_add_notify_events() works
+        * check in fsnotify_add_event() works
         */
        list_del_init(&event->list);
        group->q_len--;
@@ -149,9 +165,10 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
 }
 
 /*
- * This will not remove the event, that must be done with fsnotify_remove_notify_event()
+ * This will not remove the event, that must be done with
+ * fsnotify_remove_first_event()
  */
-struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
+struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group)
 {
        BUG_ON(!mutex_is_locked(&group->notification_mutex));
 
@@ -169,7 +186,7 @@ void fsnotify_flush_notify(struct fsnotify_group *group)
 
        mutex_lock(&group->notification_mutex);
        while (!fsnotify_notify_queue_is_empty(group)) {
-               event = fsnotify_remove_notify_event(group);
+               event = fsnotify_remove_first_event(group);
                fsnotify_destroy_event(group, event);
        }
        mutex_unlock(&group->notification_mutex);
index 68ca5a8704b5a385e38ae696835dd831d3202691..ac851e8376b1931d88adcf4ff5eaa8bd2445a635 100644 (file)
@@ -191,7 +191,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
 
        BUG_ON(last == NULL);
        /* mark should be the last entry.  last is the current last entry */
-       hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list);
+       hlist_add_behind_rcu(&mark->m.m_list, &last->m.m_list);
 out:
        fsnotify_recalc_vfsmount_mask_locked(mnt);
        spin_unlock(&mnt->mnt_root->d_lock);
index 5c9e2c81cb11db029ece7873766041ada8c65024..f5ec1ce7a53284969d600b4fb4027fc9e387d40d 100644 (file)
@@ -74,8 +74,6 @@ static int ntfs_file_open(struct inode *vi, struct file *filp)
  * ntfs_attr_extend_initialized - extend the initialized size of an attribute
  * @ni:                        ntfs inode of the attribute to extend
  * @new_init_size:     requested new initialized size in bytes
- * @cached_page:       store any allocated but unused page here
- * @lru_pvec:          lru-buffering pagevec of the caller
  *
  * Extend the initialized size of an attribute described by the ntfs inode @ni
  * to @new_init_size bytes.  This involves zeroing any non-sparse space between
@@ -395,7 +393,6 @@ static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
  * @nr_pages:  number of page cache pages to obtain
  * @pages:     array of pages in which to return the obtained page cache pages
  * @cached_page: allocated but as yet unused page
- * @lru_pvec:  lru-buffering pagevec of caller
  *
  * Obtain @nr_pages locked page cache pages from the mapping @mapping and
  * starting at index @index.
index 9d8fcf2f3b947f0fe2a5feda7c0a849f154f349c..a93bf98922565ab120d85f995f0cb565deb1e144 100644 (file)
@@ -4961,6 +4961,15 @@ leftright:
 
                el = path_leaf_el(path);
                split_index = ocfs2_search_extent_list(el, cpos);
+               if (split_index == -1) {
+                       ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+                                       "Owner %llu has an extent at cpos %u "
+                                       "which can no longer be found.\n",
+                                       (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+                                       cpos);
+                       ret = -EROFS;
+                       goto out;
+               }
                goto leftright;
        }
 out:
@@ -5135,7 +5144,7 @@ int ocfs2_change_extent_flag(handle_t *handle,
        el = path_leaf_el(left_path);
 
        index = ocfs2_search_extent_list(el, cpos);
-       if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+       if (index == -1) {
                ocfs2_error(sb,
                            "Owner %llu has an extent at cpos %u which can no "
                            "longer be found.\n",
@@ -5491,7 +5500,7 @@ int ocfs2_remove_extent(handle_t *handle,
 
        el = path_leaf_el(path);
        index = ocfs2_search_extent_list(el, cpos);
-       if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+       if (index == -1) {
                ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
                            "Owner %llu has an extent at cpos %u which can no "
                            "longer be found.\n",
@@ -5557,7 +5566,7 @@ int ocfs2_remove_extent(handle_t *handle,
 
                el = path_leaf_el(path);
                index = ocfs2_search_extent_list(el, cpos);
-               if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+               if (index == -1) {
                        ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
                                    "Owner %llu: split at cpos %u lost record.",
                                    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
index 39efc5057a36d3106391012925e2a8a56758e24e..3fcf205ee900acb87eaa5f74f7c28949ee95bec4 100644 (file)
@@ -1923,12 +1923,11 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
                                goto bail;
                        }
 
-                       if (total_backoff >
-                           msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) {
+                       if (total_backoff > DLM_JOIN_TIMEOUT_MSECS) {
                                status = -ERESTARTSYS;
                                mlog(ML_NOTICE, "Timed out joining dlm domain "
                                     "%s after %u msecs\n", dlm->name,
-                                    jiffies_to_msecs(total_backoff));
+                                    total_backoff);
                                goto bail;
                        }
 
index 82abf0cc9a12e2fbc8531f2542830d07ed100293..3ec906ef5d9a622ff4b130f12907d503bd89c3e0 100644 (file)
@@ -2405,6 +2405,10 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
        if (res->state & DLM_LOCK_RES_MIGRATING)
                return 0;
 
+       /* delay migration when the lockres is in RECOVERING state */
+       if (res->state & DLM_LOCK_RES_RECOVERING)
+               return 0;
+
        if (res->owner != dlm->node_num)
                return 0;
 
index 599eb4c4c8beedaec8c18649b47cc2c1fe487680..6219aaadeb08dadeb44ebdacb2b2559edfc52d7c 100644 (file)
@@ -98,7 +98,7 @@ static int __ocfs2_move_extent(handle_t *handle,
        el = path_leaf_el(path);
 
        index = ocfs2_search_extent_list(el, cpos);
-       if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+       if (index == -1) {
                ocfs2_error(inode->i_sb,
                            "Inode %llu has an extent at cpos %u which can no "
                            "longer be found.\n",
index 636aab69ead559f718a9ebef4e6ca5e8dfad933e..d81f6e2a97f5d4c70d6324cf0a7483a874cd7b9a 100644 (file)
@@ -3109,7 +3109,7 @@ static int ocfs2_clear_ext_refcount(handle_t *handle,
        el = path_leaf_el(path);
 
        index = ocfs2_search_extent_list(el, cpos);
-       if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+       if (index == -1) {
                ocfs2_error(sb,
                            "Inode %llu has an extent at cpos %u which can no "
                            "longer be found.\n",
index 1424c151cccce0170819ce4e0f36dad7d97461b8..a88b2a4fcc85171210cd17f8e4f9a791d5aa1d2b 100644 (file)
@@ -382,7 +382,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
 
        trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);
 
-       si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks,
+       si->si_bh = kcalloc(si->si_blocks, sizeof(struct buffer_head *),
                            GFP_KERNEL);
        if (!si->si_bh) {
                status = -ENOMEM;
index 7445af0b1aa341adb9008d05494890f11c4a4568..aa1eee06420f677b9f5a3594dd66b19567e46295 100644 (file)
@@ -168,7 +168,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                K(global_page_state(NR_WRITEBACK)),
                K(global_page_state(NR_ANON_PAGES)),
                K(global_page_state(NR_FILE_MAPPED)),
-               K(global_page_state(NR_SHMEM)),
+               K(i.sharedram),
                K(global_page_state(NR_SLAB_RECLAIMABLE) +
                                global_page_state(NR_SLAB_UNRECLAIMABLE)),
                K(global_page_state(NR_SLAB_RECLAIMABLE)),
index cfa63ee92c96c9e8b715fc73864ca81053c2b237..dfc791c42d6491c6d1a4537e6ee5d7c9bd21a24b 100644 (file)
@@ -925,15 +925,30 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
                                struct mm_walk *walk)
 {
        struct pagemapread *pm = walk->private;
-       unsigned long addr;
+       unsigned long addr = start;
        int err = 0;
-       pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
 
-       for (addr = start; addr < end; addr += PAGE_SIZE) {
-               err = add_to_pagemap(addr, &pme, pm);
-               if (err)
-                       break;
+       while (addr < end) {
+               struct vm_area_struct *vma = find_vma(walk->mm, addr);
+               pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
+               unsigned long vm_end;
+
+               if (!vma) {
+                       vm_end = end;
+               } else {
+                       vm_end = min(end, vma->vm_end);
+                       if (vma->vm_flags & VM_SOFTDIRTY)
+                               pme.pme |= PM_STATUS2(pm->v2, __PM_SOFT_DIRTY);
+               }
+
+               for (; addr < vm_end; addr += PAGE_SIZE) {
+                       err = add_to_pagemap(addr, &pme, pm);
+                       if (err)
+                               goto out;
+               }
        }
+
+out:
        return err;
 }
 
index 62a0de6632e1aa3c8de599e19db3f8d7e2a383e2..43e7a7eddac03cf7ca659cd891ca219bf702aade 100644 (file)
@@ -44,7 +44,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
 
        pages = end_index - start_index + 1;
 
-       page = kmalloc(sizeof(void *) * pages, GFP_KERNEL);
+       page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL);
        if (page == NULL)
                return res;
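kmalloc_array(n, size, flags) is the overflow-checked form of kmalloc(n * size, flags): if the multiplication would overflow it returns NULL instead of allocating a too-short buffer. A one-function illustrative sketch; the helper is hypothetical.

#include <linux/slab.h>

/* Allocate an array of n page pointers, failing cleanly (NULL) if
 * n * sizeof(void *) would overflow. */
static void **example_alloc_page_array(size_t n)
{
        return kmalloc_array(n, sizeof(void *), GFP_KERNEL);
}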
 
index 031c8d67fd5178bb5afca2b04c71637254b46873..5056babe00df93249465c22b8b6dc6d0ebc1723d 100644 (file)
@@ -27,6 +27,8 @@
  * the filesystem.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/fs.h>
 #include <linux/vfs.h>
 #include <linux/slab.h>
@@ -448,8 +450,7 @@ static int __init init_squashfs_fs(void)
                return err;
        }
 
-       printk(KERN_INFO "squashfs: version 4.0 (2009/01/31) "
-               "Phillip Lougher\n");
+       pr_info("version 4.0 (2009/01/31) Phillip Lougher\n");
 
        return 0;
 }
index 7ad634501e48cd7d39b1e8b621f0affce26765d7..e1c8d080c4271d655f75a771ec71ab6ee81c66e3 100644 (file)
  * lib/bitmap.c provides these functions:
  */
 
-extern int __bitmap_empty(const unsigned long *bitmap, int bits);
-extern int __bitmap_full(const unsigned long *bitmap, int bits);
+extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits);
+extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits);
 extern int __bitmap_equal(const unsigned long *bitmap1,
-                       const unsigned long *bitmap2, int bits);
+                         const unsigned long *bitmap2, unsigned int nbits);
 extern void __bitmap_complement(unsigned long *dst, const unsigned long *src,
-                       int bits);
+                       unsigned int nbits);
 extern void __bitmap_shift_right(unsigned long *dst,
                         const unsigned long *src, int shift, int bits);
 extern void __bitmap_shift_left(unsigned long *dst,
                         const unsigned long *src, int shift, int bits);
 extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
-                       const unsigned long *bitmap2, int bits);
+                       const unsigned long *bitmap2, unsigned int nbits);
 extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
-                       const unsigned long *bitmap2, int bits);
+                       const unsigned long *bitmap2, unsigned int nbits);
 extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
-                       const unsigned long *bitmap2, int bits);
+                       const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
-                       const unsigned long *bitmap2, int bits);
+                       const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_intersects(const unsigned long *bitmap1,
-                       const unsigned long *bitmap2, int bits);
+                       const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_subset(const unsigned long *bitmap1,
-                       const unsigned long *bitmap2, int bits);
-extern int __bitmap_weight(const unsigned long *bitmap, int bits);
+                       const unsigned long *bitmap2, unsigned int nbits);
+extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
 
-extern void bitmap_set(unsigned long *map, int i, int len);
-extern void bitmap_clear(unsigned long *map, int start, int nr);
+extern void bitmap_set(unsigned long *map, unsigned int start, int len);
+extern void bitmap_clear(unsigned long *map, unsigned int start, int len);
 extern unsigned long bitmap_find_next_zero_area(unsigned long *map,
                                         unsigned long size,
                                         unsigned long start,
@@ -140,9 +140,9 @@ extern void bitmap_onto(unsigned long *dst, const unsigned long *orig,
                const unsigned long *relmap, int bits);
 extern void bitmap_fold(unsigned long *dst, const unsigned long *orig,
                int sz, int bits);
-extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order);
-extern void bitmap_release_region(unsigned long *bitmap, int pos, int order);
-extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order);
+extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order);
+extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order);
+extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order);
 extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits);
 extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits);
 
@@ -188,15 +188,15 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
 }
 
 static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
-                       const unsigned long *src2, int nbits)
+                       const unsigned long *src2, unsigned int nbits)
 {
        if (small_const_nbits(nbits))
-               return (*dst = *src1 & *src2) != 0;
+               return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
        return __bitmap_and(dst, src1, src2, nbits);
 }
 
 static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
-                       const unsigned long *src2, int nbits)
+                       const unsigned long *src2, unsigned int nbits)
 {
        if (small_const_nbits(nbits))
                *dst = *src1 | *src2;
@@ -205,7 +205,7 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
 }
 
 static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
-                       const unsigned long *src2, int nbits)
+                       const unsigned long *src2, unsigned int nbits)
 {
        if (small_const_nbits(nbits))
                *dst = *src1 ^ *src2;
@@ -214,24 +214,24 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
 }
 
 static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1,
-                       const unsigned long *src2, int nbits)
+                       const unsigned long *src2, unsigned int nbits)
 {
        if (small_const_nbits(nbits))
-               return (*dst = *src1 & ~(*src2)) != 0;
+               return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
        return __bitmap_andnot(dst, src1, src2, nbits);
 }
 
 static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
-                       int nbits)
+                       unsigned int nbits)
 {
        if (small_const_nbits(nbits))
-               *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits);
+               *dst = ~(*src);
        else
                __bitmap_complement(dst, src, nbits);
 }
 
 static inline int bitmap_equal(const unsigned long *src1,
-                       const unsigned long *src2, int nbits)
+                       const unsigned long *src2, unsigned int nbits)
 {
        if (small_const_nbits(nbits))
                return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
@@ -240,7 +240,7 @@ static inline int bitmap_equal(const unsigned long *src1,
 }
 
 static inline int bitmap_intersects(const unsigned long *src1,
-                       const unsigned long *src2, int nbits)
+                       const unsigned long *src2, unsigned int nbits)
 {
        if (small_const_nbits(nbits))
                return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
@@ -249,7 +249,7 @@ static inline int bitmap_intersects(const unsigned long *src1,
 }
 
 static inline int bitmap_subset(const unsigned long *src1,
-                       const unsigned long *src2, int nbits)
+                       const unsigned long *src2, unsigned int nbits)
 {
        if (small_const_nbits(nbits))
                return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits));
@@ -257,7 +257,7 @@ static inline int bitmap_subset(const unsigned long *src1,
                return __bitmap_subset(src1, src2, nbits);
 }
 
-static inline int bitmap_empty(const unsigned long *src, int nbits)
+static inline int bitmap_empty(const unsigned long *src, unsigned nbits)
 {
        if (small_const_nbits(nbits))
                return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
@@ -265,7 +265,7 @@ static inline int bitmap_empty(const unsigned long *src, int nbits)
                return __bitmap_empty(src, nbits);
 }
 
-static inline int bitmap_full(const unsigned long *src, int nbits)
+static inline int bitmap_full(const unsigned long *src, unsigned int nbits)
 {
        if (small_const_nbits(nbits))
                return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
@@ -273,7 +273,7 @@ static inline int bitmap_full(const unsigned long *src, int nbits)
                return __bitmap_full(src, nbits);
 }
 
-static inline int bitmap_weight(const unsigned long *src, int nbits)
+static inline int bitmap_weight(const unsigned long *src, unsigned int nbits)
 {
        if (small_const_nbits(nbits))
                return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
@@ -284,7 +284,7 @@ static inline void bitmap_shift_right(unsigned long *dst,
                        const unsigned long *src, int n, int nbits)
 {
        if (small_const_nbits(nbits))
-               *dst = *src >> n;
+               *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> n;
        else
                __bitmap_shift_right(dst, src, n, nbits);
 }
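The masking added to the constant-nbits fast paths matters for the boolean return value: bits stored above nbits in the last word no longer leak into the result. A worked example under that assumption; the function is illustrative only.

#include <linux/bitmap.h>

/* With nbits == 4 only bits 0..3 belong to the bitmap.  A stray bit 8
 * shared by both inputs used to make bitmap_and() return non-zero;
 * with the BITMAP_LAST_WORD_MASK(4) == 0xf mask it now returns 0. */
static int example_and(void)
{
        DECLARE_BITMAP(a, 4) = { 0x100 };       /* garbage above nbits */
        DECLARE_BITMAP(b, 4) = { 0x100 };
        DECLARE_BITMAP(dst, 4);

        return bitmap_and(dst, a, b, 4);        /* 0 after this change */
}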
index 0846e6b931cefc9ea9baedfe9bfc93300e4c474b..89f67c1c316064614fca10a47a3be5319bbe874d 100644 (file)
@@ -2,7 +2,7 @@
 #define _LINUX_BYTEORDER_GENERIC_H
 
 /*
- * linux/byteorder_generic.h
+ * linux/byteorder/generic.h
  * Generic Byte-reordering support
  *
  * The "... p" macros, like le64_to_cpup, can be used with pointers
diff --git a/include/linux/cma.h b/include/linux/cma.h
new file mode 100644 (file)
index 0000000..371b930
--- /dev/null
@@ -0,0 +1,27 @@
+#ifndef __CMA_H__
+#define __CMA_H__
+
+/*
+ * There is always at least the global CMA area and a few optional
+ * areas configured in the kernel .config.
+ */
+#ifdef CONFIG_CMA_AREAS
+#define MAX_CMA_AREAS  (1 + CONFIG_CMA_AREAS)
+
+#else
+#define MAX_CMA_AREAS  (0)
+
+#endif
+
+struct cma;
+
+extern phys_addr_t cma_get_base(struct cma *cma);
+extern unsigned long cma_get_size(struct cma *cma);
+
+extern int __init cma_declare_contiguous(phys_addr_t size,
+                       phys_addr_t base, phys_addr_t limit,
+                       phys_addr_t alignment, unsigned int order_per_bit,
+                       bool fixed, struct cma **res_cma);
+extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align);
+extern bool cma_release(struct cma *cma, struct page *pages, int count);
+#endif
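The prototypes above form the new allocator-agnostic CMA interface that dma-contiguous now delegates to. A hedged sketch of how a caller might use it, following the parameter order shown in this header; the area size, and the reading of base == 0 and limit == 0 as "no placement constraint", are assumptions on my part rather than something this header spells out.

#include <linux/cma.h>
#include <linux/init.h>

static struct cma *example_cma;

/* Reserve a 16MB area at early boot... */
static int __init example_cma_reserve(void)
{
        return cma_declare_contiguous(16UL << 20 /* size */, 0 /* base */,
                                      0 /* limit */, 0 /* alignment */,
                                      0 /* order_per_bit */, false /* fixed */,
                                      &example_cma);
}

/* ...and carve page ranges out of it at runtime. */
static struct page *example_cma_get(int nr_pages)
{
        return cma_alloc(example_cma, nr_pages, 0 /* page-aligned */);
}

static void example_cma_put(struct page *pages, int nr_pages)
{
        cma_release(example_cma, pages, nr_pages);
}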
index 772eab5d524abb7e6e2bff2fa60cb8fadaad7054..569bbd039896f330923d53b4a6231ba1c73e70cc 100644 (file)
 
 #ifdef __KERNEL__
 
+#include <linux/device.h>
+
 struct cma;
 struct page;
-struct device;
 
 #ifdef CONFIG_DMA_CMA
 
-/*
- * There is always at least global CMA area and a few optional device
- * private areas configured in kernel .config.
- */
-#define MAX_CMA_AREAS  (1 + CONFIG_CMA_AREAS)
-
 extern struct cma *dma_contiguous_default_area;
 
 static inline struct cma *dev_get_cma_area(struct device *dev)
@@ -123,8 +118,6 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
 
 #else
 
-#define MAX_CMA_AREAS  (0)
-
 static inline struct cma *dev_get_cma_area(struct device *dev)
 {
        return NULL;
index 2daccaf4b547cc293a5315d7a112de1851959c89..1ab6c6913040523cb4a097b3a0bef71ce4fe4dab 100644 (file)
@@ -2688,7 +2688,7 @@ static const struct file_operations __fops = {                            \
        .read    = simple_attr_read,                                    \
        .write   = simple_attr_write,                                   \
        .llseek  = generic_file_llseek,                                 \
-};
+}
 
 static inline __printf(1, 2)
 void __simple_attr_check_format(const char *fmt, ...)
index fc7718c6bd3ebae5fe6dd3a6c9ee616285977204..ca060d7c4fa63e3395c4754ed8e8f5ab32ceff30 100644 (file)
@@ -322,16 +322,18 @@ extern int fsnotify_fasync(int fd, struct file *file, int on);
 extern void fsnotify_destroy_event(struct fsnotify_group *group,
                                   struct fsnotify_event *event);
 /* attach the event to the group notification queue */
-extern int fsnotify_add_notify_event(struct fsnotify_group *group,
-                                    struct fsnotify_event *event,
-                                    int (*merge)(struct list_head *,
-                                                 struct fsnotify_event *));
+extern int fsnotify_add_event(struct fsnotify_group *group,
+                             struct fsnotify_event *event,
+                             int (*merge)(struct list_head *,
+                                          struct fsnotify_event *));
+/* Remove passed event from the group's notification queue */
+extern void fsnotify_remove_event(struct fsnotify_group *group, struct fsnotify_event *event);
 /* true if the group notification queue is empty */
 extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
 /* return, but do not dequeue the first event on the notification queue */
-extern struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group);
+extern struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group);
 /* return AND dequeue the first event on the notification queue */
-extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group);
+extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group);
 
 /* functions used to manipulate the marks attached to inodes */
 
index 6eb1fb37de9a452534e1a1f42cdc2f10410bf600..5e7219dc0fae44968c452d9e8e6b642435cf74b6 100644 (file)
@@ -360,7 +360,7 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask);
 void *alloc_pages_exact(size_t size, gfp_t gfp_mask);
 void free_pages_exact(void *virt, size_t size);
 /* This is different from alloc_pages_exact_node !!! */
-void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
+void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
 
 #define __get_free_page(gfp_mask) \
                __get_free_pages((gfp_mask), 0)
diff --git a/include/linux/glob.h b/include/linux/glob.h
new file mode 100644 (file)
index 0000000..861d834
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _LINUX_GLOB_H
+#define _LINUX_GLOB_H
+
+#include <linux/types.h>       /* For bool */
+#include <linux/compiler.h>    /* For __pure */
+
+bool __pure glob_match(char const *pat, char const *str);
+
+#endif /* _LINUX_GLOB_H */
index 7fb31da45d03bbb40f1840159fc646736c187fb3..9286a46b7d69b539f027bcc890b3be976d20f228 100644 (file)
@@ -93,7 +93,7 @@ static inline int kmap_atomic_idx_push(void)
 
 #ifdef CONFIG_DEBUG_HIGHMEM
        WARN_ON_ONCE(in_irq() && !irqs_disabled());
-       BUG_ON(idx > KM_TYPE_NR);
+       BUG_ON(idx >= KM_TYPE_NR);
 #endif
        return idx;
 }
index b826239bdce0b26a614ae0802782860348dd6fc6..63579cb8d3dcfb5b7d36b80cc19d784e1522e3c9 100644 (file)
@@ -93,10 +93,6 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
 #endif /* CONFIG_DEBUG_VM */
 
 extern unsigned long transparent_hugepage_flags;
-extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-                         pmd_t *dst_pmd, pmd_t *src_pmd,
-                         struct vm_area_struct *vma,
-                         unsigned long addr, unsigned long end);
 extern int split_huge_page_to_list(struct page *page, struct list_head *list);
 static inline int split_huge_page(struct page *page)
 {
index a23c096b30807c20db8d967dbb130f4842fa44e2..6e6d338641fe7efc91641df82c494645526428a7 100644 (file)
@@ -87,7 +87,6 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
 #endif
 
 extern unsigned long hugepages_treat_as_movable;
-extern const unsigned long hugetlb_zero, hugetlb_infinity;
 extern int sysctl_hugetlb_shm_group;
 extern struct list_head huge_boot_pages;
 
index a9e2268ecccb0c0f96a02add9b7f122421f223b6..3dc22abbc68a212023d90d54556a94d8026bcfcb 100644 (file)
@@ -493,11 +493,6 @@ static inline char *hex_byte_pack_upper(char *buf, u8 byte)
        return buf;
 }
 
-static inline char * __deprecated pack_hex_byte(char *buf, u8 byte)
-{
-       return hex_byte_pack(buf, byte);
-}
-
 extern int hex_to_bin(char ch);
 extern int __must_check hex2bin(u8 *dst, const char *src, size_t count);
 
index a370ce57cf1d98e3e707a6a40c77b9318f4fc9ac..61e5b723ae73dbb1a424665c6e8a5d0fd8f9b2a7 100644 (file)
@@ -44,7 +44,7 @@ struct klist_node {
 
 extern void klist_add_tail(struct klist_node *n, struct klist *k);
 extern void klist_add_head(struct klist_node *n, struct klist *k);
-extern void klist_add_after(struct klist_node *n, struct klist_node *pos);
+extern void klist_add_behind(struct klist_node *n, struct klist_node *pos);
 extern void klist_add_before(struct klist_node *n, struct klist_node *pos);
 
 extern void klist_del(struct klist_node *n);
index ef959417106222d475fbc01c419cf42db05dcca6..cbbb96fcead9208da224866d849f3b3dfcf0337e 100644 (file)
@@ -654,15 +654,15 @@ static inline void hlist_add_before(struct hlist_node *n,
        *(n->pprev) = n;
 }
 
-static inline void hlist_add_after(struct hlist_node *n,
-                                       struct hlist_node *next)
+static inline void hlist_add_behind(struct hlist_node *n,
+                                   struct hlist_node *prev)
 {
-       next->next = n->next;
-       n->next = next;
-       next->pprev = &n->next;
+       n->next = prev->next;
+       prev->next = n;
+       n->pprev = &prev->next;
 
-       if(next->next)
-               next->next->pprev  = &next->next;
+       if (n->next)
+               n->next->pprev  = &n->next;
 }
 
 /* after that we'll appear to be on some hlist and hlist_del will work */
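Besides the rename, the argument order is swapped: the node being inserted now comes first, matching hlist_add_before(), so call sites read as "add n behind prev". A short illustrative sketch; the function and node names are made up.

#include <linux/list.h>

/* Build the list head -> a -> b.  With the old API the second call
 * was hlist_add_after(a, b); now the new node is the first argument. */
static void example_build(struct hlist_head *head,
                          struct hlist_node *a, struct hlist_node *b)
{
        hlist_add_head(a, head);
        hlist_add_behind(b, a);
}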
index b660e05b63d4fbc90e211d569f6d0229a2041fcc..e8cc45307f8f0d0f9897f0eb36d96b3c9fa7655c 100644 (file)
@@ -249,7 +249,7 @@ phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
 /*
  * Set the allocation direction to bottom-up or top-down.
  */
-static inline void memblock_set_bottom_up(bool enable)
+static inline void __init memblock_set_bottom_up(bool enable)
 {
        memblock.bottom_up = enable;
 }
@@ -264,7 +264,7 @@ static inline bool memblock_bottom_up(void)
        return memblock.bottom_up;
 }
 #else
-static inline void memblock_set_bottom_up(bool enable) {}
+static inline void __init memblock_set_bottom_up(bool enable) {}
 static inline bool memblock_bottom_up(void) { return false; }
 #endif
 
index 010d125bffbf5f41658878fe0dc6a25f062d1660..d9524c49d767b21a44c5513c2a31a3e47312d729 100644 (file)
@@ -26,11 +26,12 @@ enum {
        MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
 };
 
-/* Types for control the zone type of onlined memory */
+/* Types for controlling the zone type of onlined and offlined memory */
 enum {
-       ONLINE_KEEP,
-       ONLINE_KERNEL,
-       ONLINE_MOVABLE,
+       MMOP_OFFLINE = -1,
+       MMOP_ONLINE_KEEP,
+       MMOP_ONLINE_KERNEL,
+       MMOP_ONLINE_MOVABLE,
 };
 
 /*
@@ -258,6 +259,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {}
 extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
                void *arg, int (*func)(struct memory_block *, void *));
 extern int add_memory(int nid, u64 start, u64 size);
+extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default);
 extern int arch_add_memory(int nid, u64 start, u64 size);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern bool is_memblock_offlined(struct memory_block *mem);
index edd82a105220621a240f98c3097f8e24c145a38c..2f348d02f640fb049ed9c7e17b2742ba7e10f403 100644 (file)
@@ -20,11 +20,13 @@ extern void dump_page_badflags(struct page *page, const char *reason,
        } while (0)
 #define VM_WARN_ON(cond) WARN_ON(cond)
 #define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond)
+#define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format)
 #else
 #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
 #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
+#define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
 #endif
 
 #ifdef CONFIG_DEBUG_VIRTUAL
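VM_WARN_ONCE() behaves like WARN_ONCE() when CONFIG_DEBUG_VM is set and degrades to a compile-time expression check otherwise; an illustrative caller (the condition itself is made up):

    #include <linux/mm.h>
    #include <linux/mmdebug.h>

    static void check_free_page(struct page *page)
    {
            /* Complain once if a page being freed is still mapped somewhere. */
            VM_WARN_ONCE(page_mapcount(page) != 0,
                         "freeing mapped page %p (mapcount=%d)\n",
                         page, page_mapcount(page));
    }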
index deca87452528b2888823a4daf4cb148d43a52544..27288692241eebe928d9ec6020dc3c42d13f3755 100644 (file)
@@ -170,6 +170,8 @@ extern int __mmu_notifier_register(struct mmu_notifier *mn,
                                   struct mm_struct *mm);
 extern void mmu_notifier_unregister(struct mmu_notifier *mn,
                                    struct mm_struct *mm);
+extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
+                                              struct mm_struct *mm);
 extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
 extern void __mmu_notifier_release(struct mm_struct *mm);
 extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
@@ -288,6 +290,10 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
        set_pte_at(___mm, ___address, __ptep, ___pte);                  \
 })
 
+extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
+                                  void (*func)(struct rcu_head *rcu));
+extern void mmu_notifier_synchronize(void);
+
 #else /* CONFIG_MMU_NOTIFIER */
 
 static inline void mmu_notifier_release(struct mm_struct *mm)
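mmu_notifier_unregister_no_release() together with mmu_notifier_call_srcu() lets a notifier user detach without triggering ->release() and defer the kfree() past the notifier SRCU grace period; a sketch with hypothetical names:

    #include <linux/kernel.h>
    #include <linux/mmu_notifier.h>
    #include <linux/slab.h>

    struct my_notifier {
            struct mmu_notifier mn;
            struct rcu_head rcu;
    };

    static void my_notifier_free(struct rcu_head *rcu)
    {
            kfree(container_of(rcu, struct my_notifier, rcu));
    }

    static void my_notifier_teardown(struct my_notifier *n, struct mm_struct *mm)
    {
            /* Detach quietly, then free once all SRCU readers are done. */
            mmu_notifier_unregister_no_release(&n->mn, mm);
            mmu_notifier_call_srcu(&n->rcu, my_notifier_free);
    }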
index 6cbd1b6c3d2071e27bdce852b8bdb152f7fd0710..318df70518509249bb67a8e9811bf20fdf5fc519 100644 (file)
@@ -143,6 +143,7 @@ enum zone_stat_item {
        NR_SHMEM,               /* shmem pages (included tmpfs/GEM pages) */
        NR_DIRTIED,             /* page dirtyings since bootup */
        NR_WRITTEN,             /* page writings since bootup */
+       NR_PAGES_SCANNED,       /* pages scanned since last reclaim */
 #ifdef CONFIG_NUMA
        NUMA_HIT,               /* allocated in intended node */
        NUMA_MISS,              /* allocated in non intended node */
@@ -324,18 +325,11 @@ enum zone_type {
 #ifndef __GENERATING_BOUNDS_H
 
 struct zone {
-       /* Fields commonly accessed by the page allocator */
+       /* Read-mostly fields */
 
        /* zone watermarks, access with *_wmark_pages(zone) macros */
        unsigned long watermark[NR_WMARK];
 
-       /*
-        * When free pages are below this point, additional steps are taken
-        * when reading the number of free pages to avoid per-cpu counter
-        * drift allowing watermarks to be breached
-        */
-       unsigned long percpu_drift_mark;
-
        /*
         * We don't know if the memory that we're going to allocate will be freeable
         * or/and it will be released eventually, so to avoid totally wasting several
@@ -344,41 +338,26 @@ struct zone {
         * on the higher zones). This array is recalculated at runtime if the
         * sysctl_lowmem_reserve_ratio sysctl changes.
         */
-       unsigned long           lowmem_reserve[MAX_NR_ZONES];
-
-       /*
-        * This is a per-zone reserve of pages that should not be
-        * considered dirtyable memory.
-        */
-       unsigned long           dirty_balance_reserve;
+       long lowmem_reserve[MAX_NR_ZONES];
 
 #ifdef CONFIG_NUMA
        int node;
+#endif
+
        /*
-        * zone reclaim becomes active if more unmapped pages exist.
+        * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
+        * this zone's LRU.  Maintained by the pageout code.
         */
-       unsigned long           min_unmapped_pages;
-       unsigned long           min_slab_pages;
-#endif
+       unsigned int inactive_ratio;
+
+       struct pglist_data      *zone_pgdat;
        struct per_cpu_pageset __percpu *pageset;
+
        /*
-        * free areas of different sizes
+        * This is a per-zone reserve of pages that should not be
+        * considered dirtyable memory.
         */
-       spinlock_t              lock;
-#if defined CONFIG_COMPACTION || defined CONFIG_CMA
-       /* Set to true when the PG_migrate_skip bits should be cleared */
-       bool                    compact_blockskip_flush;
-
-       /* pfn where compaction free scanner should start */
-       unsigned long           compact_cached_free_pfn;
-       /* pfn where async and sync compaction migration scanner should start */
-       unsigned long           compact_cached_migrate_pfn[2];
-#endif
-#ifdef CONFIG_MEMORY_HOTPLUG
-       /* see spanned/present_pages for more description */
-       seqlock_t               span_seqlock;
-#endif
-       struct free_area        free_area[MAX_ORDER];
+       unsigned long           dirty_balance_reserve;
 
 #ifndef CONFIG_SPARSEMEM
        /*
@@ -388,74 +367,14 @@ struct zone {
        unsigned long           *pageblock_flags;
 #endif /* CONFIG_SPARSEMEM */
 
-#ifdef CONFIG_COMPACTION
-       /*
-        * On compaction failure, 1<<compact_defer_shift compactions
-        * are skipped before trying again. The number attempted since
-        * last failure is tracked with compact_considered.
-        */
-       unsigned int            compact_considered;
-       unsigned int            compact_defer_shift;
-       int                     compact_order_failed;
-#endif
-
-       ZONE_PADDING(_pad1_)
-
-       /* Fields commonly accessed by the page reclaim scanner */
-       spinlock_t              lru_lock;
-       struct lruvec           lruvec;
-
-       /* Evictions & activations on the inactive file list */
-       atomic_long_t           inactive_age;
-
-       unsigned long           pages_scanned;     /* since last reclaim */
-       unsigned long           flags;             /* zone flags, see below */
-
-       /* Zone statistics */
-       atomic_long_t           vm_stat[NR_VM_ZONE_STAT_ITEMS];
-
-       /*
-        * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
-        * this zone's LRU.  Maintained by the pageout code.
-        */
-       unsigned int inactive_ratio;
-
-
-       ZONE_PADDING(_pad2_)
-       /* Rarely used or read-mostly fields */
-
+#ifdef CONFIG_NUMA
        /*
-        * wait_table           -- the array holding the hash table
-        * wait_table_hash_nr_entries   -- the size of the hash table array
-        * wait_table_bits      -- wait_table_size == (1 << wait_table_bits)
-        *
-        * The purpose of all these is to keep track of the people
-        * waiting for a page to become available and make them
-        * runnable again when possible. The trouble is that this
-        * consumes a lot of space, especially when so few things
-        * wait on pages at a given time. So instead of using
-        * per-page waitqueues, we use a waitqueue hash table.
-        *
-        * The bucket discipline is to sleep on the same queue when
-        * colliding and wake all in that wait queue when removing.
-        * When something wakes, it must check to be sure its page is
-        * truly available, a la thundering herd. The cost of a
-        * collision is great, but given the expected load of the
-        * table, they should be so rare as to be outweighed by the
-        * benefits from the saved space.
-        *
-        * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
-        * primary users of these fields, and in mm/page_alloc.c
-        * free_area_init_core() performs the initialization of them.
+        * zone reclaim becomes active if more unmapped pages exist.
         */
-       wait_queue_head_t       * wait_table;
-       unsigned long           wait_table_hash_nr_entries;
-       unsigned long           wait_table_bits;
+       unsigned long           min_unmapped_pages;
+       unsigned long           min_slab_pages;
+#endif /* CONFIG_NUMA */
 
-       /*
-        * Discontig memory support fields.
-        */
-       struct pglist_data      *zone_pgdat;
        /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
        unsigned long           zone_start_pfn;
 
@@ -500,9 +419,11 @@ struct zone {
         * adjust_managed_page_count() should be used instead of directly
         * touching zone->managed_pages and totalram_pages.
         */
+       unsigned long           managed_pages;
        unsigned long           spanned_pages;
        unsigned long           present_pages;
-       unsigned long           managed_pages;
+
+       const char              *name;
 
        /*
         * Number of MIGRATE_RESEVE page block. To maintain for just
@@ -510,10 +431,94 @@ struct zone {
         */
        int                     nr_migrate_reserve_block;
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+       /* see spanned/present_pages for more description */
+       seqlock_t               span_seqlock;
+#endif
+
        /*
-        * rarely used fields:
+        * wait_table           -- the array holding the hash table
+        * wait_table_hash_nr_entries   -- the size of the hash table array
+        * wait_table_bits      -- wait_table_size == (1 << wait_table_bits)
+        *
+        * The purpose of all these is to keep track of the people
+        * waiting for a page to become available and make them
+        * runnable again when possible. The trouble is that this
+        * consumes a lot of space, especially when so few things
+        * wait on pages at a given time. So instead of using
+        * per-page waitqueues, we use a waitqueue hash table.
+        *
+        * The bucket discipline is to sleep on the same queue when
+        * colliding and wake all in that wait queue when removing.
+        * When something wakes, it must check to be sure its page is
+        * truly available, a la thundering herd. The cost of a
+        * collision is great, but given the expected load of the
+        * table, they should be so rare as to be outweighed by the
+        * benefits from the saved space.
+        *
+        * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
+        * primary users of these fields, and in mm/page_alloc.c
+        * free_area_init_core() performs the initialization of them.
         */
-       const char              *name;
+       wait_queue_head_t       *wait_table;
+       unsigned long           wait_table_hash_nr_entries;
+       unsigned long           wait_table_bits;
+
+       ZONE_PADDING(_pad1_)
+
+       /* Write-intensive fields used from the page allocator */
+       spinlock_t              lock;
+
+       /* free areas of different sizes */
+       struct free_area        free_area[MAX_ORDER];
+
+       /* zone flags, see below */
+       unsigned long           flags;
+
+       ZONE_PADDING(_pad2_)
+
+       /* Write-intensive fields used by page reclaim */
+
+       /* Fields commonly accessed by the page reclaim scanner */
+       spinlock_t              lru_lock;
+       struct lruvec           lruvec;
+
+       /* Evictions & activations on the inactive file list */
+       atomic_long_t           inactive_age;
+
+       /*
+        * When free pages are below this point, additional steps are taken
+        * when reading the number of free pages to avoid per-cpu counter
+        * drift allowing watermarks to be breached
+        */
+       unsigned long percpu_drift_mark;
+
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+       /* pfn where compaction free scanner should start */
+       unsigned long           compact_cached_free_pfn;
+       /* pfn where async and sync compaction migration scanner should start */
+       unsigned long           compact_cached_migrate_pfn[2];
+#endif
+
+#ifdef CONFIG_COMPACTION
+       /*
+        * On compaction failure, 1<<compact_defer_shift compactions
+        * are skipped before trying again. The number attempted since
+        * last failure is tracked with compact_considered.
+        */
+       unsigned int            compact_considered;
+       unsigned int            compact_defer_shift;
+       int                     compact_order_failed;
+#endif
+
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+       /* Set to true when the PG_migrate_skip bits should be cleared */
+       bool                    compact_blockskip_flush;
+#endif
+
+       ZONE_PADDING(_pad3_)
+       /* Zone statistics */
+       atomic_long_t           vm_stat[NR_VM_ZONE_STAT_ITEMS];
 } ____cacheline_internodealigned_in_smp;
 
 typedef enum {
@@ -529,6 +534,7 @@ typedef enum {
        ZONE_WRITEBACK,                 /* reclaim scanning has recently found
                                         * many pages under writeback
                                         */
+       ZONE_FAIR_DEPLETED,             /* fair zone policy batch depleted */
 } zone_flags_t;
 
 static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
@@ -566,6 +572,11 @@ static inline int zone_is_reclaim_locked(const struct zone *zone)
        return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
 }
 
+static inline int zone_is_fair_depleted(const struct zone *zone)
+{
+       return test_bit(ZONE_FAIR_DEPLETED, &zone->flags);
+}
+
 static inline int zone_is_oom_locked(const struct zone *zone)
 {
        return test_bit(ZONE_OOM_LOCKED, &zone->flags);
@@ -872,6 +883,8 @@ static inline int zone_movable_is_highmem(void)
 {
 #if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
        return movable_zone == ZONE_HIGHMEM;
+#elif defined(CONFIG_HIGHMEM)
+       return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
 #else
        return 0;
 #endif
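zone_is_fair_depleted() is a test_bit() wrapper like the other zone flag helpers; a trivial illustrative check (not the real allocator fast path):

    #include <linux/mmzone.h>

    /* Skip zones whose fair-policy allocation batch has been used up. */
    static bool zone_ok_for_fair_policy(struct zone *zone)
    {
            return !zone_is_fair_depleted(zone);
    }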
index 58b9a02c38d29b118d98d7bbd22ac82dde9ec342..83a6aeda899d564843a8a6ea9a4bf090d3da1fe1 100644 (file)
@@ -430,7 +430,15 @@ static inline int num_node_state(enum node_states state)
        for_each_node_mask((__node), node_states[__state])
 
 #define first_online_node      first_node(node_states[N_ONLINE])
-#define next_online_node(nid)  next_node((nid), node_states[N_ONLINE])
+#define first_memory_node      first_node(node_states[N_MEMORY])
+static inline int next_online_node(int nid)
+{
+       return next_node(nid, node_states[N_ONLINE]);
+}
+static inline int next_memory_node(int nid)
+{
+       return next_node(nid, node_states[N_MEMORY]);
+}
 
 extern int nr_node_ids;
 extern int nr_online_nodes;
@@ -471,6 +479,7 @@ static inline int num_node_state(enum node_states state)
        for ( (node) = 0; (node) == 0; (node) = 1)
 
 #define first_online_node      0
+#define first_memory_node      0
 #define next_online_node(nid)  (MAX_NUMNODES)
 #define nr_node_ids            1
 #define nr_online_nodes                1
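first_memory_node and next_memory_node mirror the online-node helpers for nodes that actually have memory; an illustrative round-robin helper built on top of them:

    #include <linux/nodemask.h>

    /* Next node with memory after 'nid', wrapping back to the first one. */
    static int next_memory_node_wrap(int nid)
    {
            int next = next_memory_node(nid);

            if (next >= MAX_NUMNODES)
                    next = first_memory_node;
            return next;
    }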
index 4cd62677feb9eaf8f9a4647231663a8d7a42f986..647395a1a5508f7f138e80ad24afea4fd09638d3 100644 (file)
@@ -55,8 +55,8 @@ extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
                             struct mem_cgroup *memcg, nodemask_t *nodemask,
                             const char *message);
 
-extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
-extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
+extern bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_flags);
+extern void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_flags);
 
 extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
                               int order, const nodemask_t *nodemask);
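The renamed pair follows the usual trylock/unlock convention and returns bool; the expected call pattern, with the actual OOM-kill logic elided, looks roughly like this:

    #include <linux/oom.h>

    static bool try_oom_kill(struct zonelist *zonelist, gfp_t gfp_mask)
    {
            if (!oom_zonelist_trylock(zonelist, gfp_mask))
                    return false;   /* an OOM kill is already in progress */

            /* ... pick and kill a victim here ... */

            oom_zonelist_unlock(zonelist, gfp_mask);
            return true;
    }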
index 8304959ad33641b892f05fb216466b691b871caa..e1f5fcd79792c6b121106618f087efa7130692b5 100644 (file)
@@ -171,13 +171,12 @@ static inline int __TestClearPage##uname(struct page *page)               \
 #define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname)            \
        __SETPAGEFLAG(uname, lname)  __CLEARPAGEFLAG(uname, lname)
 
-#define PAGEFLAG_FALSE(uname)                                          \
-static inline int Page##uname(const struct page *page)                 \
-                       { return 0; }
-
 #define TESTSCFLAG(uname, lname)                                       \
        TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname)
 
+#define TESTPAGEFLAG_FALSE(uname)                                      \
+static inline int Page##uname(const struct page *page) { return 0; }
+
 #define SETPAGEFLAG_NOOP(uname)                                                \
 static inline void SetPage##uname(struct page *page) {  }
 
@@ -187,12 +186,21 @@ static inline void ClearPage##uname(struct page *page) {  }
 #define __CLEARPAGEFLAG_NOOP(uname)                                    \
 static inline void __ClearPage##uname(struct page *page) {  }
 
+#define TESTSETFLAG_FALSE(uname)                                       \
+static inline int TestSetPage##uname(struct page *page) { return 0; }
+
 #define TESTCLEARFLAG_FALSE(uname)                                     \
 static inline int TestClearPage##uname(struct page *page) { return 0; }
 
 #define __TESTCLEARFLAG_FALSE(uname)                                   \
 static inline int __TestClearPage##uname(struct page *page) { return 0; }
 
+#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname)                        \
+       SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname)
+
+#define TESTSCFLAG_FALSE(uname)                                                \
+       TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)
+
 struct page;   /* forward declaration */
 
 TESTPAGEFLAG(Locked, locked)
@@ -248,7 +256,6 @@ PAGEFLAG_FALSE(HighMem)
 PAGEFLAG(SwapCache, swapcache)
 #else
 PAGEFLAG_FALSE(SwapCache)
-       SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache)
 #endif
 
 PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
@@ -258,8 +265,8 @@ PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
 PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
        TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked)
 #else
-PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked)
-       TESTCLEARFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
+PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked)
+       TESTSCFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
 #endif
 
 #ifdef CONFIG_ARCH_USES_PG_UNCACHED
index e1474ae18c8847cba4a2f17c396c6e7b59167fed..3df8c7db7a4ec64e65b0ece15ad03a5fd58ccb30 100644 (file)
@@ -484,6 +484,9 @@ static inline int lock_page_killable(struct page *page)
 /*
  * lock_page_or_retry - Lock the page, unless this would block and the
  * caller indicated that it can handle a retry.
+ *
+ * Return value and mmap_sem implications depend on flags; see
+ * __lock_page_or_retry().
  */
 static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
                                     unsigned int flags)
index 319ff7e53efbc1f0bd551125bd70951c2a8c83ea..0990997a5304bb13f798bd79cd96097eb17681e5 100644 (file)
@@ -31,7 +31,7 @@ static inline const char *printk_skip_level(const char *buffer)
 }
 
 /* printk's without a loglevel use this.. */
-#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
+#define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
 
 /* We show everything that is MORE important than this.. */
 #define CONSOLE_LOGLEVEL_SILENT  0 /* Mum's the word */
index 8183b46fbaa2d6da9817ead257735396a6ec7b0c..372ad5e0dcb88df4af003686c7057af3e900d281 100644 (file)
@@ -432,9 +432,9 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
 }
 
 /**
- * hlist_add_after_rcu
- * @prev: the existing element to add the new element after.
+ * hlist_add_behind_rcu
  * @n: the new element to add to the hash list.
+ * @prev: the existing element to add the new element after.
  *
  * Description:
  * Adds the specified element to the specified hlist
@@ -449,8 +449,8 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
  * hlist_for_each_entry_rcu(), used to prevent memory-consistency
  * problems on Alpha CPUs.
  */
-static inline void hlist_add_after_rcu(struct hlist_node *prev,
-                                      struct hlist_node *n)
+static inline void hlist_add_behind_rcu(struct hlist_node *n,
+                                       struct hlist_node *prev)
 {
        n->next = prev->next;
        n->pprev = &prev->next;
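As with the non-RCU helper, hlist_add_behind_rcu() now takes the new element first; a hedged sketch of a writer inserting into a sorted hlist read under RCU (lock and types are illustrative):

    #include <linux/rculist.h>
    #include <linux/spinlock.h>

    struct ritem {
            int key;
            struct hlist_node node;
    };

    static DEFINE_SPINLOCK(rlist_lock);   /* serialises writers only */

    static void ritem_insert_sorted(struct hlist_head *head, struct ritem *new)
    {
            struct ritem *pos;
            struct hlist_node *prev = NULL;

            spin_lock(&rlist_lock);
            hlist_for_each_entry(pos, head, node) {
                    if (pos->key > new->key)
                            break;
                    prev = &pos->node;
            }
            if (prev)
                    hlist_add_behind_rcu(&new->node, prev);
            else
                    hlist_add_head_rcu(&new->node, head);
            spin_unlock(&rlist_lock);
    }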
index 4bdbee80eede2bb45e2615d3a875be4bb65659b3..1eb64043c076fe97db25443feb7ddb5bfe32e38c 100644 (file)
@@ -311,7 +311,6 @@ extern void lru_add_page_tail(struct page *page, struct page *page_tail,
                         struct lruvec *lruvec, struct list_head *head);
 extern void activate_page(struct page *);
 extern void mark_page_accessed(struct page *);
-extern void init_page_accessed(struct page *page);
 extern void lru_add_drain(void);
 extern void lru_add_drain_cpu(int cpu);
 extern void lru_add_drain_all(void);
index 4b8a89189a296143a5d17eac9e5bec5dafa42afe..b87696fdf06ab14d14ff01f5cfd7b2650f1e6c96 100644 (file)
@@ -113,7 +113,7 @@ extern struct vm_struct *remove_vm_area(const void *addr);
 extern struct vm_struct *find_vm_area(const void *addr);
 
 extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
-                       struct page ***pages);
+                       struct page **pages);
 #ifdef CONFIG_MMU
 extern int map_kernel_range_noflush(unsigned long start, unsigned long size,
                                    pgprot_t prot, struct page **pages);
index 13af0d450bf6e088fc2b603fc0be2c35d8cbf7f6..f9d41a6e361f42f79a20bfd0c5b2f4579d61dcac 100644 (file)
@@ -11,7 +11,7 @@ struct zbud_ops {
 
 struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops);
 void zbud_destroy_pool(struct zbud_pool *pool);
-int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp,
+int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
        unsigned long *handle);
 void zbud_free(struct zbud_pool *pool, unsigned long handle);
 int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries);
index 9c5a6b4de0a30d716b2ec72470813be3a2dd82a8..197abb2a54c5d713dd1a909da735bee60ab28d12 100644 (file)
@@ -493,64 +493,6 @@ extern int deflateInit2 (z_streamp strm,
    method). msg is set to null if there is no error message.  deflateInit2 does
    not perform any compression: this will be done by deflate().
 */
-                            
-#if 0
-extern int zlib_deflateSetDictionary (z_streamp strm,
-                                                    const Byte *dictionary,
-                                                    uInt  dictLength);
-#endif
-/*
-     Initializes the compression dictionary from the given byte sequence
-   without producing any compressed output. This function must be called
-   immediately after deflateInit, deflateInit2 or deflateReset, before any
-   call of deflate. The compressor and decompressor must use exactly the same
-   dictionary (see inflateSetDictionary).
-
-     The dictionary should consist of strings (byte sequences) that are likely
-   to be encountered later in the data to be compressed, with the most commonly
-   used strings preferably put towards the end of the dictionary. Using a
-   dictionary is most useful when the data to be compressed is short and can be
-   predicted with good accuracy; the data can then be compressed better than
-   with the default empty dictionary.
-
-     Depending on the size of the compression data structures selected by
-   deflateInit or deflateInit2, a part of the dictionary may in effect be
-   discarded, for example if the dictionary is larger than the window size in
-   deflate or deflate2. Thus the strings most likely to be useful should be
-   put at the end of the dictionary, not at the front.
-
-     Upon return of this function, strm->adler is set to the Adler32 value
-   of the dictionary; the decompressor may later use this value to determine
-   which dictionary has been used by the compressor. (The Adler32 value
-   applies to the whole dictionary even if only a subset of the dictionary is
-   actually used by the compressor.)
-
-     deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
-   parameter is invalid (such as NULL dictionary) or the stream state is
-   inconsistent (for example if deflate has already been called for this stream
-   or if the compression method is bsort). deflateSetDictionary does not
-   perform any compression: this will be done by deflate().
-*/
-
-#if 0
-extern int zlib_deflateCopy (z_streamp dest, z_streamp source);
-#endif
-
-/*
-     Sets the destination stream as a complete copy of the source stream.
-
-     This function can be useful when several compression strategies will be
-   tried, for example when there are several ways of pre-processing the input
-   data with a filter. The streams that will be discarded should then be freed
-   by calling deflateEnd.  Note that deflateCopy duplicates the internal
-   compression state which can be quite large, so this strategy is slow and
-   can consume lots of memory.
-
-     deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
-   enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
-   (such as zalloc being NULL). msg is left unchanged in both source and
-   destination.
-*/
 
 extern int zlib_deflateReset (z_streamp strm);
 /*
@@ -568,27 +510,6 @@ static inline unsigned long deflateBound(unsigned long s)
        return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11;
 }
 
-#if 0
-extern int zlib_deflateParams (z_streamp strm, int level, int strategy);
-#endif
-/*
-     Dynamically update the compression level and compression strategy.  The
-   interpretation of level and strategy is as in deflateInit2.  This can be
-   used to switch between compression and straight copy of the input data, or
-   to switch to a different kind of input data requiring a different
-   strategy. If the compression level is changed, the input available so far
-   is compressed with the old level (and may be flushed); the new level will
-   take effect only at the next call of deflate().
-
-     Before the call of deflateParams, the stream state must be set as for
-   a call of deflate(), since the currently available input may have to
-   be compressed and flushed. In particular, strm->avail_out must be non-zero.
-
-     deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source
-   stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR
-   if strm->avail_out was zero.
-*/
-
 /*   
 extern int inflateInit2 (z_streamp strm, int  windowBits);
 
@@ -631,45 +552,6 @@ extern int inflateInit2 (z_streamp strm, int  windowBits);
    and avail_out are unchanged.)
 */
 
-extern int zlib_inflateSetDictionary (z_streamp strm,
-                                                    const Byte *dictionary,
-                                                    uInt  dictLength);
-/*
-     Initializes the decompression dictionary from the given uncompressed byte
-   sequence. This function must be called immediately after a call of inflate,
-   if that call returned Z_NEED_DICT. The dictionary chosen by the compressor
-   can be determined from the adler32 value returned by that call of inflate.
-   The compressor and decompressor must use exactly the same dictionary (see
-   deflateSetDictionary).  For raw inflate, this function can be called
-   immediately after inflateInit2() or inflateReset() and before any call of
-   inflate() to set the dictionary.  The application must insure that the
-   dictionary that was used for compression is provided.
-
-     inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
-   parameter is invalid (such as NULL dictionary) or the stream state is
-   inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
-   expected one (incorrect adler32 value). inflateSetDictionary does not
-   perform any decompression: this will be done by subsequent calls of
-   inflate().
-*/
-
-#if 0
-extern int zlib_inflateSync (z_streamp strm);
-#endif
-/* 
-    Skips invalid compressed data until a full flush point (see above the
-  description of deflate with Z_FULL_FLUSH) can be found, or until all
-  available input is skipped. No output is provided.
-
-    inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR
-  if no more input was provided, Z_DATA_ERROR if no flush point has been found,
-  or Z_STREAM_ERROR if the stream structure was inconsistent. In the success
-  case, the application may save the current current value of total_in which
-  indicates where valid compressed data was found. In the error case, the
-  application may repeatedly call inflateSync, providing more input each time,
-  until success or end of the input data.
-*/
-
 extern int zlib_inflateReset (z_streamp strm);
 /*
      This function is equivalent to inflateEnd followed by inflateInit,
diff --git a/include/linux/zpool.h b/include/linux/zpool.h
new file mode 100644 (file)
index 0000000..f14bd75
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * zpool memory storage api
+ *
+ * Copyright (C) 2014 Dan Streetman
+ *
+ * This is a common frontend for the zbud and zsmalloc memory
+ * storage pool implementations.  Typically, this is used to
+ * store compressed memory.
+ */
+
+#ifndef _ZPOOL_H_
+#define _ZPOOL_H_
+
+struct zpool;
+
+struct zpool_ops {
+       int (*evict)(struct zpool *pool, unsigned long handle);
+};
+
+/*
+ * Control how a handle is mapped.  It will be ignored if the
+ * implementation does not support it.  Its use is optional.
+ * Note that this does not refer to memory protection, it
+ * refers to how the memory will be copied in/out if copying
+ * is necessary during mapping; read-write is the safest as
+ * it copies the existing memory in on map, and copies the
+ * changed memory back out on unmap.  Write-only does not copy
+ * in the memory and should only be used for initialization.
+ * If in doubt, use ZPOOL_MM_DEFAULT which is read-write.
+ */
+enum zpool_mapmode {
+       ZPOOL_MM_RW, /* normal read-write mapping */
+       ZPOOL_MM_RO, /* read-only (no copy-out at unmap time) */
+       ZPOOL_MM_WO, /* write-only (no copy-in at map time) */
+
+       ZPOOL_MM_DEFAULT = ZPOOL_MM_RW
+};
+
+struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops);
+
+char *zpool_get_type(struct zpool *pool);
+
+void zpool_destroy_pool(struct zpool *pool);
+
+int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp,
+                       unsigned long *handle);
+
+void zpool_free(struct zpool *pool, unsigned long handle);
+
+int zpool_shrink(struct zpool *pool, unsigned int pages,
+                       unsigned int *reclaimed);
+
+void *zpool_map_handle(struct zpool *pool, unsigned long handle,
+                       enum zpool_mapmode mm);
+
+void zpool_unmap_handle(struct zpool *pool, unsigned long handle);
+
+u64 zpool_get_total_size(struct zpool *pool);
+
+
+/**
+ * struct zpool_driver - driver implementation for zpool
+ * @type:      name of the driver.
+ * @list:      entry in the list of zpool drivers.
+ * @create:    create a new pool.
+ * @destroy:   destroy a pool.
+ * @malloc:    allocate mem from a pool.
+ * @free:      free mem from a pool.
+ * @shrink:    shrink the pool.
+ * @map:       map a handle.
+ * @unmap:     unmap a handle.
+ * @total_size:        get total size of a pool.
+ *
+ * This is created by a zpool implementation and registered
+ * with zpool.
+ */
+struct zpool_driver {
+       char *type;
+       struct module *owner;
+       atomic_t refcount;
+       struct list_head list;
+
+       void *(*create)(gfp_t gfp, struct zpool_ops *ops);
+       void (*destroy)(void *pool);
+
+       int (*malloc)(void *pool, size_t size, gfp_t gfp,
+                               unsigned long *handle);
+       void (*free)(void *pool, unsigned long handle);
+
+       int (*shrink)(void *pool, unsigned int pages,
+                               unsigned int *reclaimed);
+
+       void *(*map)(void *pool, unsigned long handle,
+                               enum zpool_mapmode mm);
+       void (*unmap)(void *pool, unsigned long handle);
+
+       u64 (*total_size)(void *pool);
+};
+
+void zpool_register_driver(struct zpool_driver *driver);
+
+int zpool_unregister_driver(struct zpool_driver *driver);
+
+int zpool_evict(void *pool, unsigned long handle);
+
+#endif
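A hedged walk-through of the zpool API from a caller's point of view; the "zbud" type string assumes zbud is built in, and error handling is trimmed:

    #include <linux/errno.h>
    #include <linux/gfp.h>
    #include <linux/string.h>
    #include <linux/zpool.h>

    static int zpool_demo(void)
    {
            const char data[] = "compressed payload";   /* illustrative */
            struct zpool *pool;
            unsigned long handle;
            void *dst;
            int ret;

            pool = zpool_create_pool("zbud", GFP_KERNEL, NULL);
            if (!pool)
                    return -ENOMEM;

            ret = zpool_malloc(pool, sizeof(data), GFP_KERNEL, &handle);
            if (ret)
                    goto out;

            /* Write-only map: no copy-in, fine for initialising the object. */
            dst = zpool_map_handle(pool, handle, ZPOOL_MM_WO);
            memcpy(dst, data, sizeof(data));
            zpool_unmap_handle(pool, handle);

            zpool_free(pool, handle);
    out:
            zpool_destroy_pool(pool);
            return ret;
    }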
index 4e4f2f8b1ac222a3673ddb316d30b21f7527c7bf..dd2b5467d905816e72c22026f109e971bbef8724 100644 (file)
@@ -17,6 +17,7 @@
        {MR_MEMORY_HOTPLUG,     "memory_hotplug"},              \
        {MR_SYSCALL,            "syscall_or_cpuset"},           \
        {MR_MEMPOLICY_MBIND,    "mempolicy_mbind"},             \
+       {MR_NUMA_MISPLACED,     "numa_misplaced"},              \
        {MR_CMA,                "cma"}
 
 TRACE_EVENT(mm_migrate_pages,
index 1c9fabde69e4bb4aa5f13bc23a3b0d374549bbb6..ce0803b8d05f340f42116c1f129d08a645344e23 100644 (file)
@@ -28,12 +28,10 @@ TRACE_EVENT(mm_lru_insertion,
 
        TP_PROTO(
                struct page *page,
-               unsigned long pfn,
-               int lru,
-               unsigned long flags
+               int lru
        ),
 
-       TP_ARGS(page, pfn, lru, flags),
+       TP_ARGS(page, lru),
 
        TP_STRUCT__entry(
                __field(struct page *,  page    )
@@ -44,9 +42,9 @@ TRACE_EVENT(mm_lru_insertion,
 
        TP_fast_assign(
                __entry->page   = page;
-               __entry->pfn    = pfn;
+               __entry->pfn    = page_to_pfn(page);
                __entry->lru    = lru;
-               __entry->flags  = flags;
+               __entry->flags  = trace_pagemap_flags(page);
        ),
 
        /* Flag format is based on page-types.c formatting for pagemap */
@@ -64,9 +62,9 @@ TRACE_EVENT(mm_lru_insertion,
 
 TRACE_EVENT(mm_lru_activate,
 
-       TP_PROTO(struct page *page, unsigned long pfn),
+       TP_PROTO(struct page *page),
 
-       TP_ARGS(page, pfn),
+       TP_ARGS(page),
 
        TP_STRUCT__entry(
                __field(struct page *,  page    )
@@ -75,7 +73,7 @@ TRACE_EVENT(mm_lru_activate,
 
        TP_fast_assign(
                __entry->page   = page;
-               __entry->pfn    = pfn;
+               __entry->pfn    = page_to_pfn(page);
        ),
 
        /* Flag format is based on page-types.c formatting for pagemap */
index 41066e49e8809464651d8335de4a77165a864033..a291b7ef473893891481f188b81f7dfce1550942 100644 (file)
@@ -807,15 +807,53 @@ config LOG_BUF_SHIFT
        range 12 21
        default 17
        help
-         Select kernel log buffer size as a power of 2.
+         Select the minimal kernel log buffer size as a power of 2.
+         The final size is affected by the LOG_CPU_MAX_BUF_SHIFT config
+         parameter, see below. A larger size may also be forced by the
+         "log_buf_len" boot parameter.
+
          Examples:
-                    17 => 128 KB
+                    17 => 128 KB
                     16 => 64 KB
-                    15 => 32 KB
-                    14 => 16 KB
+                    15 => 32 KB
+                    14 => 16 KB
                     13 =>  8 KB
                     12 =>  4 KB
 
+config LOG_CPU_MAX_BUF_SHIFT
+       int "CPU kernel log buffer size contribution (13 => 8 KB, 17 => 128KB)"
+       range 0 21
+       default 12 if !BASE_SMALL
+       default 0 if BASE_SMALL
+       help
+         This option allows increasing the default ring buffer size
+         according to the number of CPUs. The value defines the contribution
+         of each CPU as a power of 2. The used space is typically only a few
+         lines; however, it might be much more when problems are reported,
+         e.g. backtraces.
+
+         The increased size means that a new buffer has to be allocated and
+         the original static one is unused. It makes sense only on systems
+         with many CPUs. Therefore this value is used only when the sum of
+         contributions is greater than half of the default kernel ring
+         buffer as defined by LOG_BUF_SHIFT. The default values are set
+         so that more than 64 CPUs are needed to trigger the allocation.
+
+         Also, this option is ignored when the "log_buf_len" kernel parameter
+         is used, as it forces an exact (power of two) size of the ring buffer.
+
+         The number of possible CPUs is used for this computation, ignoring
+         hotplugging, making the computation optimal for the worst-case
+         scenario while allowing a simple algorithm to be used from bootup.
+
+         Example shift values and their meaning:
+                    17 => 128 KB for each CPU
+                    16 =>  64 KB for each CPU
+                    15 =>  32 KB for each CPU
+                    14 =>  16 KB for each CPU
+                    13 =>   8 KB for each CPU
+                    12 =>   4 KB for each CPU
+
 #
 # Architectures with an unreliable sched_clock() should select this:
 #
index 8e9bc9c3dbb7ef49c360bf2775f57e0b74414cc6..c447cd9848d1bbce5f8e013c225b7fe9a26645fd 100644 (file)
@@ -106,7 +106,7 @@ static inline struct audit_entry *audit_init_entry(u32 field_count)
        if (unlikely(!entry))
                return NULL;
 
-       fields = kzalloc(sizeof(*fields) * field_count, GFP_KERNEL);
+       fields = kcalloc(field_count, sizeof(*fields), GFP_KERNEL);
        if (unlikely(!fields)) {
                kfree(entry);
                return NULL;
@@ -160,7 +160,7 @@ static __u32 *classes[AUDIT_SYSCALL_CLASSES];
 
 int __init audit_register_class(int class, unsigned *list)
 {
-       __u32 *p = kzalloc(AUDIT_BITMASK_SIZE * sizeof(__u32), GFP_KERNEL);
+       __u32 *p = kcalloc(AUDIT_BITMASK_SIZE, sizeof(__u32), GFP_KERNEL);
        if (!p)
                return -ENOMEM;
        while (*list != ~0U) {
index e5c4668f1799d15ce1d527b1e357d795f3d748ea..88c6b3e425834e89943bb1a291a7a350333e3593 100644 (file)
@@ -455,6 +455,7 @@ static void exit_mm(struct task_struct * tsk)
        task_unlock(tsk);
        mm_update_next_owner(mm);
        mmput(mm);
+       clear_thread_flag(TIF_MEMDIE);
 }
 
 /*
index 13e839dbca07ea72fb06d0b5ecbb97379c4a7f57..de1a6bb6861db5e92f0884e2b733cb5b6988679e 100644 (file)
@@ -45,6 +45,7 @@
 #include <linux/poll.h>
 #include <linux/irq_work.h>
 #include <linux/utsname.h>
+#include <linux/ctype.h>
 
 #include <asm/uaccess.h>
 
@@ -56,7 +57,7 @@
 
 int console_printk[4] = {
        CONSOLE_LOGLEVEL_DEFAULT,       /* console_loglevel */
-       DEFAULT_MESSAGE_LOGLEVEL,       /* default_message_loglevel */
+       MESSAGE_LOGLEVEL_DEFAULT,       /* default_message_loglevel */
        CONSOLE_LOGLEVEL_MIN,           /* minimum_console_loglevel */
        CONSOLE_LOGLEVEL_DEFAULT,       /* default_console_loglevel */
 };
@@ -113,9 +114,9 @@ static int __down_trylock_console_sem(unsigned long ip)
  * This is used for debugging the mess that is the VT code by
  * keeping track if we have the console semaphore held. It's
  * definitely not the perfect debug tool (we don't know if _WE_
- * hold it are racing, but it helps tracking those weird code
- * path in the console code where we end up in places I want
- * locked without the console sempahore held
+ * hold it and are racing, but it helps tracking those weird code
+ * paths in the console code where we end up in places I want
+ * locked without the console semaphore held).
  */
 static int console_locked, console_suspended;
 
@@ -146,8 +147,8 @@ static int console_may_schedule;
  * the overall length of the record.
  *
  * The heads to the first and last entry in the buffer, as well as the
- * sequence numbers of these both entries are maintained when messages
- * are stored..
+ * sequence numbers of these entries are maintained when messages are
+ * stored.
  *
  * If the heads indicate available messages, the length in the header
  * tells the start next message. A length == 0 for the next message
@@ -257,7 +258,7 @@ static u64 clear_seq;
 static u32 clear_idx;
 
 #define PREFIX_MAX             32
-#define LOG_LINE_MAX           1024 - PREFIX_MAX
+#define LOG_LINE_MAX           (1024 - PREFIX_MAX)
 
 /* record buffer */
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
@@ -266,6 +267,7 @@ static u32 clear_idx;
 #define LOG_ALIGN __alignof__(struct printk_log)
 #endif
 #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
+#define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT)
 static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
 static char *log_buf = __log_buf;
 static u32 log_buf_len = __LOG_BUF_LEN;
@@ -344,7 +346,7 @@ static int log_make_free_space(u32 msg_size)
        while (log_first_seq < log_next_seq) {
                if (logbuf_has_space(msg_size, false))
                        return 0;
-               /* drop old messages until we have enough continuous space */
+               /* drop old messages until we have enough contiguous space */
                log_first_idx = log_next(log_first_idx);
                log_first_seq++;
        }
@@ -453,11 +455,7 @@ static int log_store(int facility, int level,
        return msg->text_len;
 }
 
-#ifdef CONFIG_SECURITY_DMESG_RESTRICT
-int dmesg_restrict = 1;
-#else
-int dmesg_restrict;
-#endif
+int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT);
 
 static int syslog_action_restricted(int type)
 {
@@ -828,34 +826,74 @@ void log_buf_kexec_setup(void)
 /* requested log_buf_len from kernel cmdline */
 static unsigned long __initdata new_log_buf_len;
 
-/* save requested log_buf_len since it's too early to process it */
-static int __init log_buf_len_setup(char *str)
+/* we practice scaling the ring buffer by powers of 2 */
+static void __init log_buf_len_update(unsigned size)
 {
-       unsigned size = memparse(str, &str);
-
        if (size)
                size = roundup_pow_of_two(size);
        if (size > log_buf_len)
                new_log_buf_len = size;
+}
+
+/* save requested log_buf_len since it's too early to process it */
+static int __init log_buf_len_setup(char *str)
+{
+       unsigned size = memparse(str, &str);
+
+       log_buf_len_update(size);
 
        return 0;
 }
 early_param("log_buf_len", log_buf_len_setup);
 
+static void __init log_buf_add_cpu(void)
+{
+       unsigned int cpu_extra;
+
+       /*
+        * archs should set up cpu_possible_bits properly with
+        * set_cpu_possible() after setup_arch() but just in
+        * case lets ensure this is valid.
+        */
+       if (num_possible_cpus() == 1)
+               return;
+
+       cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN;
+
+       /* by default this only continues past here for systems with > 64 CPUs */
+       if (cpu_extra <= __LOG_BUF_LEN / 2)
+               return;
+
+       pr_info("log_buf_len individual max cpu contribution: %d bytes\n",
+               __LOG_CPU_MAX_BUF_LEN);
+       pr_info("log_buf_len total cpu_extra contributions: %d bytes\n",
+               cpu_extra);
+       pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN);
+
+       log_buf_len_update(cpu_extra + __LOG_BUF_LEN);
+}
+
 void __init setup_log_buf(int early)
 {
        unsigned long flags;
        char *new_log_buf;
        int free;
 
+       if (log_buf != __log_buf)
+               return;
+
+       if (!early && !new_log_buf_len)
+               log_buf_add_cpu();
+
        if (!new_log_buf_len)
                return;
 
        if (early) {
                new_log_buf =
-                       memblock_virt_alloc(new_log_buf_len, PAGE_SIZE);
+                       memblock_virt_alloc(new_log_buf_len, LOG_ALIGN);
        } else {
-               new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len, 0);
+               new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len,
+                                                         LOG_ALIGN);
        }
 
        if (unlikely(!new_log_buf)) {
@@ -872,7 +910,7 @@ void __init setup_log_buf(int early)
        memcpy(log_buf, __log_buf, __LOG_BUF_LEN);
        raw_spin_unlock_irqrestore(&logbuf_lock, flags);
 
-       pr_info("log_buf_len: %d\n", log_buf_len);
+       pr_info("log_buf_len: %d bytes\n", log_buf_len);
        pr_info("early log buf free: %d(%d%%)\n",
                free, (free * 100) / __LOG_BUF_LEN);
 }
@@ -881,7 +919,7 @@ static bool __read_mostly ignore_loglevel;
 
 static int __init ignore_loglevel_setup(char *str)
 {
-       ignore_loglevel = 1;
+       ignore_loglevel = true;
        pr_info("debug: ignoring loglevel setting.\n");
 
        return 0;
@@ -947,11 +985,7 @@ static inline void boot_delay_msec(int level)
 }
 #endif
 
-#if defined(CONFIG_PRINTK_TIME)
-static bool printk_time = 1;
-#else
-static bool printk_time;
-#endif
+static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME);
 module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
 
 static size_t print_time(u64 ts, char *buf)
@@ -1310,7 +1344,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
                         * for pending data, not the size; return the count of
                         * records, not the length.
                         */
-                       error = log_next_idx - syslog_idx;
+                       error = log_next_seq - syslog_seq;
                } else {
                        u64 seq = syslog_seq;
                        u32 idx = syslog_idx;
@@ -1416,10 +1450,9 @@ static int have_callable_console(void)
 /*
  * Can we actually use the console at this time on this cpu?
  *
- * Console drivers may assume that per-cpu resources have
- * been allocated. So unless they're explicitly marked as
- * being able to cope (CON_ANYTIME) don't call them until
- * this CPU is officially up.
+ * Console drivers may assume that per-cpu resources have been allocated. So
+ * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
+ * call them until this CPU is officially up.
  */
 static inline int can_use_console(unsigned int cpu)
 {
@@ -1432,8 +1465,10 @@ static inline int can_use_console(unsigned int cpu)
  * console_lock held, and 'console_locked' set) if it
  * is successful, false otherwise.
  */
-static int console_trylock_for_printk(unsigned int cpu)
+static int console_trylock_for_printk(void)
 {
+       unsigned int cpu = smp_processor_id();
+
        if (!console_trylock())
                return 0;
        /*
@@ -1476,7 +1511,7 @@ static struct cont {
        struct task_struct *owner;      /* task of first print*/
        u64 ts_nsec;                    /* time of first print */
        u8 level;                       /* log level of first message */
-       u8 facility;                    /* log level of first message */
+       u8 facility;                    /* log facility of first message */
        enum log_flags flags;           /* prefix, newline flags */
        bool flushed:1;                 /* buffer sealed and committed */
 } cont;
@@ -1608,7 +1643,8 @@ asmlinkage int vprintk_emit(int facility, int level,
                 */
                if (!oops_in_progress && !lockdep_recursing(current)) {
                        recursion_bug = 1;
-                       goto out_restore_irqs;
+                       local_irq_restore(flags);
+                       return 0;
                }
                zap_locks();
        }
@@ -1716,21 +1752,30 @@ asmlinkage int vprintk_emit(int facility, int level,
 
        logbuf_cpu = UINT_MAX;
        raw_spin_unlock(&logbuf_lock);
+       lockdep_on();
+       local_irq_restore(flags);
 
        /* If called from the scheduler, we can not call up(). */
        if (!in_sched) {
+               lockdep_off();
+               /*
+                * Disable preemption to avoid being preempted while holding
+                * console_sem which would prevent anyone from printing to
+                * console
+                */
+               preempt_disable();
+
                /*
                 * Try to acquire and then immediately release the console
                 * semaphore.  The release will print out buffers and wake up
                 * /dev/kmsg and syslog() users.
                 */
-               if (console_trylock_for_printk(this_cpu))
+               if (console_trylock_for_printk())
                        console_unlock();
+               preempt_enable();
+               lockdep_on();
        }
 
-       lockdep_on();
-out_restore_irqs:
-       local_irq_restore(flags);
        return printed_len;
 }
 EXPORT_SYMBOL(vprintk_emit);
@@ -1802,7 +1847,7 @@ EXPORT_SYMBOL(printk);
 
 #define LOG_LINE_MAX           0
 #define PREFIX_MAX             0
-#define LOG_LINE_MAX 0
+
 static u64 syslog_seq;
 static u32 syslog_idx;
 static u64 console_seq;
@@ -1881,11 +1926,12 @@ static int __add_preferred_console(char *name, int idx, char *options,
        return 0;
 }
 /*
- * Set up a list of consoles.  Called from init/main.c
+ * Set up a console.  Called via do_early_param() in init/main.c
+ * for each "console=" parameter in the boot command line.
  */
 static int __init console_setup(char *str)
 {
-       char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */
+       char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */
        char *s, *options, *brl_options = NULL;
        int idx;
 
@@ -1902,7 +1948,8 @@ static int __init console_setup(char *str)
                strncpy(buf, str, sizeof(buf) - 1);
        }
        buf[sizeof(buf) - 1] = 0;
-       if ((options = strchr(str, ',')) != NULL)
+       options = strchr(str, ',');
+       if (options)
                *(options++) = 0;
 #ifdef __sparc__
        if (!strcmp(str, "ttya"))
@@ -1911,7 +1958,7 @@ static int __init console_setup(char *str)
                strcpy(buf, "ttyS1");
 #endif
        for (s = buf; *s; s++)
-               if ((*s >= '0' && *s <= '9') || *s == ',')
+               if (isdigit(*s) || *s == ',')
                        break;
        idx = simple_strtoul(s, NULL, 10);
        *s = 0;
@@ -1950,7 +1997,6 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha
             i++, c++)
                if (strcmp(c->name, name) == 0 && c->index == idx) {
                        strlcpy(c->name, name_new, sizeof(c->name));
-                       c->name[sizeof(c->name) - 1] = 0;
                        c->options = options;
                        c->index = idx_new;
                        return i;
@@ -1959,12 +2005,12 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha
        return -1;
 }
 
-bool console_suspend_enabled = 1;
+bool console_suspend_enabled = true;
 EXPORT_SYMBOL(console_suspend_enabled);
 
 static int __init console_suspend_disable(char *str)
 {
-       console_suspend_enabled = 0;
+       console_suspend_enabled = false;
        return 1;
 }
 __setup("no_console_suspend", console_suspend_disable);
@@ -2045,8 +2091,8 @@ EXPORT_SYMBOL(console_lock);
 /**
  * console_trylock - try to lock the console system for exclusive use.
  *
- * Tried to acquire a lock which guarantees that the caller has
- * exclusive access to the console system and the console_drivers list.
+ * Try to acquire a lock which guarantees that the caller has exclusive
+ * access to the console system and the console_drivers list.
  *
  * returns 1 on success, and 0 on failure to acquire the lock.
  */
@@ -2618,14 +2664,13 @@ EXPORT_SYMBOL(__printk_ratelimit);
 bool printk_timed_ratelimit(unsigned long *caller_jiffies,
                        unsigned int interval_msecs)
 {
-       if (*caller_jiffies == 0
-                       || !time_in_range(jiffies, *caller_jiffies,
-                                       *caller_jiffies
-                                       + msecs_to_jiffies(interval_msecs))) {
-               *caller_jiffies = jiffies;
-               return true;
-       }
-       return false;
+       unsigned long elapsed = jiffies - *caller_jiffies;
+
+       if (*caller_jiffies && elapsed <= msecs_to_jiffies(interval_msecs))
+               return false;
+
+       *caller_jiffies = jiffies;
+       return true;
 }
 EXPORT_SYMBOL(printk_timed_ratelimit);
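The rewritten printk_timed_ratelimit() keeps the same contract: the caller hands in a zero-initialised timestamp and at most one call per interval returns true. An illustrative caller:

    #include <linux/printk.h>

    static void report_slow_path(void)
    {
            static unsigned long last_warned;   /* must start out as 0 */

            if (printk_timed_ratelimit(&last_warned, 5000 /* ms */))
                    pr_warn("slow path hit; further reports are rate-limited\n");
    }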
 
index 487653b5844f92c722ae781210064824310452a7..aff8aa14f54795faa42ae9482da75e3ea9610a82 100644 (file)
@@ -670,7 +670,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
                        if (cond_func(cpu, info)) {
                                ret = smp_call_function_single(cpu, func,
                                                                info, wait);
-                               WARN_ON_ONCE(!ret);
+                               WARN_ON_ONCE(ret);
                        }
                preempt_enable();
        }
index 75b22e22a72c1abd4865bf7c0313af5514970939..75875a741b5e7f9cb26fcd837609638b1ffafda8 100644 (file)
@@ -1240,8 +1240,7 @@ static struct ctl_table vm_table[] = {
                .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
                .proc_handler   = hugetlb_sysctl_handler,
-               .extra1         = (void *)&hugetlb_zero,
-               .extra2         = (void *)&hugetlb_infinity,
+               .extra1         = &zero,
        },
 #ifdef CONFIG_NUMA
        {
@@ -1250,8 +1249,7 @@ static struct ctl_table vm_table[] = {
                .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
                .proc_handler   = &hugetlb_mempolicy_sysctl_handler,
-               .extra1         = (void *)&hugetlb_zero,
-               .extra2         = (void *)&hugetlb_infinity,
+               .extra1         = &zero,
        },
 #endif
         {
@@ -1274,8 +1272,7 @@ static struct ctl_table vm_table[] = {
                .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
                .proc_handler   = hugetlb_overcommit_handler,
-               .extra1         = (void *)&hugetlb_zero,
-               .extra2         = (void *)&hugetlb_infinity,
+               .extra1         = &zero,
        },
 #endif
        {
index c3319bd1b0408c1f5822748a4d0b1567c799760d..51b29e9d2ba65a700c15cb71923bce1359ebfa41 100644 (file)
@@ -260,9 +260,11 @@ static void watchdog_overflow_callback(struct perf_event *event,
                        return;
 
                if (hardlockup_panic)
-                       panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+                       panic("Watchdog detected hard LOCKUP on cpu %d",
+                             this_cpu);
                else
-                       WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+                       WARN(1, "Watchdog detected hard LOCKUP on cpu %d",
+                            this_cpu);
 
                __this_cpu_write(hard_watchdog_warn, true);
                return;
@@ -345,7 +347,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
                        }
                }
 
-               printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
+               pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
                        smp_processor_id(), duration,
                        current->comm, task_pid_nr(current));
                print_modules();
@@ -484,7 +486,7 @@ static int watchdog_nmi_enable(unsigned int cpu)
        if (PTR_ERR(event) == -EOPNOTSUPP)
                pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
        else if (PTR_ERR(event) == -ENOENT)
-               pr_warning("disabled (cpu%i): hardware events not enabled\n",
+               pr_warn("disabled (cpu%i): hardware events not enabled\n",
                         cpu);
        else
                pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
index a8a775730c09c27c4da8188cf84cc5e6c7608fe0..df872659ddd3d699a3f204b50720c59ff0f29092 100644 (file)
@@ -396,6 +396,39 @@ config CPU_RMAP
 config DQL
        bool
 
+config GLOB
+       bool
+#      This actually supports modular compilation, but the module overhead
+#      is ridiculous for the amount of code involved.  Until an out-of-tree
+#      driver asks for it, we'll just link it directly into the kernel
+#      when required.  Since we're ignoring out-of-tree users, there's also
+#      no need to bother prompting for a manual decision:
+#      prompt "glob_match() function"
+       help
+         This option provides a glob_match function for performing
+         simple text pattern matching.  It originated in the ATA code
+         to blacklist particular drive models, but other device drivers
+         may need similar functionality.
+
+         All drivers in the Linux kernel tree that require this function
+         should automatically select this option.  Say N unless you
+         are compiling an out-of-tree driver which tells you that it
+         depends on this.
+
+config GLOB_SELFTEST
+       bool "glob self-test on init"
+       default n
+       depends on GLOB
+       help
+         This option enables a simple self-test of the glob_match
+         function on startup.  It is primarily useful for people
+         working on the code to ensure they haven't introduced any
+         regressions.
+
+         It only adds a little bit of code and slows kernel boot (or
+         module load) by a small amount, so you're welcome to play with
+         it, but you probably don't need it.
+
 #
 # Netlink attribute parsing support is select'ed if needed
 #
index cfe7df8f62ccb3b46d7b976fdd2acdefd9007540..cb45f59685e69530caf6d5b11f14c2e37d01772c 100644 (file)
@@ -15,7 +15,7 @@ config PRINTK_TIME
          The behavior is also controlled by the kernel command line
          parameter printk.time=1. See Documentation/kernel-parameters.txt
 
-config DEFAULT_MESSAGE_LOGLEVEL
+config MESSAGE_LOGLEVEL_DEFAULT
        int "Default message log level (1-7)"
        range 1 7
        default "4"
index 8427df95dade789ccffd7e7388b377ce00e84fda..d6b4bc496408e5ce44735606286cb923fbbefee3 100644 (file)
@@ -137,6 +137,8 @@ obj-$(CONFIG_CORDIC) += cordic.o
 
 obj-$(CONFIG_DQL) += dynamic_queue_limits.o
 
+obj-$(CONFIG_GLOB) += glob.o
+
 obj-$(CONFIG_MPILIB) += mpi/
 obj-$(CONFIG_SIGNATURE) += digsig.o
 
index 06f7e4fe8d2de4046a3139106058b9c831c9b789..1e031f2c9aba1ba4f536147ef5a2adbc6954addb 100644 (file)
@@ -40,9 +40,9 @@
  * for the best explanations of this ordering.
  */
 
-int __bitmap_empty(const unsigned long *bitmap, int bits)
+int __bitmap_empty(const unsigned long *bitmap, unsigned int bits)
 {
-       int k, lim = bits/BITS_PER_LONG;
+       unsigned int k, lim = bits/BITS_PER_LONG;
        for (k = 0; k < lim; ++k)
                if (bitmap[k])
                        return 0;
@@ -55,9 +55,9 @@ int __bitmap_empty(const unsigned long *bitmap, int bits)
 }
 EXPORT_SYMBOL(__bitmap_empty);
 
-int __bitmap_full(const unsigned long *bitmap, int bits)
+int __bitmap_full(const unsigned long *bitmap, unsigned int bits)
 {
-       int k, lim = bits/BITS_PER_LONG;
+       unsigned int k, lim = bits/BITS_PER_LONG;
        for (k = 0; k < lim; ++k)
                if (~bitmap[k])
                        return 0;
@@ -71,9 +71,9 @@ int __bitmap_full(const unsigned long *bitmap, int bits)
 EXPORT_SYMBOL(__bitmap_full);
 
 int __bitmap_equal(const unsigned long *bitmap1,
-               const unsigned long *bitmap2, int bits)
+               const unsigned long *bitmap2, unsigned int bits)
 {
-       int k, lim = bits/BITS_PER_LONG;
+       unsigned int k, lim = bits/BITS_PER_LONG;
        for (k = 0; k < lim; ++k)
                if (bitmap1[k] != bitmap2[k])
                        return 0;
@@ -86,14 +86,14 @@ int __bitmap_equal(const unsigned long *bitmap1,
 }
 EXPORT_SYMBOL(__bitmap_equal);
 
-void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits)
+void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits)
 {
-       int k, lim = bits/BITS_PER_LONG;
+       unsigned int k, lim = bits/BITS_PER_LONG;
        for (k = 0; k < lim; ++k)
                dst[k] = ~src[k];
 
        if (bits % BITS_PER_LONG)
-               dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits);
+               dst[k] = ~src[k];
 }
 EXPORT_SYMBOL(__bitmap_complement);
 
@@ -182,23 +182,26 @@ void __bitmap_shift_left(unsigned long *dst,
 EXPORT_SYMBOL(__bitmap_shift_left);
 
 int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
-                               const unsigned long *bitmap2, int bits)
+                               const unsigned long *bitmap2, unsigned int bits)
 {
-       int k;
-       int nr = BITS_TO_LONGS(bits);
+       unsigned int k;
+       unsigned int lim = bits/BITS_PER_LONG;
        unsigned long result = 0;
 
-       for (k = 0; k < nr; k++)
+       for (k = 0; k < lim; k++)
                result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+       if (bits % BITS_PER_LONG)
+               result |= (dst[k] = bitmap1[k] & bitmap2[k] &
+                          BITMAP_LAST_WORD_MASK(bits));
        return result != 0;
 }
 EXPORT_SYMBOL(__bitmap_and);
 
 void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
-                               const unsigned long *bitmap2, int bits)
+                               const unsigned long *bitmap2, unsigned int bits)
 {
-       int k;
-       int nr = BITS_TO_LONGS(bits);
+       unsigned int k;
+       unsigned int nr = BITS_TO_LONGS(bits);
 
        for (k = 0; k < nr; k++)
                dst[k] = bitmap1[k] | bitmap2[k];
@@ -206,10 +209,10 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
 EXPORT_SYMBOL(__bitmap_or);
 
 void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
-                               const unsigned long *bitmap2, int bits)
+                               const unsigned long *bitmap2, unsigned int bits)
 {
-       int k;
-       int nr = BITS_TO_LONGS(bits);
+       unsigned int k;
+       unsigned int nr = BITS_TO_LONGS(bits);
 
        for (k = 0; k < nr; k++)
                dst[k] = bitmap1[k] ^ bitmap2[k];
@@ -217,22 +220,25 @@ void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
 EXPORT_SYMBOL(__bitmap_xor);
 
 int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
-                               const unsigned long *bitmap2, int bits)
+                               const unsigned long *bitmap2, unsigned int bits)
 {
-       int k;
-       int nr = BITS_TO_LONGS(bits);
+       unsigned int k;
+       unsigned int lim = bits/BITS_PER_LONG;
        unsigned long result = 0;
 
-       for (k = 0; k < nr; k++)
+       for (k = 0; k < lim; k++)
                result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
+       if (bits % BITS_PER_LONG)
+               result |= (dst[k] = bitmap1[k] & ~bitmap2[k] &
+                          BITMAP_LAST_WORD_MASK(bits));
        return result != 0;
 }
 EXPORT_SYMBOL(__bitmap_andnot);
 
 int __bitmap_intersects(const unsigned long *bitmap1,
-                               const unsigned long *bitmap2, int bits)
+                       const unsigned long *bitmap2, unsigned int bits)
 {
-       int k, lim = bits/BITS_PER_LONG;
+       unsigned int k, lim = bits/BITS_PER_LONG;
        for (k = 0; k < lim; ++k)
                if (bitmap1[k] & bitmap2[k])
                        return 1;
@@ -245,9 +251,9 @@ int __bitmap_intersects(const unsigned long *bitmap1,
 EXPORT_SYMBOL(__bitmap_intersects);
 
 int __bitmap_subset(const unsigned long *bitmap1,
-                               const unsigned long *bitmap2, int bits)
+                   const unsigned long *bitmap2, unsigned int bits)
 {
-       int k, lim = bits/BITS_PER_LONG;
+       unsigned int k, lim = bits/BITS_PER_LONG;
        for (k = 0; k < lim; ++k)
                if (bitmap1[k] & ~bitmap2[k])
                        return 0;
@@ -259,9 +265,10 @@ int __bitmap_subset(const unsigned long *bitmap1,
 }
 EXPORT_SYMBOL(__bitmap_subset);
 
-int __bitmap_weight(const unsigned long *bitmap, int bits)
+int __bitmap_weight(const unsigned long *bitmap, unsigned int bits)
 {
-       int k, w = 0, lim = bits/BITS_PER_LONG;
+       unsigned int k, lim = bits/BITS_PER_LONG;
+       int w = 0;
 
        for (k = 0; k < lim; k++)
                w += hweight_long(bitmap[k]);
@@ -273,42 +280,42 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
 }
 EXPORT_SYMBOL(__bitmap_weight);
 
-void bitmap_set(unsigned long *map, int start, int nr)
+void bitmap_set(unsigned long *map, unsigned int start, int len)
 {
        unsigned long *p = map + BIT_WORD(start);
-       const int size = start + nr;
+       const unsigned int size = start + len;
        int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
        unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
 
-       while (nr - bits_to_set >= 0) {
+       while (len - bits_to_set >= 0) {
                *p |= mask_to_set;
-               nr -= bits_to_set;
+               len -= bits_to_set;
                bits_to_set = BITS_PER_LONG;
                mask_to_set = ~0UL;
                p++;
        }
-       if (nr) {
+       if (len) {
                mask_to_set &= BITMAP_LAST_WORD_MASK(size);
                *p |= mask_to_set;
        }
 }
 EXPORT_SYMBOL(bitmap_set);
 
-void bitmap_clear(unsigned long *map, int start, int nr)
+void bitmap_clear(unsigned long *map, unsigned int start, int len)
 {
        unsigned long *p = map + BIT_WORD(start);
-       const int size = start + nr;
+       const unsigned int size = start + len;
        int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
        unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
 
-       while (nr - bits_to_clear >= 0) {
+       while (len - bits_to_clear >= 0) {
                *p &= ~mask_to_clear;
-               nr -= bits_to_clear;
+               len -= bits_to_clear;
                bits_to_clear = BITS_PER_LONG;
                mask_to_clear = ~0UL;
                p++;
        }
-       if (nr) {
+       if (len) {
                mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
                *p &= ~mask_to_clear;
        }
@@ -664,13 +671,8 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
 
 int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
 {
-       char *nl  = strchr(bp, '\n');
-       int len;
-
-       if (nl)
-               len = nl - bp;
-       else
-               len = strlen(bp);
+       char *nl  = strchrnul(bp, '\n');
+       int len = nl - bp;
 
        return __bitmap_parselist(bp, len, 0, maskp, nmaskbits);
 }
@@ -716,7 +718,7 @@ EXPORT_SYMBOL(bitmap_parselist_user);
  *
  * If for example, just bits 4 through 7 are set in @buf, then @pos
  * values 4 through 7 will get mapped to 0 through 3, respectively,
- * and other @pos values will get mapped to 0.  When @pos value 7
+ * and other @pos values will get mapped to -1.  When @pos value 7
  * gets mapped to (returns) @ord value 3 in this example, that means
  * that bit 7 is the 3rd (starting with 0th) set bit in @buf.
  *
@@ -1046,7 +1048,7 @@ enum {
        REG_OP_RELEASE,         /* clear all bits in region */
 };
 
-static int __reg_op(unsigned long *bitmap, int pos, int order, int reg_op)
+static int __reg_op(unsigned long *bitmap, unsigned int pos, int order, int reg_op)
 {
        int nbits_reg;          /* number of bits in region */
        int index;              /* index first long of region in bitmap */
@@ -1112,11 +1114,11 @@ done:
  * Return the bit offset in bitmap of the allocated region,
  * or -errno on failure.
  */
-int bitmap_find_free_region(unsigned long *bitmap, int bits, int order)
+int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order)
 {
-       int pos, end;           /* scans bitmap by regions of size order */
+       unsigned int pos, end;          /* scans bitmap by regions of size order */
 
-       for (pos = 0 ; (end = pos + (1 << order)) <= bits; pos = end) {
+       for (pos = 0 ; (end = pos + (1U << order)) <= bits; pos = end) {
                if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
                        continue;
                __reg_op(bitmap, pos, order, REG_OP_ALLOC);
@@ -1137,7 +1139,7 @@ EXPORT_SYMBOL(bitmap_find_free_region);
  *
  * No return value.
  */
-void bitmap_release_region(unsigned long *bitmap, int pos, int order)
+void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order)
 {
        __reg_op(bitmap, pos, order, REG_OP_RELEASE);
 }
@@ -1154,12 +1156,11 @@ EXPORT_SYMBOL(bitmap_release_region);
  * Return 0 on success, or %-EBUSY if specified region wasn't
  * free (not all bits were zero).
  */
-int bitmap_allocate_region(unsigned long *bitmap, int pos, int order)
+int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order)
 {
        if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
                return -EBUSY;
-       __reg_op(bitmap, pos, order, REG_OP_ALLOC);
-       return 0;
+       return __reg_op(bitmap, pos, order, REG_OP_ALLOC);
 }
 EXPORT_SYMBOL(bitmap_allocate_region);
 
index d4932f745e9214aaf62d8ad54b0f1092dd555414..76a712e6e20e3d0480d9f338565079a9089560dd 100644 (file)
@@ -121,11 +121,7 @@ EXPORT_SYMBOL(get_options);
  *     @retptr: (output) Optional pointer to next char after parse completes
  *
  *     Parses a string into a number.  The number stored at @ptr is
- *     potentially suffixed with %K (for kilobytes, or 1024 bytes),
- *     %M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or
- *     1073741824).  If the number is suffixed with K, M, or G, then
- *     the return value is the number multiplied by one kilobyte, one
- *     megabyte, or one gigabyte, respectively.
+ *     potentially suffixed with K, M, G, T, P, E.
  */
 
 unsigned long long memparse(const char *ptr, char **retptr)
@@ -135,6 +131,15 @@ unsigned long long memparse(const char *ptr, char **retptr)
        unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
 
        switch (*endptr) {
+       case 'E':
+       case 'e':
+               ret <<= 10;
+       case 'P':
+       case 'p':
+               ret <<= 10;
+       case 'T':
+       case 't':
+               ret <<= 10;
        case 'G':
        case 'g':
                ret <<= 10;
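
With the new E/P/T cases falling through into the existing G/M/K ones, each recognized suffix simply accumulates another <<10, so 'T' ends up shifting by 40 and 'E' by 60. A standalone analogue using strtoull(3), for illustration only (memparse_sketch is a made-up name):

    #include <stdio.h>
    #include <stdlib.h>

    /* Parse a number with an optional K/M/G/T/P/E suffix (either case).
     * Each case deliberately falls through, mirroring the switch in
     * memparse() above: 'T' accumulates four <<10 shifts, i.e. <<40. */
    static unsigned long long memparse_sketch(const char *ptr, char **retptr)
    {
            char *endptr;
            unsigned long long ret = strtoull(ptr, &endptr, 0);

            switch (*endptr) {
            case 'E': case 'e': ret <<= 10; /* fall through */
            case 'P': case 'p': ret <<= 10; /* fall through */
            case 'T': case 't': ret <<= 10; /* fall through */
            case 'G': case 'g': ret <<= 10; /* fall through */
            case 'M': case 'm': ret <<= 10; /* fall through */
            case 'K': case 'k': ret <<= 10; endptr++; /* fall through */
            default:
                    break;
            }
            if (retptr)
                    *retptr = endptr;
            return ret;
    }

    int main(void)
    {
            printf("%llu\n", memparse_sketch("2T", NULL)); /* 2199023255552, i.e. 2 << 40 */
            return 0;
    }
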
diff --git a/lib/glob.c b/lib/glob.c
new file mode 100644 (file)
index 0000000..500fc80
--- /dev/null
@@ -0,0 +1,287 @@
+#include <linux/module.h>
+#include <linux/glob.h>
+
+/*
+ * The only reason this code can be compiled as a module is because the
+ * ATA code that depends on it can be as well.  In practice, they're
+ * both usually compiled in and the module overhead goes away.
+ */
+MODULE_DESCRIPTION("glob(7) matching");
+MODULE_LICENSE("Dual MIT/GPL");
+
+/**
+ * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
+ * @pat: Shell-style pattern to match, e.g. "*.[ch]".
+ * @str: String to match.  The pattern must match the entire string.
+ *
+ * Perform shell-style glob matching, returning true (1) if the match
+ * succeeds, or false (0) if it fails.  Equivalent to !fnmatch(@pat, @str, 0).
+ *
+ * Pattern metacharacters are ?, *, [ and \.
+ * (And, inside character classes, !, - and ].)
+ *
+ * This is a small and simple implementation intended for device blacklists
+ * where a string is matched against a number of patterns.  Thus, it
+ * does not preprocess the patterns.  It is non-recursive, and run-time
+ * is at most quadratic: strlen(@str)*strlen(@pat).
+ *
+ * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
+ * it takes 6 passes over the pattern before matching the string.
+ *
+ * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
+ * treat / or leading . specially; it isn't actually used for pathnames.
+ *
+ * Note that according to glob(7) (and unlike bash), character classes
+ * are complemented by a leading !; this does not support the regex-style
+ * [^a-z] syntax.
+ *
+ * An opening bracket without a matching close is matched literally.
+ */
+bool __pure glob_match(char const *pat, char const *str)
+{
+       /*
+        * Backtrack to previous * on mismatch and retry starting one
+        * character later in the string.  Because * matches all characters
+        * (no exception for /), it can be easily proved that there's
+        * never a need to backtrack multiple levels.
+        */
+       char const *back_pat = NULL, *back_str = back_str;
+
+       /*
+        * Loop over each token (character or class) in pat, matching
+        * it against the remaining unmatched tail of str.  Return false
+        * on mismatch, or true after matching the trailing nul bytes.
+        */
+       for (;;) {
+               unsigned char c = *str++;
+               unsigned char d = *pat++;
+
+               switch (d) {
+               case '?':       /* Wildcard: anything but nul */
+                       if (c == '\0')
+                               return false;
+                       break;
+               case '*':       /* Any-length wildcard */
+                       if (*pat == '\0')       /* Optimize trailing * case */
+                               return true;
+                       back_pat = pat;
+                       back_str = --str;       /* Allow zero-length match */
+                       break;
+               case '[': {     /* Character class */
+                       bool match = false, inverted = (*pat == '!');
+                       char const *class = pat + inverted;
+                       unsigned char a = *class++;
+
+                       /*
+                        * Iterate over each span in the character class.
+                        * A span is either a single character a, or a
+                        * range a-b.  The first span may begin with ']'.
+                        */
+                       do {
+                               unsigned char b = a;
+
+                               if (a == '\0')  /* Malformed */
+                                       goto literal;
+
+                               if (class[0] == '-' && class[1] != ']') {
+                                       b = class[1];
+
+                                       if (b == '\0')
+                                               goto literal;
+
+                                       class += 2;
+                                       /* Any special action if a > b? */
+                               }
+                               match |= (a <= c && c <= b);
+                       } while ((a = *class++) != ']');
+
+                       if (match == inverted)
+                               goto backtrack;
+                       pat = class;
+                       }
+                       break;
+               case '\\':
+                       d = *pat++;
+                       /*FALLTHROUGH*/
+               default:        /* Literal character */
+literal:
+                       if (c == d) {
+                               if (d == '\0')
+                                       return true;
+                               break;
+                       }
+backtrack:
+                       if (c == '\0' || !back_pat)
+                               return false;   /* No point continuing */
+                       /* Try again from last *, one character later in str. */
+                       pat = back_pat;
+                       str = ++back_str;
+                       break;
+               }
+       }
+}
+EXPORT_SYMBOL(glob_match);
+
+
+#ifdef CONFIG_GLOB_SELFTEST
+
+#include <linux/printk.h>
+#include <linux/moduleparam.h>
+
+/* Boot with "glob.verbose=1" to show successful tests, too */
+static bool verbose = false;
+module_param(verbose, bool, 0);
+
+struct glob_test {
+       char const *pat, *str;
+       bool expected;
+};
+
+static bool __pure __init test(char const *pat, char const *str, bool expected)
+{
+       bool match = glob_match(pat, str);
+       bool success = match == expected;
+
+       /* Can't get string literals into a particular section, so... */
+       static char const msg_error[] __initconst =
+               KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n";
+       static char const msg_ok[] __initconst =
+               KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n";
+       static char const mismatch[] __initconst = "mismatch";
+       char const *message;
+
+       if (!success)
+               message = msg_error;
+       else if (verbose)
+               message = msg_ok;
+       else
+               return success;
+
+       printk(message, pat, str, mismatch + 3*match);
+       return success;
+}
+
+/*
+ * The tests are all jammed together in one array to make it simpler
+ * to place that array in the .init.rodata section.  The obvious
+ * "array of structures containing char *" has no way to force the
+ * pointed-to strings to be in a particular section.
+ *
+ * Anyway, a test consists of:
+ * 1. Expected glob_match result: '1' or '0'.
+ * 2. Pattern to match: null-terminated string
+ * 3. String to match against: null-terminated string
+ *
+ * The list of tests is terminated with a final '\0' instead of
+ * a glob_match result character.
+ */
+static char const glob_tests[] __initconst =
+       /* Some basic tests */
+       "1" "a\0" "a\0"
+       "0" "a\0" "b\0"
+       "0" "a\0" "aa\0"
+       "0" "a\0" "\0"
+       "1" "\0" "\0"
+       "0" "\0" "a\0"
+       /* Simple character class tests */
+       "1" "[a]\0" "a\0"
+       "0" "[a]\0" "b\0"
+       "0" "[!a]\0" "a\0"
+       "1" "[!a]\0" "b\0"
+       "1" "[ab]\0" "a\0"
+       "1" "[ab]\0" "b\0"
+       "0" "[ab]\0" "c\0"
+       "1" "[!ab]\0" "c\0"
+       "1" "[a-c]\0" "b\0"
+       "0" "[a-c]\0" "d\0"
+       /* Corner cases in character class parsing */
+       "1" "[a-c-e-g]\0" "-\0"
+       "0" "[a-c-e-g]\0" "d\0"
+       "1" "[a-c-e-g]\0" "f\0"
+       "1" "[]a-ceg-ik[]\0" "a\0"
+       "1" "[]a-ceg-ik[]\0" "]\0"
+       "1" "[]a-ceg-ik[]\0" "[\0"
+       "1" "[]a-ceg-ik[]\0" "h\0"
+       "0" "[]a-ceg-ik[]\0" "f\0"
+       "0" "[!]a-ceg-ik[]\0" "h\0"
+       "0" "[!]a-ceg-ik[]\0" "]\0"
+       "1" "[!]a-ceg-ik[]\0" "f\0"
+       /* Simple wild cards */
+       "1" "?\0" "a\0"
+       "0" "?\0" "aa\0"
+       "0" "??\0" "a\0"
+       "1" "?x?\0" "axb\0"
+       "0" "?x?\0" "abx\0"
+       "0" "?x?\0" "xab\0"
+       /* Asterisk wild cards (backtracking) */
+       "0" "*??\0" "a\0"
+       "1" "*??\0" "ab\0"
+       "1" "*??\0" "abc\0"
+       "1" "*??\0" "abcd\0"
+       "0" "??*\0" "a\0"
+       "1" "??*\0" "ab\0"
+       "1" "??*\0" "abc\0"
+       "1" "??*\0" "abcd\0"
+       "0" "?*?\0" "a\0"
+       "1" "?*?\0" "ab\0"
+       "1" "?*?\0" "abc\0"
+       "1" "?*?\0" "abcd\0"
+       "1" "*b\0" "b\0"
+       "1" "*b\0" "ab\0"
+       "0" "*b\0" "ba\0"
+       "1" "*b\0" "bb\0"
+       "1" "*b\0" "abb\0"
+       "1" "*b\0" "bab\0"
+       "1" "*bc\0" "abbc\0"
+       "1" "*bc\0" "bc\0"
+       "1" "*bc\0" "bbc\0"
+       "1" "*bc\0" "bcbc\0"
+       /* Multiple asterisks (complex backtracking) */
+       "1" "*ac*\0" "abacadaeafag\0"
+       "1" "*ac*ae*ag*\0" "abacadaeafag\0"
+       "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
+       "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
+       "1" "*abcd*\0" "abcabcabcabcdefg\0"
+       "1" "*ab*cd*\0" "abcabcabcabcdefg\0"
+       "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
+       "0" "*abcd*\0" "abcabcabcabcefg\0"
+       "0" "*ab*cd*\0" "abcabcabcabcefg\0";
+
+static int __init glob_init(void)
+{
+       unsigned successes = 0;
+       unsigned n = 0;
+       char const *p = glob_tests;
+       static char const message[] __initconst =
+               KERN_INFO "glob: %u self-tests passed, %u failed\n";
+
+       /*
+        * Tests are jammed together in a string.  The first byte is '1'
+        * or '0' to indicate the expected outcome, or '\0' to indicate the
+        * end of the tests.  Then come two null-terminated strings: the
+        * pattern and the string to match it against.
+        */
+       while (*p) {
+               bool expected = *p++ & 1;
+               char const *pat = p;
+
+               p += strlen(p) + 1;
+               successes += test(pat, p, expected);
+               p += strlen(p) + 1;
+               n++;
+       }
+
+       n -= successes;
+       printk(message, successes, n);
+
+       /* What's the errno for "kernel bug detected"?  Guess... */
+       return n ? -ECANCELED : 0;
+}
+
+/* We need a dummy exit function to allow unload */
+static void __exit glob_fini(void) { }
+
+module_init(glob_init);
+module_exit(glob_fini);
+
+#endif /* CONFIG_GLOB_SELFTEST */
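
The kernel-doc above defines glob_match(pat, str) as equivalent to !fnmatch(pat, str, 0), with no special treatment of '/' or a leading '.'. That equivalence can be exercised from userspace with POSIX fnmatch(3); the sketch below runs the kernel-doc example plus a few of the self-test patterns through it. How faithfully a particular libc handles the bracket-expression corner cases is an assumption here, and the CONFIG_GLOB_SELFTEST table above remains the authoritative check for the kernel implementation.

    #include <stdio.h>
    #include <fnmatch.h>

    int main(void)
    {
            /* The "*.[ch]" case is from the kernel-doc example; the rest are
             * taken from the glob_tests table above. */
            static const struct {
                    const char *pat, *str;
                    int expected;
            } tests[] = {
                    { "*.[ch]",            "glob.c",       1 },
                    { "*??",               "ab",           1 },
                    { "*a*b*[bc]*[ef]*g*", "abacadaeafag", 1 },
                    { "[!]a-ceg-ik[]",     "h",            0 },
            };
            unsigned int i;

            for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
                    int match = !fnmatch(tests[i].pat, tests[i].str, 0);

                    printf("\"%s\" vs \"%s\": %s (%s)\n",
                           tests[i].pat, tests[i].str,
                           match ? "match" : "mismatch",
                           match == tests[i].expected ? "OK" : "DIFFERS");
            }
            return 0;
    }
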
index 358a368a2947057ef9d9309c9dd4fc893523d63d..89b485a2a58d1755850d9baa2c50a1dc58fa3a15 100644 (file)
@@ -140,11 +140,11 @@ void klist_add_tail(struct klist_node *n, struct klist *k)
 EXPORT_SYMBOL_GPL(klist_add_tail);
 
 /**
- * klist_add_after - Init a klist_node and add it after an existing node
+ * klist_add_behind - Init a klist_node and add it after an existing node
  * @n: node we're adding.
  * @pos: node to put @n after
  */
-void klist_add_after(struct klist_node *n, struct klist_node *pos)
+void klist_add_behind(struct klist_node *n, struct klist_node *pos)
 {
        struct klist *k = knode_klist(pos);
 
@@ -153,7 +153,7 @@ void klist_add_after(struct klist_node *n, struct klist_node *pos)
        list_add(&n->n_node, &pos->n_node);
        spin_unlock(&k->k_lock);
 }
-EXPORT_SYMBOL_GPL(klist_add_after);
+EXPORT_SYMBOL_GPL(klist_add_behind);
 
 /**
  * klist_add_before - Init a klist_node and add it before an existing node
index 1183fa70a44d26cfc673b6d5d2f645b7026f50dd..12bcba1c8612bcf998fe39548432548613591911 100644 (file)
@@ -1,3 +1,6 @@
+
+#define pr_fmt(fmt) "list_sort_test: " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/list_sort.h>
@@ -47,6 +50,7 @@ static void merge_and_restore_back_links(void *priv,
                                struct list_head *a, struct list_head *b)
 {
        struct list_head *tail = head;
+       u8 count = 0;
 
        while (a && b) {
                /* if equal, take 'a' -- important for sort stability */
@@ -70,7 +74,8 @@ static void merge_and_restore_back_links(void *priv,
                 * element comparison is needed, so the client's cmp()
                 * routine can invoke cond_resched() periodically.
                 */
-               (*cmp)(priv, tail->next, tail->next);
+               if (unlikely(!(++count)))
+                       (*cmp)(priv, tail->next, tail->next);
 
                tail->next->prev = tail;
                tail = tail->next;
@@ -123,9 +128,7 @@ void list_sort(void *priv, struct list_head *head,
                }
                if (lev > max_lev) {
                        if (unlikely(lev >= ARRAY_SIZE(part)-1)) {
-                               printk_once(KERN_DEBUG "list passed to"
-                                       " list_sort() too long for"
-                                       " efficiency\n");
+                               printk_once(KERN_DEBUG "list too long for efficiency\n");
                                lev--;
                        }
                        max_lev = lev;
@@ -168,27 +171,25 @@ static struct debug_el **elts __initdata;
 static int __init check(struct debug_el *ela, struct debug_el *elb)
 {
        if (ela->serial >= TEST_LIST_LEN) {
-               printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
-                               ela->serial);
+               pr_err("error: incorrect serial %d\n", ela->serial);
                return -EINVAL;
        }
        if (elb->serial >= TEST_LIST_LEN) {
-               printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
-                               elb->serial);
+               pr_err("error: incorrect serial %d\n", elb->serial);
                return -EINVAL;
        }
        if (elts[ela->serial] != ela || elts[elb->serial] != elb) {
-               printk(KERN_ERR "list_sort_test: error: phantom element\n");
+               pr_err("error: phantom element\n");
                return -EINVAL;
        }
        if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) {
-               printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
-                               ela->poison1, ela->poison2);
+               pr_err("error: bad poison: %#x/%#x\n",
+                       ela->poison1, ela->poison2);
                return -EINVAL;
        }
        if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) {
-               printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
-                               elb->poison1, elb->poison2);
+               pr_err("error: bad poison: %#x/%#x\n",
+                       elb->poison1, elb->poison2);
                return -EINVAL;
        }
        return 0;
@@ -207,25 +208,23 @@ static int __init cmp(void *priv, struct list_head *a, struct list_head *b)
 
 static int __init list_sort_test(void)
 {
-       int i, count = 1, err = -EINVAL;
+       int i, count = 1, err = -ENOMEM;
        struct debug_el *el;
-       struct list_head *cur, *tmp;
+       struct list_head *cur;
        LIST_HEAD(head);
 
-       printk(KERN_DEBUG "list_sort_test: start testing list_sort()\n");
+       pr_debug("start testing list_sort()\n");
 
-       elts = kmalloc(sizeof(void *) * TEST_LIST_LEN, GFP_KERNEL);
+       elts = kcalloc(TEST_LIST_LEN, sizeof(*elts), GFP_KERNEL);
        if (!elts) {
-               printk(KERN_ERR "list_sort_test: error: cannot allocate "
-                               "memory\n");
-               goto exit;
+               pr_err("error: cannot allocate memory\n");
+               return err;
        }
 
        for (i = 0; i < TEST_LIST_LEN; i++) {
                el = kmalloc(sizeof(*el), GFP_KERNEL);
                if (!el) {
-                       printk(KERN_ERR "list_sort_test: error: cannot "
-                                       "allocate memory\n");
+                       pr_err("error: cannot allocate memory\n");
                        goto exit;
                }
                 /* force some equivalencies */
@@ -239,52 +238,52 @@ static int __init list_sort_test(void)
 
        list_sort(NULL, &head, cmp);
 
+       err = -EINVAL;
        for (cur = head.next; cur->next != &head; cur = cur->next) {
                struct debug_el *el1;
                int cmp_result;
 
                if (cur->next->prev != cur) {
-                       printk(KERN_ERR "list_sort_test: error: list is "
-                                       "corrupted\n");
+                       pr_err("error: list is corrupted\n");
                        goto exit;
                }
 
                cmp_result = cmp(NULL, cur, cur->next);
                if (cmp_result > 0) {
-                       printk(KERN_ERR "list_sort_test: error: list is not "
-                                       "sorted\n");
+                       pr_err("error: list is not sorted\n");
                        goto exit;
                }
 
                el = container_of(cur, struct debug_el, list);
                el1 = container_of(cur->next, struct debug_el, list);
                if (cmp_result == 0 && el->serial >= el1->serial) {
-                       printk(KERN_ERR "list_sort_test: error: order of "
-                                       "equivalent elements not preserved\n");
+                       pr_err("error: order of equivalent elements not "
+                               "preserved\n");
                        goto exit;
                }
 
                if (check(el, el1)) {
-                       printk(KERN_ERR "list_sort_test: error: element check "
-                                       "failed\n");
+                       pr_err("error: element check failed\n");
                        goto exit;
                }
                count++;
        }
+       if (head.prev != cur) {
+               pr_err("error: list is corrupted\n");
+               goto exit;
+       }
+
 
        if (count != TEST_LIST_LEN) {
-               printk(KERN_ERR "list_sort_test: error: bad list length %d",
-                               count);
+               pr_err("error: bad list length %d", count);
                goto exit;
        }
 
        err = 0;
 exit:
+       for (i = 0; i < TEST_LIST_LEN; i++)
+               kfree(elts[i]);
        kfree(elts);
-       list_for_each_safe(cur, tmp, &head) {
-               list_del(cur);
-               kfree(container_of(cur, struct debug_el, list));
-       }
        return err;
 }
 module_init(list_sort_test);
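
The merge_and_restore_back_links() hunk above now invokes the client cmp() only once every 256 nodes of the final back-link walk, by letting a u8 counter wrap to zero; a cmp() that calls cond_resched() still gets a regular opportunity to do so without the cost of a real comparison per element. A tiny userspace illustration of the wrap-around throttle (the names here are illustrative):

    #include <stdio.h>
    #include <stdint.h>

    static void periodic_work(unsigned long i)
    {
            printf("callback at element %lu\n", i);
    }

    int main(void)
    {
            uint8_t count = 0;      /* wraps 255 -> 0, like the u8 in the hunk above */
            unsigned long i;

            for (i = 0; i < 1000; i++) {
                    if (!++count)   /* true once every 256 iterations */
                            periodic_work(i);
            }
            return 0;
    }
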
index ed5c1454dd6288aacf85e4ee7db5e2e3ee947627..29033f319aea1f8f48e85374884f657a130519a4 100644 (file)
 int string_get_size(u64 size, const enum string_size_units units,
                    char *buf, int len)
 {
-       static const char *units_10[] = { "B", "kB", "MB", "GB", "TB", "PB",
-                                  "EB", "ZB", "YB", NULL};
-       static const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB",
-                                "EiB", "ZiB", "YiB", NULL };
-       static const char **units_str[] = {
-               [STRING_UNITS_10] =  units_10,
+       static const char *const units_10[] = {
+               "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", NULL
+       };
+       static const char *const units_2[] = {
+               "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB",
+               NULL
+       };
+       static const char *const *const units_str[] = {
+               [STRING_UNITS_10] = units_10,
                [STRING_UNITS_2] = units_2,
        };
        static const unsigned int divisor[] = {
index bea3f3fa3f02a920fb92b9293c0adf06d72a52a6..4137bca5f8e8e5008ca88b7fcb801096856eaf0c 100644 (file)
@@ -3,7 +3,7 @@
 #include <linux/module.h>
 
 #define for_each_test(i, test) \
-       for (i = 0; i < sizeof(test) / sizeof(test[0]); i++)
+       for (i = 0; i < ARRAY_SIZE(test); i++)
 
 struct test_fail {
        const char *str;
index d63381e8e3331064463283420d872842e27c8a9a..d20ef458f1374ed62962a494b185926f90b547f8 100644 (file)
@@ -249,52 +249,6 @@ int zlib_deflateInit2(
     return zlib_deflateReset(strm);
 }
 
-/* ========================================================================= */
-#if 0
-int zlib_deflateSetDictionary(
-       z_streamp strm,
-       const Byte *dictionary,
-       uInt  dictLength
-)
-{
-    deflate_state *s;
-    uInt length = dictLength;
-    uInt n;
-    IPos hash_head = 0;
-
-    if (strm == NULL || strm->state == NULL || dictionary == NULL)
-       return Z_STREAM_ERROR;
-
-    s = (deflate_state *) strm->state;
-    if (s->status != INIT_STATE) return Z_STREAM_ERROR;
-
-    strm->adler = zlib_adler32(strm->adler, dictionary, dictLength);
-
-    if (length < MIN_MATCH) return Z_OK;
-    if (length > MAX_DIST(s)) {
-       length = MAX_DIST(s);
-#ifndef USE_DICT_HEAD
-       dictionary += dictLength - length; /* use the tail of the dictionary */
-#endif
-    }
-    memcpy((char *)s->window, dictionary, length);
-    s->strstart = length;
-    s->block_start = (long)length;
-
-    /* Insert all strings in the hash table (except for the last two bytes).
-     * s->lookahead stays null, so s->ins_h will be recomputed at the next
-     * call of fill_window.
-     */
-    s->ins_h = s->window[0];
-    UPDATE_HASH(s, s->ins_h, s->window[1]);
-    for (n = 0; n <= length - MIN_MATCH; n++) {
-       INSERT_STRING(s, n, hash_head);
-    }
-    if (hash_head) hash_head = 0;  /* to make compiler happy */
-    return Z_OK;
-}
-#endif  /*  0  */
-
 /* ========================================================================= */
 int zlib_deflateReset(
        z_streamp strm
@@ -326,45 +280,6 @@ int zlib_deflateReset(
     return Z_OK;
 }
 
-/* ========================================================================= */
-#if 0
-int zlib_deflateParams(
-       z_streamp strm,
-       int level,
-       int strategy
-)
-{
-    deflate_state *s;
-    compress_func func;
-    int err = Z_OK;
-
-    if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
-    s = (deflate_state *) strm->state;
-
-    if (level == Z_DEFAULT_COMPRESSION) {
-       level = 6;
-    }
-    if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) {
-       return Z_STREAM_ERROR;
-    }
-    func = configuration_table[s->level].func;
-
-    if (func != configuration_table[level].func && strm->total_in != 0) {
-       /* Flush the last buffer: */
-       err = zlib_deflate(strm, Z_PARTIAL_FLUSH);
-    }
-    if (s->level != level) {
-       s->level = level;
-       s->max_lazy_match   = configuration_table[level].max_lazy;
-       s->good_match       = configuration_table[level].good_length;
-       s->nice_match       = configuration_table[level].nice_length;
-       s->max_chain_length = configuration_table[level].max_chain;
-    }
-    s->strategy = strategy;
-    return err;
-}
-#endif  /*  0  */
-
 /* =========================================================================
  * Put a short in the pending buffer. The 16-bit value is put in MSB order.
  * IN assertion: the stream state is correct and there is enough room in
@@ -568,64 +483,6 @@ int zlib_deflateEnd(
     return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
 }
 
-/* =========================================================================
- * Copy the source state to the destination state.
- */
-#if 0
-int zlib_deflateCopy (
-       z_streamp dest,
-       z_streamp source
-)
-{
-#ifdef MAXSEG_64K
-    return Z_STREAM_ERROR;
-#else
-    deflate_state *ds;
-    deflate_state *ss;
-    ush *overlay;
-    deflate_workspace *mem;
-
-
-    if (source == NULL || dest == NULL || source->state == NULL) {
-        return Z_STREAM_ERROR;
-    }
-
-    ss = (deflate_state *) source->state;
-
-    *dest = *source;
-
-    mem = (deflate_workspace *) dest->workspace;
-
-    ds = &(mem->deflate_memory);
-
-    dest->state = (struct internal_state *) ds;
-    *ds = *ss;
-    ds->strm = dest;
-
-    ds->window = (Byte *) mem->window_memory;
-    ds->prev   = (Pos *)  mem->prev_memory;
-    ds->head   = (Pos *)  mem->head_memory;
-    overlay = (ush *) mem->overlay_memory;
-    ds->pending_buf = (uch *) overlay;
-
-    memcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
-    memcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos));
-    memcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos));
-    memcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
-
-    ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
-    ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
-    ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
-
-    ds->l_desc.dyn_tree = ds->dyn_ltree;
-    ds->d_desc.dyn_tree = ds->dyn_dtree;
-    ds->bl_desc.dyn_tree = ds->bl_tree;
-
-    return Z_OK;
-#endif
-}
-#endif  /*  0  */
-
 /* ===========================================================================
  * Read a new buffer from the current input stream, update the adler32
  * and total number of bytes read.  All deflate() input goes through
index f5ce87b0800edd421beedc2264c9b9a2a81db885..58a733b1038740f2faefca7efb40de57131e5ac6 100644 (file)
@@ -45,21 +45,6 @@ int zlib_inflateReset(z_streamp strm)
     return Z_OK;
 }
 
-#if 0
-int zlib_inflatePrime(z_streamp strm, int bits, int value)
-{
-    struct inflate_state *state;
-
-    if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
-    state = (struct inflate_state *)strm->state;
-    if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR;
-    value &= (1L << bits) - 1;
-    state->hold += value << state->bits;
-    state->bits += bits;
-    return Z_OK;
-}
-#endif
-
 int zlib_inflateInit2(z_streamp strm, int windowBits)
 {
     struct inflate_state *state;
@@ -761,123 +746,6 @@ int zlib_inflateEnd(z_streamp strm)
     return Z_OK;
 }
 
-#if 0
-int zlib_inflateSetDictionary(z_streamp strm, const Byte *dictionary,
-        uInt dictLength)
-{
-    struct inflate_state *state;
-    unsigned long id;
-
-    /* check state */
-    if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
-    state = (struct inflate_state *)strm->state;
-    if (state->wrap != 0 && state->mode != DICT)
-        return Z_STREAM_ERROR;
-
-    /* check for correct dictionary id */
-    if (state->mode == DICT) {
-        id = zlib_adler32(0L, NULL, 0);
-        id = zlib_adler32(id, dictionary, dictLength);
-        if (id != state->check)
-            return Z_DATA_ERROR;
-    }
-
-    /* copy dictionary to window */
-    zlib_updatewindow(strm, strm->avail_out);
-
-    if (dictLength > state->wsize) {
-        memcpy(state->window, dictionary + dictLength - state->wsize,
-                state->wsize);
-        state->whave = state->wsize;
-    }
-    else {
-        memcpy(state->window + state->wsize - dictLength, dictionary,
-                dictLength);
-        state->whave = dictLength;
-    }
-    state->havedict = 1;
-    return Z_OK;
-}
-#endif
-
-#if 0
-/*
-   Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff.  Return when found
-   or when out of input.  When called, *have is the number of pattern bytes
-   found in order so far, in 0..3.  On return *have is updated to the new
-   state.  If on return *have equals four, then the pattern was found and the
-   return value is how many bytes were read including the last byte of the
-   pattern.  If *have is less than four, then the pattern has not been found
-   yet and the return value is len.  In the latter case, zlib_syncsearch() can be
-   called again with more data and the *have state.  *have is initialized to
-   zero for the first call.
- */
-static unsigned zlib_syncsearch(unsigned *have, unsigned char *buf,
-        unsigned len)
-{
-    unsigned got;
-    unsigned next;
-
-    got = *have;
-    next = 0;
-    while (next < len && got < 4) {
-        if ((int)(buf[next]) == (got < 2 ? 0 : 0xff))
-            got++;
-        else if (buf[next])
-            got = 0;
-        else
-            got = 4 - got;
-        next++;
-    }
-    *have = got;
-    return next;
-}
-#endif
-
-#if 0
-int zlib_inflateSync(z_streamp strm)
-{
-    unsigned len;               /* number of bytes to look at or looked at */
-    unsigned long in, out;      /* temporary to save total_in and total_out */
-    unsigned char buf[4];       /* to restore bit buffer to byte string */
-    struct inflate_state *state;
-
-    /* check parameters */
-    if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
-    state = (struct inflate_state *)strm->state;
-    if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR;
-
-    /* if first time, start search in bit buffer */
-    if (state->mode != SYNC) {
-        state->mode = SYNC;
-        state->hold <<= state->bits & 7;
-        state->bits -= state->bits & 7;
-        len = 0;
-        while (state->bits >= 8) {
-            buf[len++] = (unsigned char)(state->hold);
-            state->hold >>= 8;
-            state->bits -= 8;
-        }
-        state->have = 0;
-        zlib_syncsearch(&(state->have), buf, len);
-    }
-
-    /* search available input */
-    len = zlib_syncsearch(&(state->have), strm->next_in, strm->avail_in);
-    strm->avail_in -= len;
-    strm->next_in += len;
-    strm->total_in += len;
-
-    /* return no joy or set up to restart inflate() on a new block */
-    if (state->have != 4) return Z_DATA_ERROR;
-    in = strm->total_in;  out = strm->total_out;
-    zlib_inflateReset(strm);
-    strm->total_in = in;  strm->total_out = out;
-    state->mode = TYPE;
-    return Z_OK;
-}
-#endif
-
 /*
  * This subroutine adds the data at next_in/avail_in to the output history
  * without performing any output.  The output buffer must be "caught up";
index 3e9977a9d657dd0df323488a6f47e0ba1f23e6f8..886db2158538572ee52790b31d93b4baeb8b187f 100644 (file)
@@ -508,21 +508,34 @@ config CMA_DEBUG
          processing calls such as dma_alloc_from_contiguous().
          This option does not affect warning and error messages.
 
-config ZBUD
-       tristate
-       default n
+config CMA_AREAS
+       int "Maximum count of the CMA areas"
+       depends on CMA
+       default 7
        help
-         A special purpose allocator for storing compressed pages.
-         It is designed to store up to two compressed pages per physical
-         page.  While this design limits storage density, it has simple and
-         deterministic reclaim properties that make it preferable to a higher
-         density approach when reclaim will be used.
+         CMA allows the creation of CMA areas for a particular purpose,
+         mainly used as device private areas. This parameter sets the
+         maximum number of CMA areas in the system.
+
+         If unsure, leave the default value "7".
+
+config MEM_SOFT_DIRTY
+       bool "Track memory changes"
+       depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
+       select PROC_PAGE_MONITOR
+       help
+         This option enables memory change tracking by introducing a
+         soft-dirty bit on pte-s. This bit is set when someone writes
+         into a page, just like the regular dirty bit, but unlike the
+         latter it can be cleared by hand.
+
+         See Documentation/vm/soft-dirty.txt for more details.
 
 config ZSWAP
        bool "Compressed cache for swap pages (EXPERIMENTAL)"
        depends on FRONTSWAP && CRYPTO=y
        select CRYPTO_LZO
-       select ZBUD
+       select ZPOOL
        default n
        help
          A lightweight compressed cache for swap pages.  It takes
@@ -538,17 +551,22 @@ config ZSWAP
          they have not be fully explored on the large set of potential
          configurations and workloads that exist.
 
-config MEM_SOFT_DIRTY
-       bool "Track memory changes"
-       depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
-       select PROC_PAGE_MONITOR
+config ZPOOL
+       tristate "Common API for compressed memory storage"
+       default n
        help
-         This option enables memory changes tracking by introducing a
-         soft-dirty bit on pte-s. This bit it set when someone writes
-         into a page just as regular dirty bit, but unlike the latter
-         it can be cleared by hands.
+         Compressed memory storage API.  This allows using either zbud or
+         zsmalloc.
 
-         See Documentation/vm/soft-dirty.txt for more details.
+config ZBUD
+       tristate "Low density storage for compressed pages"
+       default n
+       help
+         A special purpose allocator for storing compressed pages.
+         It is designed to store up to two compressed pages per physical
+         page.  While this design limits storage density, it has simple and
+         deterministic reclaim properties that make it preferable to a higher
+         density approach when reclaim will be used.
 
 config ZSMALLOC
        tristate "Memory allocator for compressed pages"
index 4064f3ec145e3b2d760f720371c6d3e6830dbd99..632ae77e6070ebfad40b644a1321d6af17507541 100644 (file)
@@ -59,6 +59,8 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
 obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
 obj-$(CONFIG_CLEANCACHE) += cleancache.o
 obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
+obj-$(CONFIG_ZPOOL)    += zpool.o
 obj-$(CONFIG_ZBUD)     += zbud.o
 obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
 obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
+obj-$(CONFIG_CMA)      += cma.o
diff --git a/mm/cma.c b/mm/cma.c
new file mode 100644 (file)
index 0000000..c17751c
--- /dev/null
+++ b/mm/cma.c
@@ -0,0 +1,335 @@
+/*
+ * Contiguous Memory Allocator
+ *
+ * Copyright (c) 2010-2011 by Samsung Electronics.
+ * Copyright IBM Corporation, 2013
+ * Copyright LG Electronics Inc., 2014
+ * Written by:
+ *     Marek Szyprowski <m.szyprowski@samsung.com>
+ *     Michal Nazarewicz <mina86@mina86.com>
+ *     Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *     Joonsoo Kim <iamjoonsoo.kim@lge.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your option) any later version of the license.
+ */
+
+#define pr_fmt(fmt) "cma: " fmt
+
+#ifdef CONFIG_CMA_DEBUG
+#ifndef DEBUG
+#  define DEBUG
+#endif
+#endif
+
+#include <linux/memblock.h>
+#include <linux/err.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/log2.h>
+#include <linux/cma.h>
+
+struct cma {
+       unsigned long   base_pfn;
+       unsigned long   count;
+       unsigned long   *bitmap;
+       unsigned int order_per_bit; /* Order of pages represented by one bit */
+       struct mutex    lock;
+};
+
+static struct cma cma_areas[MAX_CMA_AREAS];
+static unsigned cma_area_count;
+static DEFINE_MUTEX(cma_mutex);
+
+phys_addr_t cma_get_base(struct cma *cma)
+{
+       return PFN_PHYS(cma->base_pfn);
+}
+
+unsigned long cma_get_size(struct cma *cma)
+{
+       return cma->count << PAGE_SHIFT;
+}
+
+static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order)
+{
+       return (1UL << (align_order >> cma->order_per_bit)) - 1;
+}
+
+static unsigned long cma_bitmap_maxno(struct cma *cma)
+{
+       return cma->count >> cma->order_per_bit;
+}
+
+static unsigned long cma_bitmap_pages_to_bits(struct cma *cma,
+                                               unsigned long pages)
+{
+       return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit;
+}
+
+static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, int count)
+{
+       unsigned long bitmap_no, bitmap_count;
+
+       bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit;
+       bitmap_count = cma_bitmap_pages_to_bits(cma, count);
+
+       mutex_lock(&cma->lock);
+       bitmap_clear(cma->bitmap, bitmap_no, bitmap_count);
+       mutex_unlock(&cma->lock);
+}
+
+static int __init cma_activate_area(struct cma *cma)
+{
+       int bitmap_size = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long);
+       unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
+       unsigned i = cma->count >> pageblock_order;
+       struct zone *zone;
+
+       cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+
+       if (!cma->bitmap)
+               return -ENOMEM;
+
+       WARN_ON_ONCE(!pfn_valid(pfn));
+       zone = page_zone(pfn_to_page(pfn));
+
+       do {
+               unsigned j;
+
+               base_pfn = pfn;
+               for (j = pageblock_nr_pages; j; --j, pfn++) {
+                       WARN_ON_ONCE(!pfn_valid(pfn));
+                       /*
+                        * alloc_contig_range requires the pfn range
+                        * specified to be in the same zone. Make this
+                        * simple by forcing the entire CMA resv range
+                        * to be in the same zone.
+                        */
+                       if (page_zone(pfn_to_page(pfn)) != zone)
+                               goto err;
+               }
+               init_cma_reserved_pageblock(pfn_to_page(base_pfn));
+       } while (--i);
+
+       mutex_init(&cma->lock);
+       return 0;
+
+err:
+       kfree(cma->bitmap);
+       return -EINVAL;
+}
+
+static int __init cma_init_reserved_areas(void)
+{
+       int i;
+
+       for (i = 0; i < cma_area_count; i++) {
+               int ret = cma_activate_area(&cma_areas[i]);
+
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+core_initcall(cma_init_reserved_areas);
+
+/**
+ * cma_declare_contiguous() - reserve custom contiguous area
+ * @base: Base address of the reserved area (optional, use 0 for any)
+ * @size: Size of the reserved area (in bytes)
+ * @limit: End address of the reserved memory (optional, 0 for any).
+ * @alignment: Alignment for the CMA area, should be power of 2 or zero
+ * @order_per_bit: Order of pages represented by one bit on bitmap.
+ * @fixed: hint about where to place the reserved area
+ * @res_cma: Pointer to store the created cma region.
+ *
+ * This function reserves memory from early allocator. It should be
+ * called by arch specific code once the early allocator (memblock or bootmem)
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory. This function allows the creation of custom reserved areas.
+ *
+ * If @fixed is true, reserve contiguous area at exactly @base.  If false,
+ * reserve in range from @base to @limit.
+ */
+int __init cma_declare_contiguous(phys_addr_t base,
+                       phys_addr_t size, phys_addr_t limit,
+                       phys_addr_t alignment, unsigned int order_per_bit,
+                       bool fixed, struct cma **res_cma)
+{
+       struct cma *cma;
+       int ret = 0;
+
+       pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n",
+               __func__, (unsigned long)size, (unsigned long)base,
+               (unsigned long)limit, (unsigned long)alignment);
+
+       if (cma_area_count == ARRAY_SIZE(cma_areas)) {
+               pr_err("Not enough slots for CMA reserved regions!\n");
+               return -ENOSPC;
+       }
+
+       if (!size)
+               return -EINVAL;
+
+       if (alignment && !is_power_of_2(alignment))
+               return -EINVAL;
+
+       /*
+        * Sanitise input arguments.
+        * Pages at both ends of the CMA area could be merged into adjacent
+        * unmovable migratetype pages by the page allocator's buddy algorithm.
+        * In that case, you couldn't get contiguous memory, which is not what
+        * we want.
+        */
+       alignment = max(alignment,
+               (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order));
+       base = ALIGN(base, alignment);
+       size = ALIGN(size, alignment);
+       limit &= ~(alignment - 1);
+
+       /* size should be aligned with order_per_bit */
+       if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit))
+               return -EINVAL;
+
+       /* Reserve memory */
+       if (base && fixed) {
+               if (memblock_is_region_reserved(base, size) ||
+                   memblock_reserve(base, size) < 0) {
+                       ret = -EBUSY;
+                       goto err;
+               }
+       } else {
+               phys_addr_t addr = memblock_alloc_range(size, alignment, base,
+                                                       limit);
+               if (!addr) {
+                       ret = -ENOMEM;
+                       goto err;
+               } else {
+                       base = addr;
+               }
+       }
+
+       /*
+        * Each reserved area must be initialised later, when more kernel
+        * subsystems (like slab allocator) are available.
+        */
+       cma = &cma_areas[cma_area_count];
+       cma->base_pfn = PFN_DOWN(base);
+       cma->count = size >> PAGE_SHIFT;
+       cma->order_per_bit = order_per_bit;
+       *res_cma = cma;
+       cma_area_count++;
+
+       pr_info("Reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
+               (unsigned long)base);
+       return 0;
+
+err:
+       pr_err("Failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
+       return ret;
+}
+
+/**
+ * cma_alloc() - allocate pages from contiguous area
+ * @cma:   Contiguous memory region for which the allocation is performed.
+ * @count: Requested number of pages.
+ * @align: Requested alignment of pages (in PAGE_SIZE order).
+ *
+ * This function allocates part of the contiguous memory from the
+ * specified contiguous memory area.
+ */
+struct page *cma_alloc(struct cma *cma, int count, unsigned int align)
+{
+       unsigned long mask, pfn, start = 0;
+       unsigned long bitmap_maxno, bitmap_no, bitmap_count;
+       struct page *page = NULL;
+       int ret;
+
+       if (!cma || !cma->count)
+               return NULL;
+
+       pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
+                count, align);
+
+       if (!count)
+               return NULL;
+
+       mask = cma_bitmap_aligned_mask(cma, align);
+       bitmap_maxno = cma_bitmap_maxno(cma);
+       bitmap_count = cma_bitmap_pages_to_bits(cma, count);
+
+       for (;;) {
+               mutex_lock(&cma->lock);
+               bitmap_no = bitmap_find_next_zero_area(cma->bitmap,
+                               bitmap_maxno, start, bitmap_count, mask);
+               if (bitmap_no >= bitmap_maxno) {
+                       mutex_unlock(&cma->lock);
+                       break;
+               }
+               bitmap_set(cma->bitmap, bitmap_no, bitmap_count);
+               /*
+                * It's safe to drop the lock here. We've marked this region for
+                * our exclusive use. If the migration fails we will take the
+                * lock again and unmark it.
+                */
+               mutex_unlock(&cma->lock);
+
+               pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
+               mutex_lock(&cma_mutex);
+               ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
+               mutex_unlock(&cma_mutex);
+               if (ret == 0) {
+                       page = pfn_to_page(pfn);
+                       break;
+               }
+
+               cma_clear_bitmap(cma, pfn, count);
+               if (ret != -EBUSY)
+                       break;
+
+               pr_debug("%s(): memory range at %p is busy, retrying\n",
+                        __func__, pfn_to_page(pfn));
+               /* try again with a bit different memory target */
+               start = bitmap_no + mask + 1;
+       }
+
+       pr_debug("%s(): returned %p\n", __func__, page);
+       return page;
+}
+
+/**
+ * cma_release() - release allocated pages
+ * @cma:   Contiguous memory region for which the allocation is performed.
+ * @pages: Allocated pages.
+ * @count: Number of allocated pages.
+ *
+ * This function releases memory allocated by cma_alloc().
+ * It returns false when the provided pages do not belong to the contiguous
+ * area, and true otherwise.
+ */
+bool cma_release(struct cma *cma, struct page *pages, int count)
+{
+       unsigned long pfn;
+
+       if (!cma || !pages)
+               return false;
+
+       pr_debug("%s(page %p)\n", __func__, (void *)pages);
+
+       pfn = page_to_pfn(pages);
+
+       if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
+               return false;
+
+       VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
+
+       free_contig_range(pfn, count);
+       cma_clear_bitmap(cma, pfn, count);
+
+       return true;
+}
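
The two entry points above are the allocation half of the new CMA interface.
As a rough usage sketch (not part of this patch), a driver that has been
handed a struct cma region reserved at boot could allocate and release a
buffer as below; my_get_cma_region() is a hypothetical stand-in for however
the region pointer reaches the driver, and the declarations are assumed to
come from the CMA header introduced by this series:

/* Illustrative sketch only -- not part of this patch. */
extern struct cma *my_get_cma_region(void);	/* hypothetical helper */

static struct page *my_buf;

static int my_driver_alloc(void)
{
	struct cma *cma = my_get_cma_region();

	/* 16 pages (64KiB with 4KiB pages), aligned to order 2 (16KiB) */
	my_buf = cma_alloc(cma, 16, 2);
	return my_buf ? 0 : -ENOMEM;
}

static void my_driver_free(void)
{
	struct cma *cma = my_get_cma_region();

	if (my_buf && !cma_release(cma, my_buf, 16))
		pr_warn("buffer did not come from this CMA area\n");
	my_buf = NULL;
}

Note that cma_alloc() returns NULL rather than an ERR_PTR on failure, and
cma_release() merely reports whether the pages fell inside the area.
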
index 65d44fd88c7850c5f1ae23d5d2e46b40ce831317..af19a6b079f5a5ae3a001079cb5b564260be1c22 100644 (file)
@@ -808,6 +808,17 @@ int __lock_page_killable(struct page *page)
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);
 
+/*
+ * Return values:
+ * 1 - page is locked; mmap_sem is still held.
+ * 0 - page is not locked.
+ *     mmap_sem has been released (up_read()), unless flags had both
+ *     FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
+ *     which case mmap_sem is still held.
+ *
+ * If neither ALLOW_RETRY nor KILLABLE is set, this will always return 1
+ * with the page locked and the mmap_sem unperturbed.
+ */
 int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
                         unsigned int flags)
 {
@@ -1091,9 +1102,9 @@ no_page:
                if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
                        fgp_flags |= FGP_LOCK;
 
-               /* Init accessed so avoit atomic mark_page_accessed later */
+               /* Init accessed so avoid atomic mark_page_accessed later */
                if (fgp_flags & FGP_ACCESSED)
-                       init_page_accessed(page);
+                       __SetPageReferenced(page);
 
                err = add_to_page_cache_lru(page, mapping, offset, radix_gfp_mask);
                if (unlikely(err)) {
@@ -1827,6 +1838,18 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
  * The goto's are kind of ugly, but this streamlines the normal case of having
  * it in the page cache, and handles the special cases reasonably without
  * having a lot of duplicated code.
+ *
+ * vma->vm_mm->mmap_sem must be held on entry.
+ *
+ * If our return value has VM_FAULT_RETRY set, it's because
+ * lock_page_or_retry() returned 0.
+ * The mmap_sem has usually been released in this case.
+ * See __lock_page_or_retry() for the exception.
+ *
+ * If our return value does not have VM_FAULT_RETRY set, the mmap_sem
+ * has not been released.
+ *
+ * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
  */
 int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
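
The locking rules spelled out in the two comments above are what the
architecture page-fault handlers rely on.  A condensed caller-side sketch
(illustrative only, not part of this patch; real handlers also pass
FAULT_FLAG_KILLABLE, FAULT_FLAG_USER and so on, and handle fatal signals):

static void fault_sketch(struct mm_struct *mm, unsigned long address)
{
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY;
	struct vm_area_struct *vma;
	int fault;

retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma || vma->vm_start > address)
		goto out;		/* a real handler reports the error */

	fault = handle_mm_fault(mm, vma, address, flags);
	if ((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY)) {
		/*
		 * mmap_sem was dropped for us inside __lock_page_or_retry();
		 * retry once, this time waiting for the page to be unlocked.
		 */
		flags &= ~FAULT_FLAG_ALLOW_RETRY;
		flags |= FAULT_FLAG_TRIED;
		goto retry;
	}
out:
	up_read(&mm->mmap_sem);		/* non-RETRY returns keep it held */
}
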
index cc5a9e7adea77ff50e8c538b36686db466bffb54..91d044b1600dd6b216decb62ca9f1bd7ef152c52 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -258,6 +258,11 @@ unmap:
        return ret;
 }
 
+/*
+ * mmap_sem must be held on entry.  If @nonblocking != NULL and
+ * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
+ * If it is, *@nonblocking will be set to 0 and -EBUSY returned.
+ */
 static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
                unsigned long address, unsigned int *flags, int *nonblocking)
 {
@@ -373,7 +378,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
  * with a put_page() call when it is finished with. vmas will only
  * remain valid while mmap_sem is held.
  *
- * Must be called with mmap_sem held for read or write.
+ * Must be called with mmap_sem held.  It may be released.  See below.
  *
  * __get_user_pages walks a process's page tables and takes a reference to
  * each struct page that each user address corresponds to at a given
@@ -396,7 +401,14 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
  *
  * If @nonblocking != NULL, __get_user_pages will not wait for disk IO
  * or mmap_sem contention, and if waiting is needed to pin all pages,
- * *@nonblocking will be set to 0.
+ * *@nonblocking will be set to 0.  Further, if @gup_flags does not
+ * include FOLL_NOWAIT, the mmap_sem will be released via up_read() in
+ * this case.
+ *
+ * A caller using such a combination of @nonblocking and @gup_flags
+ * must therefore hold the mmap_sem for reading only, and recognize
+ * when it's been released.  Otherwise, it must be held for either
+ * reading or writing and will not be released.
  *
  * In most cases, get_user_pages or get_user_pages_fast should be used
  * instead of __get_user_pages. __get_user_pages should be used only if
@@ -528,7 +540,7 @@ EXPORT_SYMBOL(__get_user_pages);
  * such architectures, gup() will not be enough to make a subsequent access
  * succeed.
  *
- * This should be called with the mm_sem held for read.
+ * This has the same semantics wrt the @mm->mmap_sem as does filemap_fault().
  */
 int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
                     unsigned long address, unsigned int fault_flags)
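
The relaxed rule documented for @nonblocking puts the burden on the caller to
notice when the lock has been dropped on its behalf.  A minimal caller sketch
(illustrative only, not part of this patch), assuming the __get_user_pages()
declaration from <linux/mm.h>:

static long pin_one_page(unsigned long addr, struct page **page)
{
	struct mm_struct *mm = current->mm;
	int locked = 1;			/* passed as @nonblocking */
	long ret;

	down_read(&mm->mmap_sem);
	ret = __get_user_pages(current, mm, addr, 1, FOLL_GET | FOLL_WRITE,
			       page, NULL, &locked);
	if (locked)
		up_read(&mm->mmap_sem);
	/* else __get_user_pages() already did the up_read() for us */
	return ret;			/* on success, put_page() later */
}

Had FOLL_NOWAIT been included, the same call would instead return -EBUSY with
*@nonblocking cleared while keeping mmap_sem held, as the new faultin_page()
comment together with the __lock_page_or_retry() rules implies.
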
index b32b70cdaed6cba1ba79914228593441f4b312d3..123bcd3ed4f209ba3710d9bfcaf8725d0a105534 100644 (file)
@@ -44,6 +44,66 @@ DEFINE_PER_CPU(int, __kmap_atomic_idx);
  */
 #ifdef CONFIG_HIGHMEM
 
+/*
+ * An architecture with an aliasing data cache may define the following family
+ * of helper functions in its asm/highmem.h to control the cache color of the
+ * virtual addresses where physical memory pages are mapped by kmap.
+ */
+#ifndef get_pkmap_color
+
+/*
+ * Determine color of virtual address where the page should be mapped.
+ */
+static inline unsigned int get_pkmap_color(struct page *page)
+{
+       return 0;
+}
+#define get_pkmap_color get_pkmap_color
+
+/*
+ * Get next index for mapping inside PKMAP region for page with given color.
+ */
+static inline unsigned int get_next_pkmap_nr(unsigned int color)
+{
+       static unsigned int last_pkmap_nr;
+
+       last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
+       return last_pkmap_nr;
+}
+
+/*
+ * Determine if the page index inside the PKMAP region (pkmap_nr) of the
+ * given color has wrapped around the PKMAP region end. When this happens,
+ * an attempt is made to flush all unused PKMAP slots.
+ */
+static inline int no_more_pkmaps(unsigned int pkmap_nr, unsigned int color)
+{
+       return pkmap_nr == 0;
+}
+
+/*
+ * Get the number of PKMAP entries of the given color. If no free slot is
+ * found after checking that many entries, kmap will sleep waiting for
+ * someone to call kunmap and free a PKMAP slot.
+ */
+static inline int get_pkmap_entries_count(unsigned int color)
+{
+       return LAST_PKMAP;
+}
+
+/*
+ * Get head of a wait queue for PKMAP entries of the given color.
+ * Wait queues for different mapping colors should be independent to avoid
+ * unnecessary wakeups caused by freeing of slots of other colors.
+ */
+static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color)
+{
+       static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);
+
+       return &pkmap_map_wait;
+}
+#endif
+
 unsigned long totalhigh_pages __read_mostly;
 EXPORT_SYMBOL(totalhigh_pages);
 
@@ -68,13 +128,10 @@ unsigned int nr_free_highpages (void)
 }
 
 static int pkmap_count[LAST_PKMAP];
-static unsigned int last_pkmap_nr;
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
 
 pte_t * pkmap_page_table;
 
-static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);
-
 /*
  * Most architectures have no use for kmap_high_get(), so let's abstract
  * the disabling of IRQ out of the locking in that case to save on a
@@ -161,15 +218,17 @@ static inline unsigned long map_new_virtual(struct page *page)
 {
        unsigned long vaddr;
        int count;
+       unsigned int last_pkmap_nr;
+       unsigned int color = get_pkmap_color(page);
 
 start:
-       count = LAST_PKMAP;
+       count = get_pkmap_entries_count(color);
        /* Find an empty entry */
        for (;;) {
-               last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
-               if (!last_pkmap_nr) {
+               last_pkmap_nr = get_next_pkmap_nr(color);
+               if (no_more_pkmaps(last_pkmap_nr, color)) {
                        flush_all_zero_pkmaps();
-                       count = LAST_PKMAP;
+                       count = get_pkmap_entries_count(color);
                }
                if (!pkmap_count[last_pkmap_nr])
                        break;  /* Found a usable entry */
@@ -181,12 +240,14 @@ start:
                 */
                {
                        DECLARE_WAITQUEUE(wait, current);
+                       wait_queue_head_t *pkmap_map_wait =
+                               get_pkmap_wait_queue_head(color);
 
                        __set_current_state(TASK_UNINTERRUPTIBLE);
-                       add_wait_queue(&pkmap_map_wait, &wait);
+                       add_wait_queue(pkmap_map_wait, &wait);
                        unlock_kmap();
                        schedule();
-                       remove_wait_queue(&pkmap_map_wait, &wait);
+                       remove_wait_queue(pkmap_map_wait, &wait);
                        lock_kmap();
 
                        /* Somebody else might have mapped it while we slept */
@@ -274,6 +335,8 @@ void kunmap_high(struct page *page)
        unsigned long nr;
        unsigned long flags;
        int need_wakeup;
+       unsigned int color = get_pkmap_color(page);
+       wait_queue_head_t *pkmap_map_wait;
 
        lock_kmap_any(flags);
        vaddr = (unsigned long)page_address(page);
@@ -299,13 +362,14 @@ void kunmap_high(struct page *page)
                 * no need for the wait-queue-head's lock.  Simply
                 * test if the queue is empty.
                 */
-               need_wakeup = waitqueue_active(&pkmap_map_wait);
+               pkmap_map_wait = get_pkmap_wait_queue_head(color);
+               need_wakeup = waitqueue_active(pkmap_map_wait);
        }
        unlock_kmap_any(flags);
 
        /* do wake-up, if needed, race-free outside of the spin lock */
        if (need_wakeup)
-               wake_up(&pkmap_map_wait);
+               wake_up(pkmap_map_wait);
 }
 
 EXPORT_SYMBOL(kunmap_high);
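
The hook family introduced above only gains meaning once an architecture
overrides it.  A hypothetical override for a machine whose data cache aliases
across PKMAP_N_COLORS colors might look like the sketch below (illustrative
only, not part of this patch; PKMAP_N_COLORS and the color policy are made
up).  It would live in that architecture's asm/highmem.h:

/* hypothetical; assumed to be a power of two that divides LAST_PKMAP */
#define PKMAP_N_COLORS	4

static inline unsigned int get_pkmap_color(struct page *page)
{
	/* derive the color from the physical page number */
	return page_to_pfn(page) & (PKMAP_N_COLORS - 1);
}
#define get_pkmap_color get_pkmap_color

/* hand out only PKMAP indices whose low bits match the requested color */
static inline unsigned int get_next_pkmap_nr(unsigned int color)
{
	static unsigned int last_pkmap_nr[PKMAP_N_COLORS];

	last_pkmap_nr[color] = (last_pkmap_nr[color] + PKMAP_N_COLORS) &
			       LAST_PKMAP_MASK;
	return last_pkmap_nr[color] + color;
}

/* true once the per-color counter has wrapped back to its first slot */
static inline int no_more_pkmaps(unsigned int pkmap_nr, unsigned int color)
{
	return pkmap_nr < PKMAP_N_COLORS;
}

/* each color owns an equal share of the PKMAP window */
static inline int get_pkmap_entries_count(unsigned int color)
{
	return LAST_PKMAP / PKMAP_N_COLORS;
}

A complete override would also have to supply get_pkmap_wait_queue_head(),
since the generic fallbacks are provided all-or-nothing behind the single
#ifndef get_pkmap_color guard; a per-color wait queue is what avoids the
spurious wakeups the last comment above warns about.
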
index 33514d88fef9b041cef11c74717091eec4805f80..3630d577e9879e9d6dc6a80912e2eb88d5f1c959 100644 (file)
@@ -827,7 +827,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                count_vm_event(THP_FAULT_FALLBACK);
                return VM_FAULT_FALLBACK;
        }
-       if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_KERNEL))) {
+       if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_TRANSHUGE))) {
                put_page(page);
                count_vm_event(THP_FAULT_FALLBACK);
                return VM_FAULT_FALLBACK;
@@ -1132,7 +1132,7 @@ alloc:
                goto out;
        }
 
-       if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) {
+       if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE))) {
                put_page(new_page);
                if (page) {
                        split_huge_page(page);
@@ -1681,7 +1681,7 @@ static void __split_huge_page_refcount(struct page *page,
                           &page_tail->_count);
 
                /* after clearing PageTail the gup refcount can be released */
-               smp_mb();
+               smp_mb__after_atomic();
 
                /*
                 * retain hwpoison flag of the poisoned tail page:
@@ -1775,6 +1775,8 @@ static int __split_huge_page_map(struct page *page,
        if (pmd) {
                pgtable = pgtable_trans_huge_withdraw(mm, pmd);
                pmd_populate(mm, &_pmd, pgtable);
+               if (pmd_write(*pmd))
+                       BUG_ON(page_mapcount(page) != 1);
 
                haddr = address;
                for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
@@ -1784,8 +1786,6 @@ static int __split_huge_page_map(struct page *page,
                        entry = maybe_mkwrite(pte_mkdirty(entry), vma);
                        if (!pmd_write(*pmd))
                                entry = pte_wrprotect(entry);
-                       else
-                               BUG_ON(page_mapcount(page) != 1);
                        if (!pmd_young(*pmd))
                                entry = pte_mkold(entry);
                        if (pmd_numa(*pmd))
@@ -2233,6 +2233,30 @@ static void khugepaged_alloc_sleep(void)
 
 static int khugepaged_node_load[MAX_NUMNODES];
 
+static bool khugepaged_scan_abort(int nid)
+{
+       int i;
+
+       /*
+        * If zone_reclaim_mode is disabled, then no extra effort is made to
+        * allocate memory locally.
+        */
+       if (!zone_reclaim_mode)
+               return false;
+
+       /* If there is a count for this node already, it must be acceptable */
+       if (khugepaged_node_load[nid])
+               return false;
+
+       for (i = 0; i < MAX_NUMNODES; i++) {
+               if (!khugepaged_node_load[i])
+                       continue;
+               if (node_distance(nid, i) > RECLAIM_DISTANCE)
+                       return true;
+       }
+       return false;
+}
+
 #ifdef CONFIG_NUMA
 static int khugepaged_find_target_node(void)
 {
@@ -2399,7 +2423,7 @@ static void collapse_huge_page(struct mm_struct *mm,
        if (!new_page)
                return;
 
-       if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)))
+       if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE)))
                return;
 
        /*
@@ -2545,6 +2569,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
                 * hit record.
                 */
                node = page_to_nid(page);
+               if (khugepaged_scan_abort(node))
+                       goto out_unmap;
                khugepaged_node_load[node]++;
                VM_BUG_ON_PAGE(PageCompound(page), page);
                if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
index 7a0a73d2fcff128850b32af9910a873d6fb384f5..eeceeeb0901978f378ead370134ba37c253ab04c 100644 (file)
@@ -35,7 +35,6 @@
 #include <linux/node.h>
 #include "internal.h"
 
-const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
 unsigned long hugepages_treat_as_movable;
 
 int hugetlb_max_hstate __read_mostly;
@@ -1089,6 +1088,9 @@ void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
        unsigned long pfn;
        struct hstate *h;
 
+       if (!hugepages_supported())
+               return;
+
        /* Set scan step to minimum hugepage size */
        for_each_hstate(h)
                if (order > huge_page_order(h))
@@ -1734,21 +1736,13 @@ static ssize_t nr_hugepages_show_common(struct kobject *kobj,
        return sprintf(buf, "%lu\n", nr_huge_pages);
 }
 
-static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
-                       struct kobject *kobj, struct kobj_attribute *attr,
-                       const char *buf, size_t len)
+static ssize_t __nr_hugepages_store_common(bool obey_mempolicy,
+                                          struct hstate *h, int nid,
+                                          unsigned long count, size_t len)
 {
        int err;
-       int nid;
-       unsigned long count;
-       struct hstate *h;
        NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
 
-       err = kstrtoul(buf, 10, &count);
-       if (err)
-               goto out;
-
-       h = kobj_to_hstate(kobj, &nid);
        if (hstate_is_gigantic(h) && !gigantic_page_supported()) {
                err = -EINVAL;
                goto out;
@@ -1784,6 +1778,23 @@ out:
        return err;
 }
 
+static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
+                                        struct kobject *kobj, const char *buf,
+                                        size_t len)
+{
+       struct hstate *h;
+       unsigned long count;
+       int nid;
+       int err;
+
+       err = kstrtoul(buf, 10, &count);
+       if (err)
+               return err;
+
+       h = kobj_to_hstate(kobj, &nid);
+       return __nr_hugepages_store_common(obey_mempolicy, h, nid, count, len);
+}
+
 static ssize_t nr_hugepages_show(struct kobject *kobj,
                                       struct kobj_attribute *attr, char *buf)
 {
@@ -1793,7 +1804,7 @@ static ssize_t nr_hugepages_show(struct kobject *kobj,
 static ssize_t nr_hugepages_store(struct kobject *kobj,
               struct kobj_attribute *attr, const char *buf, size_t len)
 {
-       return nr_hugepages_store_common(false, kobj, attr, buf, len);
+       return nr_hugepages_store_common(false, kobj, buf, len);
 }
 HSTATE_ATTR(nr_hugepages);
 
@@ -1812,7 +1823,7 @@ static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj,
 static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj,
               struct kobj_attribute *attr, const char *buf, size_t len)
 {
-       return nr_hugepages_store_common(true, kobj, attr, buf, len);
+       return nr_hugepages_store_common(true, kobj, buf, len);
 }
 HSTATE_ATTR(nr_hugepages_mempolicy);
 #endif
@@ -2248,36 +2259,21 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
                         void __user *buffer, size_t *length, loff_t *ppos)
 {
        struct hstate *h = &default_hstate;
-       unsigned long tmp;
+       unsigned long tmp = h->max_huge_pages;
        int ret;
 
        if (!hugepages_supported())
                return -ENOTSUPP;
 
-       tmp = h->max_huge_pages;
-
-       if (write && hstate_is_gigantic(h) && !gigantic_page_supported())
-               return -EINVAL;
-
        table->data = &tmp;
        table->maxlen = sizeof(unsigned long);
        ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
        if (ret)
                goto out;
 
-       if (write) {
-               NODEMASK_ALLOC(nodemask_t, nodes_allowed,
-                                               GFP_KERNEL | __GFP_NORETRY);
-               if (!(obey_mempolicy &&
-                              init_nodemask_of_mempolicy(nodes_allowed))) {
-                       NODEMASK_FREE(nodes_allowed);
-                       nodes_allowed = &node_states[N_MEMORY];
-               }
-               h->max_huge_pages = set_max_huge_pages(h, tmp, nodes_allowed);
-
-               if (nodes_allowed != &node_states[N_MEMORY])
-                       NODEMASK_FREE(nodes_allowed);
-       }
+       if (write)
+               ret = __nr_hugepages_store_common(obey_mempolicy, h,
+                                                 NUMA_NO_NODE, tmp, *length);
 out:
        return ret;
 }
@@ -2754,8 +2750,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
  * from other VMAs and let the children be SIGKILLed if they are faulting the
  * same region.
  */
-static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
-                               struct page *page, unsigned long address)
+static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
+                             struct page *page, unsigned long address)
 {
        struct hstate *h = hstate_vma(vma);
        struct vm_area_struct *iter_vma;
@@ -2794,8 +2790,6 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
                                             address + huge_page_size(h), page);
        }
        mutex_unlock(&mapping->i_mmap_mutex);
-
-       return 1;
 }
 
 /*
@@ -2810,7 +2804,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 {
        struct hstate *h = hstate_vma(vma);
        struct page *old_page, *new_page;
-       int outside_reserve = 0;
+       int ret = 0, outside_reserve = 0;
        unsigned long mmun_start;       /* For mmu_notifiers */
        unsigned long mmun_end;         /* For mmu_notifiers */
 
@@ -2840,14 +2834,14 @@ retry_avoidcopy:
 
        page_cache_get(old_page);
 
-       /* Drop page table lock as buddy allocator may be called */
+       /*
+        * Drop page table lock as buddy allocator may be called. It will
+        * be acquired again before returning to the caller, as expected.
+        */
        spin_unlock(ptl);
        new_page = alloc_huge_page(vma, address, outside_reserve);
 
        if (IS_ERR(new_page)) {
-               long err = PTR_ERR(new_page);
-               page_cache_release(old_page);
-
                /*
                 * If a process owning a MAP_PRIVATE mapping fails to COW,
                 * it is due to references held by a child and an insufficient
@@ -2856,29 +2850,25 @@ retry_avoidcopy:
                 * may get SIGKILLed if it later faults.
                 */
                if (outside_reserve) {
+                       page_cache_release(old_page);
                        BUG_ON(huge_pte_none(pte));
-                       if (unmap_ref_private(mm, vma, old_page, address)) {
-                               BUG_ON(huge_pte_none(pte));
-                               spin_lock(ptl);
-                               ptep = huge_pte_offset(mm, address & huge_page_mask(h));
-                               if (likely(ptep &&
-                                          pte_same(huge_ptep_get(ptep), pte)))
-                                       goto retry_avoidcopy;
-                               /*
-                                * race occurs while re-acquiring page table
-                                * lock, and our job is done.
-                                */
-                               return 0;
-                       }
-                       WARN_ON_ONCE(1);
+                       unmap_ref_private(mm, vma, old_page, address);
+                       BUG_ON(huge_pte_none(pte));
+                       spin_lock(ptl);
+                       ptep = huge_pte_offset(mm, address & huge_page_mask(h));
+                       if (likely(ptep &&
+                                  pte_same(huge_ptep_get(ptep), pte)))
+                               goto retry_avoidcopy;
+                       /*
+                        * race occurs while re-acquiring page table
+                        * lock, and our job is done.
+                        */
+                       return 0;
                }
 
-               /* Caller expects lock to be held */
-               spin_lock(ptl);
-               if (err == -ENOMEM)
-                       return VM_FAULT_OOM;
-               else
-                       return VM_FAULT_SIGBUS;
+               ret = (PTR_ERR(new_page) == -ENOMEM) ?
+                       VM_FAULT_OOM : VM_FAULT_SIGBUS;
+               goto out_release_old;
        }
 
        /*
@@ -2886,11 +2876,8 @@ retry_avoidcopy:
         * anon_vma prepared.
         */
        if (unlikely(anon_vma_prepare(vma))) {
-               page_cache_release(new_page);
-               page_cache_release(old_page);
-               /* Caller expects lock to be held */
-               spin_lock(ptl);
-               return VM_FAULT_OOM;
+               ret = VM_FAULT_OOM;
+               goto out_release_all;
        }
 
        copy_user_huge_page(new_page, old_page, address, vma,
@@ -2900,6 +2887,7 @@ retry_avoidcopy:
        mmun_start = address & huge_page_mask(h);
        mmun_end = mmun_start + huge_page_size(h);
        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
        /*
         * Retake the page table lock to check for racing updates
         * before the page tables are altered
@@ -2920,12 +2908,13 @@ retry_avoidcopy:
        }
        spin_unlock(ptl);
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+out_release_all:
        page_cache_release(new_page);
+out_release_old:
        page_cache_release(old_page);
 
-       /* Caller expects lock to be held */
-       spin_lock(ptl);
-       return 0;
+       spin_lock(ptl); /* Caller expects lock to be held */
+       return ret;
 }
 
 /* Return the pagecache page at a given address within a VMA */
index 95487c71cad59737994d77e1d47c1ff07a7d4365..329caf56df22d84d02495e35f051062b01bdeaba 100644 (file)
@@ -72,8 +72,7 @@ DEFINE_SIMPLE_ATTRIBUTE(unpoison_fops, NULL, hwpoison_unpoison, "%lli\n");
 
 static void pfn_inject_exit(void)
 {
-       if (hwpoison_dir)
-               debugfs_remove_recursive(hwpoison_dir);
+       debugfs_remove_recursive(hwpoison_dir);
 }
 
 static int pfn_inject_init(void)
index 7f22a11fcc66e3e6e0b5a16319f7f4dc232e0ffb..a1b651b11c5fcba7a0322bc19c93286896d08cbf 100644 (file)
@@ -247,7 +247,7 @@ static inline void mlock_migrate_page(struct page *new, struct page *old) { }
 static inline struct page *mem_map_offset(struct page *base, int offset)
 {
        if (unlikely(offset >= MAX_ORDER_NR_PAGES))
-               return pfn_to_page(page_to_pfn(base) + offset);
+               return nth_page(base, offset);
        return base + offset;
 }
 
index a402f8fdc68e94888ea177104524085c9f490fd5..0938b30da4abbb91aa01e7884c6c236728403dee 100644 (file)
@@ -292,9 +292,6 @@ static long madvise_dontneed(struct vm_area_struct *vma,
 /*
  * Application wants to free up the pages and associated backing store.
  * This is effectively punching a hole into the middle of a file.
- *
- * NOTE: Currently, only shmfs/tmpfs is supported for this operation.
- * Other filesystems return -ENOSYS.
  */
 static long madvise_remove(struct vm_area_struct *vma,
                                struct vm_area_struct **prev,
index f009a14918d29c8c9b7c09db6c60428a0042f644..90dc501eaf3fbcbc7a60efeb1a4b3072220c04dc 100644 (file)
@@ -2551,55 +2551,72 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
        return NOTIFY_OK;
 }
 
-
-/* See mem_cgroup_try_charge() for details */
-enum {
-       CHARGE_OK,              /* success */
-       CHARGE_RETRY,           /* need to retry but retry is not bad */
-       CHARGE_NOMEM,           /* we can't do more. return -ENOMEM */
-       CHARGE_WOULDBLOCK,      /* GFP_WAIT wasn't set and no enough res. */
-};
-
-static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
-                               unsigned int nr_pages, unsigned int min_pages,
-                               bool invoke_oom)
+/**
+ * mem_cgroup_try_charge - try charging a memcg
+ * @memcg: memcg to charge
+ * @nr_pages: number of pages to charge
+ *
+ * Returns 0 if @memcg was charged successfully, -EINTR if the charge
+ * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
+ */
+static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
+                                gfp_t gfp_mask,
+                                unsigned int nr_pages)
 {
-       unsigned long csize = nr_pages * PAGE_SIZE;
+       unsigned int batch = max(CHARGE_BATCH, nr_pages);
+       int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
        struct mem_cgroup *mem_over_limit;
        struct res_counter *fail_res;
+       unsigned long nr_reclaimed;
        unsigned long flags = 0;
-       int ret;
+       unsigned long long size;
+       int ret = 0;
 
-       ret = res_counter_charge(&memcg->res, csize, &fail_res);
+retry:
+       if (consume_stock(memcg, nr_pages))
+               goto done;
 
-       if (likely(!ret)) {
+       size = batch * PAGE_SIZE;
+       if (!res_counter_charge(&memcg->res, size, &fail_res)) {
                if (!do_swap_account)
-                       return CHARGE_OK;
-               ret = res_counter_charge(&memcg->memsw, csize, &fail_res);
-               if (likely(!ret))
-                       return CHARGE_OK;
-
-               res_counter_uncharge(&memcg->res, csize);
+                       goto done_restock;
+               if (!res_counter_charge(&memcg->memsw, size, &fail_res))
+                       goto done_restock;
+               res_counter_uncharge(&memcg->res, size);
                mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
                flags |= MEM_CGROUP_RECLAIM_NOSWAP;
        } else
                mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
+
+       if (batch > nr_pages) {
+               batch = nr_pages;
+               goto retry;
+       }
+
        /*
-        * Never reclaim on behalf of optional batching, retry with a
-        * single page instead.
+        * Unlike in global OOM situations, memcg is not in a physical
+        * memory shortage.  Allow dying and OOM-killed tasks to
+        * bypass the last charges so that they can exit quickly and
+        * free their memory.
         */
-       if (nr_pages > min_pages)
-               return CHARGE_RETRY;
+       if (unlikely(test_thread_flag(TIF_MEMDIE) ||
+                    fatal_signal_pending(current) ||
+                    current->flags & PF_EXITING))
+               goto bypass;
+
+       if (unlikely(task_in_memcg_oom(current)))
+               goto nomem;
 
        if (!(gfp_mask & __GFP_WAIT))
-               return CHARGE_WOULDBLOCK;
+               goto nomem;
 
-       if (gfp_mask & __GFP_NORETRY)
-               return CHARGE_NOMEM;
+       nr_reclaimed = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
 
-       ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
        if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
-               return CHARGE_RETRY;
+               goto retry;
+
+       if (gfp_mask & __GFP_NORETRY)
+               goto nomem;
        /*
         * Even though the limit is exceeded at this point, reclaim
         * may have been able to free some pages.  Retry the charge
@@ -2609,96 +2626,38 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
         * unlikely to succeed so close to the limit, and we fall back
         * to regular pages anyway in case of failure.
         */
-       if (nr_pages <= (1 << PAGE_ALLOC_COSTLY_ORDER) && ret)
-               return CHARGE_RETRY;
-
+       if (nr_reclaimed && nr_pages <= (1 << PAGE_ALLOC_COSTLY_ORDER))
+               goto retry;
        /*
         * At task move, charge accounts can be doubly counted. So, it's
         * better to wait until the end of task_move if something is going on.
         */
        if (mem_cgroup_wait_acct_move(mem_over_limit))
-               return CHARGE_RETRY;
-
-       if (invoke_oom)
-               mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(csize));
-
-       return CHARGE_NOMEM;
-}
-
-/**
- * mem_cgroup_try_charge - try charging a memcg
- * @memcg: memcg to charge
- * @nr_pages: number of pages to charge
- * @oom: trigger OOM if reclaim fails
- *
- * Returns 0 if @memcg was charged successfully, -EINTR if the charge
- * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
- */
-static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
-                                gfp_t gfp_mask,
-                                unsigned int nr_pages,
-                                bool oom)
-{
-       unsigned int batch = max(CHARGE_BATCH, nr_pages);
-       int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
-       int ret;
-
-       if (mem_cgroup_is_root(memcg))
-               goto done;
-       /*
-        * Unlike in global OOM situations, memcg is not in a physical
-        * memory shortage.  Allow dying and OOM-killed tasks to
-        * bypass the last charges so that they can exit quickly and
-        * free their memory.
-        */
-       if (unlikely(test_thread_flag(TIF_MEMDIE) ||
-                    fatal_signal_pending(current) ||
-                    current->flags & PF_EXITING))
-               goto bypass;
+               goto retry;
 
-       if (unlikely(task_in_memcg_oom(current)))
-               goto nomem;
+       if (nr_retries--)
+               goto retry;
 
        if (gfp_mask & __GFP_NOFAIL)
-               oom = false;
-again:
-       if (consume_stock(memcg, nr_pages))
-               goto done;
-
-       do {
-               bool invoke_oom = oom && !nr_oom_retries;
-
-               /* If killed, bypass charge */
-               if (fatal_signal_pending(current))
-                       goto bypass;
+               goto bypass;
 
-               ret = mem_cgroup_do_charge(memcg, gfp_mask, batch,
-                                          nr_pages, invoke_oom);
-               switch (ret) {
-               case CHARGE_OK:
-                       break;
-               case CHARGE_RETRY: /* not in OOM situation but retry */
-                       batch = nr_pages;
-                       goto again;
-               case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
-                       goto nomem;
-               case CHARGE_NOMEM: /* OOM routine works */
-                       if (!oom || invoke_oom)
-                               goto nomem;
-                       nr_oom_retries--;
-                       break;
-               }
-       } while (ret != CHARGE_OK);
+       if (fatal_signal_pending(current))
+               goto bypass;
 
-       if (batch > nr_pages)
-               refill_stock(memcg, batch - nr_pages);
-done:
-       return 0;
+       mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(nr_pages));
 nomem:
        if (!(gfp_mask & __GFP_NOFAIL))
                return -ENOMEM;
 bypass:
-       return -EINTR;
+       memcg = root_mem_cgroup;
+       ret = -EINTR;
+       goto retry;
+
+done_restock:
+       if (batch > nr_pages)
+               refill_stock(memcg, batch - nr_pages);
+done:
+       return ret;
 }
 
 /**
@@ -2712,15 +2671,14 @@ bypass:
  */
 static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
                                 gfp_t gfp_mask,
-                                unsigned int nr_pages,
-                                bool oom)
+                                unsigned int nr_pages)
 
 {
        struct mem_cgroup *memcg;
        int ret;
 
        memcg = get_mem_cgroup_from_mm(mm);
-       ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom);
+       ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages);
        css_put(&memcg->css);
        if (ret == -EINTR)
                memcg = root_mem_cgroup;
@@ -2738,13 +2696,11 @@ static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
 static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
                                       unsigned int nr_pages)
 {
-       if (!mem_cgroup_is_root(memcg)) {
-               unsigned long bytes = nr_pages * PAGE_SIZE;
+       unsigned long bytes = nr_pages * PAGE_SIZE;
 
-               res_counter_uncharge(&memcg->res, bytes);
-               if (do_swap_account)
-                       res_counter_uncharge(&memcg->memsw, bytes);
-       }
+       res_counter_uncharge(&memcg->res, bytes);
+       if (do_swap_account)
+               res_counter_uncharge(&memcg->memsw, bytes);
 }
 
 /*
@@ -2756,9 +2712,6 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
 {
        unsigned long bytes = nr_pages * PAGE_SIZE;
 
-       if (mem_cgroup_is_root(memcg))
-               return;
-
        res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes);
        if (do_swap_account)
                res_counter_uncharge_until(&memcg->memsw,
@@ -2842,14 +2795,6 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
        }
 
        pc->mem_cgroup = memcg;
-       /*
-        * We access a page_cgroup asynchronously without lock_page_cgroup().
-        * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
-        * is accessed after testing USED bit. To make pc->mem_cgroup visible
-        * before USED bit, we need memory barrier here.
-        * See mem_cgroup_add_lru_list(), etc.
-        */
-       smp_wmb();
        SetPageCgroupUsed(pc);
 
        if (lrucare) {
@@ -2937,8 +2882,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
        if (ret)
                return ret;
 
-       ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT,
-                                   oom_gfp_allowed(gfp));
+       ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT);
        if (ret == -EINTR)  {
                /*
                 * mem_cgroup_try_charge() chosed to bypass to root due to
@@ -3463,12 +3407,13 @@ void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
                memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
                return;
        }
-
+       /*
+        * The page is freshly allocated and not visible to any
+        * outside callers yet.  Set up pc non-atomically.
+        */
        pc = lookup_page_cgroup(page);
-       lock_page_cgroup(pc);
        pc->mem_cgroup = memcg;
-       SetPageCgroupUsed(pc);
-       unlock_page_cgroup(pc);
+       pc->flags = PCG_USED;
 }
 
 void __memcg_kmem_uncharge_pages(struct page *page, int order)
@@ -3478,19 +3423,11 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
 
 
        pc = lookup_page_cgroup(page);
-       /*
-        * Fast unlocked return. Theoretically might have changed, have to
-        * check again after locking.
-        */
        if (!PageCgroupUsed(pc))
                return;
 
-       lock_page_cgroup(pc);
-       if (PageCgroupUsed(pc)) {
-               memcg = pc->mem_cgroup;
-               ClearPageCgroupUsed(pc);
-       }
-       unlock_page_cgroup(pc);
+       memcg = pc->mem_cgroup;
+       pc->flags = 0;
 
        /*
         * We trust that only if there is a memcg associated with the page, it
@@ -3531,7 +3468,6 @@ void mem_cgroup_split_huge_fixup(struct page *head)
        for (i = 1; i < HPAGE_PMD_NR; i++) {
                pc = head_pc + i;
                pc->mem_cgroup = memcg;
-               smp_wmb();/* see __commit_charge() */
                pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
        }
        __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
@@ -3687,7 +3623,6 @@ int mem_cgroup_charge_anon(struct page *page,
 {
        unsigned int nr_pages = 1;
        struct mem_cgroup *memcg;
-       bool oom = true;
 
        if (mem_cgroup_disabled())
                return 0;
@@ -3699,14 +3634,9 @@ int mem_cgroup_charge_anon(struct page *page,
        if (PageTransHuge(page)) {
                nr_pages <<= compound_order(page);
                VM_BUG_ON_PAGE(!PageTransHuge(page), page);
-               /*
-                * Never OOM-kill a process for a huge page.  The
-                * fault handler will fall back to regular pages.
-                */
-               oom = false;
        }
 
-       memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom);
+       memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages);
        if (!memcg)
                return -ENOMEM;
        __mem_cgroup_commit_charge(memcg, page, nr_pages,
@@ -3743,7 +3673,7 @@ static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
                memcg = try_get_mem_cgroup_from_page(page);
        if (!memcg)
                memcg = get_mem_cgroup_from_mm(mm);
-       ret = mem_cgroup_try_charge(memcg, mask, 1, true);
+       ret = mem_cgroup_try_charge(memcg, mask, 1);
        css_put(&memcg->css);
        if (ret == -EINTR)
                memcg = root_mem_cgroup;
@@ -3770,7 +3700,7 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
        if (!PageSwapCache(page)) {
                struct mem_cgroup *memcg;
 
-               memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
+               memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
                if (!memcg)
                        return -ENOMEM;
                *memcgp = memcg;
@@ -3839,7 +3769,7 @@ int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
                return 0;
        }
 
-       memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
+       memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
        if (!memcg)
                return -ENOMEM;
        __mem_cgroup_commit_charge(memcg, page, 1, type, false);
@@ -3993,7 +3923,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
         * replacement page, so leave it alone when phasing out the
         * page that is unused after the migration.
         */
-       if (!end_migration && !mem_cgroup_is_root(memcg))
+       if (!end_migration)
                mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
 
        return memcg;
@@ -4126,8 +4056,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
                 * We uncharge this because swap is freed.  This memcg can
                 * be obsolete one. We avoid calling css_tryget_online().
                 */
-               if (!mem_cgroup_is_root(memcg))
-                       res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+               res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
                mem_cgroup_swap_statistics(memcg, false);
                css_put(&memcg->css);
        }
@@ -4817,78 +4746,24 @@ out:
        return retval;
 }
 
-
-static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
-                                              enum mem_cgroup_stat_index idx)
-{
-       struct mem_cgroup *iter;
-       long val = 0;
-
-       /* Per-cpu values can be negative, use a signed accumulator */
-       for_each_mem_cgroup_tree(iter, memcg)
-               val += mem_cgroup_read_stat(iter, idx);
-
-       if (val < 0) /* race ? */
-               val = 0;
-       return val;
-}
-
-static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
-{
-       u64 val;
-
-       if (!mem_cgroup_is_root(memcg)) {
-               if (!swap)
-                       return res_counter_read_u64(&memcg->res, RES_USAGE);
-               else
-                       return res_counter_read_u64(&memcg->memsw, RES_USAGE);
-       }
-
-       /*
-        * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
-        * as well as in MEM_CGROUP_STAT_RSS_HUGE.
-        */
-       val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
-       val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
-
-       if (swap)
-               val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
-
-       return val << PAGE_SHIFT;
-}
-
 static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
-                                  struct cftype *cft)
+                              struct cftype *cft)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-       u64 val;
-       int name;
-       enum res_type type;
-
-       type = MEMFILE_TYPE(cft->private);
-       name = MEMFILE_ATTR(cft->private);
+       enum res_type type = MEMFILE_TYPE(cft->private);
+       int name = MEMFILE_ATTR(cft->private);
 
        switch (type) {
        case _MEM:
-               if (name == RES_USAGE)
-                       val = mem_cgroup_usage(memcg, false);
-               else
-                       val = res_counter_read_u64(&memcg->res, name);
-               break;
+               return res_counter_read_u64(&memcg->res, name);
        case _MEMSWAP:
-               if (name == RES_USAGE)
-                       val = mem_cgroup_usage(memcg, true);
-               else
-                       val = res_counter_read_u64(&memcg->memsw, name);
-               break;
+               return res_counter_read_u64(&memcg->memsw, name);
        case _KMEM:
-               val = res_counter_read_u64(&memcg->kmem, name);
+               return res_counter_read_u64(&memcg->kmem, name);
                break;
        default:
                BUG();
        }
-
-       return val;
 }
 
 #ifdef CONFIG_MEMCG_KMEM
@@ -5350,7 +5225,10 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
        if (!t)
                goto unlock;
 
-       usage = mem_cgroup_usage(memcg, swap);
+       if (!swap)
+               usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+       else
+               usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
 
        /*
         * current_threshold points to threshold just below or equal to usage.
@@ -5446,15 +5324,15 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
 
        mutex_lock(&memcg->thresholds_lock);
 
-       if (type == _MEM)
+       if (type == _MEM) {
                thresholds = &memcg->thresholds;
-       else if (type == _MEMSWAP)
+               usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+       } else if (type == _MEMSWAP) {
                thresholds = &memcg->memsw_thresholds;
-       else
+               usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+       } else
                BUG();
 
-       usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
-
        /* Check if a threshold crossed before adding a new one */
        if (thresholds->primary)
                __mem_cgroup_threshold(memcg, type == _MEMSWAP);
@@ -5534,18 +5412,19 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
        int i, j, size;
 
        mutex_lock(&memcg->thresholds_lock);
-       if (type == _MEM)
+
+       if (type == _MEM) {
                thresholds = &memcg->thresholds;
-       else if (type == _MEMSWAP)
+               usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+       } else if (type == _MEMSWAP) {
                thresholds = &memcg->memsw_thresholds;
-       else
+               usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+       } else
                BUG();
 
        if (!thresholds->primary)
                goto unlock;
 
-       usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
-
        /* Check if a threshold crossed before removing */
        __mem_cgroup_threshold(memcg, type == _MEMSWAP);
 
@@ -6299,9 +6178,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
                 * core guarantees its existence.
                 */
        } else {
-               res_counter_init(&memcg->res, NULL);
-               res_counter_init(&memcg->memsw, NULL);
-               res_counter_init(&memcg->kmem, NULL);
+               res_counter_init(&memcg->res, &root_mem_cgroup->res);
+               res_counter_init(&memcg->memsw, &root_mem_cgroup->memsw);
+               res_counter_init(&memcg->kmem, &root_mem_cgroup->kmem);
                /*
                 * Deeper hierachy with use_hierarchy == false doesn't make
                 * much sense so let cgroup subsystem know about this
@@ -6435,55 +6314,39 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
 
 #ifdef CONFIG_MMU
 /* Handlers for move charge at task migration. */
-#define PRECHARGE_COUNT_AT_ONCE        256
 static int mem_cgroup_do_precharge(unsigned long count)
 {
-       int ret = 0;
-       int batch_count = PRECHARGE_COUNT_AT_ONCE;
-       struct mem_cgroup *memcg = mc.to;
+       int ret;
 
-       if (mem_cgroup_is_root(memcg)) {
+       /* Try a single bulk charge without reclaim first */
+       ret = mem_cgroup_try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count);
+       if (!ret) {
                mc.precharge += count;
-               /* we don't need css_get for root */
                return ret;
        }
-       /* try to charge at once */
-       if (count > 1) {
-               struct res_counter *dummy;
-               /*
-                * "memcg" cannot be under rmdir() because we've already checked
-                * by cgroup_lock_live_cgroup() that it is not removed and we
-                * are still under the same cgroup_mutex. So we can postpone
-                * css_get().
-                */
-               if (res_counter_charge(&memcg->res, PAGE_SIZE * count, &dummy))
-                       goto one_by_one;
-               if (do_swap_account && res_counter_charge(&memcg->memsw,
-                                               PAGE_SIZE * count, &dummy)) {
-                       res_counter_uncharge(&memcg->res, PAGE_SIZE * count);
-                       goto one_by_one;
-               }
-               mc.precharge += count;
+       if (ret == -EINTR) {
+               __mem_cgroup_cancel_charge(root_mem_cgroup, count);
                return ret;
        }
-one_by_one:
-       /* fall back to one by one charge */
+
+       /* Try charges one by one with reclaim */
        while (count--) {
-               if (signal_pending(current)) {
-                       ret = -EINTR;
-                       break;
-               }
-               if (!batch_count--) {
-                       batch_count = PRECHARGE_COUNT_AT_ONCE;
-                       cond_resched();
-               }
-               ret = mem_cgroup_try_charge(memcg, GFP_KERNEL, 1, false);
+               ret = mem_cgroup_try_charge(mc.to,
+                                           GFP_KERNEL & ~__GFP_NORETRY, 1);
+               /*
+                * In case of failure, any residual charges against
+                * mc.to will be dropped by mem_cgroup_clear_mc()
+                * later on.  However, cancel any charges that are
+                * bypassed to root right away or they'll be lost.
+                */
+               if (ret == -EINTR)
+                       __mem_cgroup_cancel_charge(root_mem_cgroup, 1);
                if (ret)
-                       /* mem_cgroup_clear_mc() will do uncharge later */
                        return ret;
                mc.precharge++;
+               cond_resched();
        }
-       return ret;
+       return 0;
 }
 
 /**
@@ -6760,21 +6623,18 @@ static void __mem_cgroup_clear_mc(void)
        /* we must fixup refcnts and charges */
        if (mc.moved_swap) {
                /* uncharge swap account from the old cgroup */
-               if (!mem_cgroup_is_root(mc.from))
-                       res_counter_uncharge(&mc.from->memsw,
-                                               PAGE_SIZE * mc.moved_swap);
+               res_counter_uncharge(&mc.from->memsw,
+                                    PAGE_SIZE * mc.moved_swap);
 
                for (i = 0; i < mc.moved_swap; i++)
                        css_put(&mc.from->css);
 
-               if (!mem_cgroup_is_root(mc.to)) {
-                       /*
-                        * we charged both to->res and to->memsw, so we should
-                        * uncharge to->res.
-                        */
-                       res_counter_uncharge(&mc.to->res,
-                                               PAGE_SIZE * mc.moved_swap);
-               }
+               /*
+                * we charged both to->res and to->memsw, so we should
+                * uncharge to->res.
+                */
+               res_counter_uncharge(&mc.to->res,
+                                    PAGE_SIZE * mc.moved_swap);
                /* we've already done css_get(mc.to) */
                mc.moved_swap = 0;
        }
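
For reference, the new return contract of mem_cgroup_try_charge() is consumed
as in the fragment below (illustrative only; it merely restates what
mem_cgroup_try_charge_mm() earlier in this file already does):

	ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages);
	if (ret == -EINTR) {
		/* the charge was bypassed to the root group */
		memcg = root_mem_cgroup;
		ret = 0;
	}
	/* otherwise ret is 0 on success or -ENOMEM on failure */
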
index a013bc94ebbed4af3764e396b087475310ed5185..44c6bd201d3a1cac7120527b45e2a86f5f77abff 100644 (file)
@@ -1172,6 +1172,16 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 
        lock_page(hpage);
 
+       /*
+        * The page could have become part of a different compound page
+        * while we were locking it.  If this happens, just bail out.
+        */
+       if (compound_head(p) != hpage) {
+               action_result(pfn, "different compound page after locking", IGNORED);
+               res = -EBUSY;
+               goto out;
+       }
+
        /*
         * We use page flags to determine what action should be taken, but
         * the flags can be modified by the error containment action.  One
index 8b44f765b64584a9a2e7c6f8873d6fcb6acb7726..5c55270729f7b45a1196e8fb4fc5a5374dcc6d9d 100644 (file)
@@ -884,7 +884,7 @@ out_set_pte:
        return 0;
 }
 
-int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                   pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
                   unsigned long addr, unsigned long end)
 {
@@ -2399,7 +2399,10 @@ EXPORT_SYMBOL(unmap_mapping_range);
 /*
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ *
+ * We return with the mmap_sem locked or unlocked in the same cases
+ * as does filemap_fault().
  */
 static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -2688,6 +2691,11 @@ oom:
        return VM_FAULT_OOM;
 }
 
+/*
+ * The mmap_sem must have been held on entry, and may have been
+ * released depending on flags and vma->vm_ops->fault() return value.
+ * See filemap_fault() and __lock_page_or_retry().
+ */
 static int __do_fault(struct vm_area_struct *vma, unsigned long address,
                pgoff_t pgoff, unsigned int flags, struct page **page)
 {
@@ -2744,7 +2752,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
        if (write)
                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
        else if (pte_file(*pte) && pte_file_soft_dirty(*pte))
-               pte_mksoft_dirty(entry);
+               entry = pte_mksoft_dirty(entry);
        if (anon) {
                inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
                page_add_new_anon_rmap(page, vma, address);
@@ -2758,17 +2766,8 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
        update_mmu_cache(vma, address, pte);
 }
 
-static unsigned long fault_around_bytes = rounddown_pow_of_two(65536);
-
-static inline unsigned long fault_around_pages(void)
-{
-       return fault_around_bytes >> PAGE_SHIFT;
-}
-
-static inline unsigned long fault_around_mask(void)
-{
-       return ~(fault_around_bytes - 1) & PAGE_MASK;
-}
+static unsigned long fault_around_bytes __read_mostly =
+       rounddown_pow_of_two(65536);
 
 #ifdef CONFIG_DEBUG_FS
 static int fault_around_bytes_get(void *data, u64 *val)
@@ -2834,12 +2833,15 @@ late_initcall(fault_around_debugfs);
 static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
                pte_t *pte, pgoff_t pgoff, unsigned int flags)
 {
-       unsigned long start_addr;
+       unsigned long start_addr, nr_pages, mask;
        pgoff_t max_pgoff;
        struct vm_fault vmf;
        int off;
 
-       start_addr = max(address & fault_around_mask(), vma->vm_start);
+       nr_pages = ACCESS_ONCE(fault_around_bytes) >> PAGE_SHIFT;
+       mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
+
+       start_addr = max(address & mask, vma->vm_start);
        off = ((address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
        pte -= off;
        pgoff -= off;
@@ -2851,7 +2853,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
        max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
                PTRS_PER_PTE - 1;
        max_pgoff = min3(max_pgoff, vma_pages(vma) + vma->vm_pgoff - 1,
-                       pgoff + fault_around_pages() - 1);
+                       pgoff + nr_pages - 1);
 
        /* Check if it makes any sense to call ->map_pages */
        while (!pte_none(*pte)) {
@@ -2886,7 +2888,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         * something).
         */
        if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) &&
-           fault_around_pages() > 1) {
+           fault_around_bytes >> PAGE_SHIFT > 1) {
                pte = pte_offset_map_lock(mm, pmd, address, &ptl);
                do_fault_around(vma, address, pte, pgoff, flags);
                if (!pte_same(*pte, orig_pte))
@@ -3016,6 +3018,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        return ret;
 }
 
+/*
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults).
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
+ */
 static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                unsigned long address, pte_t *page_table, pmd_t *pmd,
                unsigned int flags, pte_t orig_pte)
@@ -3040,7 +3048,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
  *
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
  */
 static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -3172,7 +3182,10 @@ out:
  *
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
  */
 static int handle_pte_fault(struct mm_struct *mm,
                     struct vm_area_struct *vma, unsigned long address,
@@ -3181,7 +3194,7 @@ static int handle_pte_fault(struct mm_struct *mm,
        pte_t entry;
        spinlock_t *ptl;
 
-       entry = *pte;
+       entry = ACCESS_ONCE(*pte);
        if (!pte_present(entry)) {
                if (pte_none(entry)) {
                        if (vma->vm_ops) {
@@ -3232,6 +3245,9 @@ unlock:
 
 /*
  * By the time we get here, we already hold the mm semaphore
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
  */
 static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                             unsigned long address, unsigned int flags)
@@ -3313,6 +3329,12 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        return handle_pte_fault(mm, vma, address, pte, pmd, flags);
 }
 
+/*
+ * By the time we get here, we already hold the mm semaphore
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
+ */
 int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                    unsigned long address, unsigned int flags)
 {
@@ -3591,11 +3613,13 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
                ret = get_user_pages(tsk, mm, addr, 1,
                                write, 1, &page, &vma);
                if (ret <= 0) {
+#ifndef CONFIG_HAVE_IOREMAP_PROT
+                       break;
+#else
                        /*
                         * Check if this is a VM_IO | VM_PFNMAP VMA, which
                         * we can access using slightly different code.
                         */
-#ifdef CONFIG_HAVE_IOREMAP_PROT
                        vma = find_vma(mm, addr);
                        if (!vma || vma->vm_start > addr)
                                break;
@@ -3603,9 +3627,9 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
                                ret = vma->vm_ops->access(vma, addr, buf,
                                                          len, write);
                        if (ret <= 0)
-#endif
                                break;
                        bytes = ret;
+#endif
                } else {
                        bytes = len;
                        offset = addr & (PAGE_SIZE-1);
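
A minimal user-space sketch of the nr_pages/mask arithmetic used by the do_fault_around() hunk above; PAGE_SHIFT/PAGE_MASK and the sample address are assumptions for illustration, not values taken from the patch.

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
        /* default fault_around_bytes, already a power of two */
        unsigned long fault_around_bytes = 65536;
        unsigned long nr_pages = fault_around_bytes >> PAGE_SHIFT;     /* 16 */
        unsigned long mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
        unsigned long address = 0x7f1234567abcUL;  /* hypothetical fault address */

        /* start_addr is the fault address rounded down to the fault-around window */
        printf("nr_pages=%lu mask=%#lx start_addr=%#lx\n",
               nr_pages, mask, address & mask);
        return 0;
}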
index 469bbf505f85543f7184428016b56964144e400d..2ff8c2325e968b509e983077a41ec4d0f42c00f4 100644 (file)
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -284,8 +284,8 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
 }
 #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
 
-static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
-                          unsigned long end_pfn)
+static void __meminit grow_zone_span(struct zone *zone, unsigned long start_pfn,
+                                    unsigned long end_pfn)
 {
        unsigned long old_zone_end_pfn;
 
@@ -427,8 +427,8 @@ out_fail:
        return -1;
 }
 
-static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
-                           unsigned long end_pfn)
+static void __meminit grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
+                                     unsigned long end_pfn)
 {
        unsigned long old_pgdat_end_pfn = pgdat_end_pfn(pgdat);
 
@@ -977,15 +977,18 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
        zone = page_zone(pfn_to_page(pfn));
 
        ret = -EINVAL;
-       if ((zone_idx(zone) > ZONE_NORMAL || online_type == ONLINE_MOVABLE) &&
+       if ((zone_idx(zone) > ZONE_NORMAL ||
+           online_type == MMOP_ONLINE_MOVABLE) &&
            !can_online_high_movable(zone))
                goto out;
 
-       if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) {
+       if (online_type == MMOP_ONLINE_KERNEL &&
+           zone_idx(zone) == ZONE_MOVABLE) {
                if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages))
                        goto out;
        }
-       if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) {
+       if (online_type == MMOP_ONLINE_MOVABLE &&
+           zone_idx(zone) == ZONE_MOVABLE - 1) {
                if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages))
                        goto out;
        }
@@ -1156,6 +1159,34 @@ static int check_hotplug_memory_range(u64 start, u64 size)
        return 0;
 }
 
+/*
+ * If the movable zone has already been set up, newly added memory should be
+ * checked.  If its address is higher than the movable zone, it should be
+ * added as movable.  Without this check, the movable zone may overlap with
+ * other zones.
+ */
+static int should_add_memory_movable(int nid, u64 start, u64 size)
+{
+       unsigned long start_pfn = start >> PAGE_SHIFT;
+       pg_data_t *pgdat = NODE_DATA(nid);
+       struct zone *movable_zone = pgdat->node_zones + ZONE_MOVABLE;
+
+       if (zone_is_empty(movable_zone))
+               return 0;
+
+       if (movable_zone->zone_start_pfn <= start_pfn)
+               return 1;
+
+       return 0;
+}
+
+int zone_for_memory(int nid, u64 start, u64 size, int zone_default)
+{
+       if (should_add_memory_movable(nid, start, size))
+               return ZONE_MOVABLE;
+
+       return zone_default;
+}
+
 /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
 int __ref add_memory(int nid, u64 start, u64 size)
 {
index b1eb53634005606298d115ac5cdeb90cac923957..ce84cb0b83ef56179facdacc286bce88abd05fae 100644 (file)
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -210,12 +210,19 @@ out:
  * @vma:   target vma
  * @start: start address
  * @end:   end address
+ * @nonblocking: if non-NULL, *@nonblocking is cleared when mmap_sem is released
  *
  * This takes care of making the pages present too.
  *
  * return 0 on success, negative error code on error.
  *
- * vma->vm_mm->mmap_sem must be held for at least read.
+ * vma->vm_mm->mmap_sem must be held.
+ *
+ * If @nonblocking is NULL, it may be held for read or write and will
+ * be unperturbed.
+ *
+ * If @nonblocking is non-NULL, it must be held for read only and may be
+ * released.  If it's released, *@nonblocking will be set to 0.
  */
 long __mlock_vma_pages_range(struct vm_area_struct *vma,
                unsigned long start, unsigned long end, int *nonblocking)
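
A simplified, hypothetical caller following the @nonblocking contract documented above; the names and error handling are condensed from what a real caller such as mm/mlock.c's __mm_populate() would do.

static long populate_range(struct mm_struct *mm, struct vm_area_struct *vma,
                           unsigned long start, unsigned long end)
{
        int locked = 1;
        long ret;

        down_read(&mm->mmap_sem);
        ret = __mlock_vma_pages_range(vma, start, end, &locked);
        if (locked)             /* helper did not drop mmap_sem; release it here */
                up_read(&mm->mmap_sem);
        return ret;
}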
index 129b847d30cc35c8724cee63e924f69faaf542c6..64c9d736155c7a546e6d133426a0861a63688ead 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -31,6 +31,7 @@
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
 #include <linux/mmu_notifier.h>
+#include <linux/mmdebug.h>
 #include <linux/perf_event.h>
 #include <linux/audit.h>
 #include <linux/khugepaged.h>
@@ -134,6 +135,10 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 {
        unsigned long free, allowed, reserve;
 
+       VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) <
+                       -(s64)vm_committed_as_batch * num_online_cpus(),
+                       "memory commitment underflow");
+
        vm_acct_memory(pages);
 
        /*
index 41cefdf0aaddc46144187cba72ddf3e2629a9f02..950813b1eb3656dc49e66eab4e912fff76dbfc2b 100644 (file)
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
 /* global SRCU for all MMs */
 static struct srcu_struct srcu;
 
+/*
+ * This function allows an mmu_notifier's ->release callback to delay a call
+ * to a function that will free the appropriate resources.  That deferred
+ * function must be quick and must not block.
+ */
+void mmu_notifier_call_srcu(struct rcu_head *rcu,
+                           void (*func)(struct rcu_head *rcu))
+{
+       call_srcu(&srcu, rcu, func);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_call_srcu);
+
+void mmu_notifier_synchronize(void)
+{
+       /* Wait for any running method to finish. */
+       srcu_barrier(&srcu);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_synchronize);
+
 /*
  * This function can't run concurrently against mmu_notifier_register
  * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
@@ -53,7 +72,6 @@ void __mmu_notifier_release(struct mm_struct *mm)
                 */
                if (mn->ops->release)
                        mn->ops->release(mn, mm);
-       srcu_read_unlock(&srcu, id);
 
        spin_lock(&mm->mmu_notifier_mm->lock);
        while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
@@ -69,6 +87,7 @@ void __mmu_notifier_release(struct mm_struct *mm)
                hlist_del_init_rcu(&mn->hlist);
        }
        spin_unlock(&mm->mmu_notifier_mm->lock);
+       srcu_read_unlock(&srcu, id);
 
        /*
         * synchronize_srcu here prevents mmu_notifier_release from returning to
@@ -325,6 +344,25 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
 
+/*
+ * Same as mmu_notifier_unregister() but with no ->release callback and no
+ * SRCU synchronization.
+ */
+void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
+                                       struct mm_struct *mm)
+{
+       spin_lock(&mm->mmu_notifier_mm->lock);
+       /*
+        * Cannot use list_del_rcu() since __mmu_notifier_release()
+        * can delete it before we hold the lock.
+        */
+       hlist_del_init_rcu(&mn->hlist);
+       spin_unlock(&mm->mmu_notifier_mm->lock);
+
+       BUG_ON(atomic_read(&mm->mm_count) <= 0);
+       mmdrop(mm);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release);
+
 static int __init mmu_notifier_init(void)
 {
        return init_srcu_struct(&srcu);
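
A minimal sketch of the intended use of mmu_notifier_call_srcu() from a notifier's ->release callback; struct my_notifier, my_free() and my_release() are invented names for illustration only.

struct my_notifier {
        struct mmu_notifier mn;
        struct rcu_head rcu;
};

static void my_free(struct rcu_head *rcu)
{
        kfree(container_of(rcu, struct my_notifier, rcu));
}

static void my_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
        struct my_notifier *p = container_of(mn, struct my_notifier, mn);

        /* defer the kfree() until the SRCU grace period has elapsed */
        mmu_notifier_call_srcu(&p->rcu, my_free);
}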
index 3291e82d4352423cb1cd747eaa589da4b8a07a74..1e11df8fa7ecaecd274a3d0aaa1fe0aea4bb38ab 100644 (file)
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -258,8 +258,6 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
                unsigned long totalpages, const nodemask_t *nodemask,
                bool force_kill)
 {
-       if (task->exit_state)
-               return OOM_SCAN_CONTINUE;
        if (oom_unkillable_task(task, NULL, nodemask))
                return OOM_SCAN_CONTINUE;
 
@@ -559,28 +557,25 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
  * if a parallel OOM killing is already taking place that includes a zone in
  * the zonelist.  Otherwise, locks all zones in the zonelist and returns 1.
  */
-int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
+bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_mask)
 {
        struct zoneref *z;
        struct zone *zone;
-       int ret = 1;
+       bool ret = true;
 
        spin_lock(&zone_scan_lock);
-       for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
+       for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
                if (zone_is_oom_locked(zone)) {
-                       ret = 0;
+                       ret = false;
                        goto out;
                }
-       }
 
-       for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
-               /*
-                * Lock each zone in the zonelist under zone_scan_lock so a
-                * parallel invocation of try_set_zonelist_oom() doesn't succeed
-                * when it shouldn't.
-                */
+       /*
+        * Lock each zone in the zonelist under zone_scan_lock so a parallel
+        * call to oom_zonelist_trylock() doesn't succeed when it shouldn't.
+        */
+       for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
                zone_set_flag(zone, ZONE_OOM_LOCKED);
-       }
 
 out:
        spin_unlock(&zone_scan_lock);
@@ -592,15 +587,14 @@ out:
  * allocation attempts with zonelists containing them may now recall the OOM
  * killer, if necessary.
  */
-void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
+void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask)
 {
        struct zoneref *z;
        struct zone *zone;
 
        spin_lock(&zone_scan_lock);
-       for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
+       for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
                zone_clear_flag(zone, ZONE_OOM_LOCKED);
-       }
        spin_unlock(&zone_scan_lock);
 }
 
@@ -694,9 +688,9 @@ void pagefault_out_of_memory(void)
        if (mem_cgroup_oom_synchronize(true))
                return;
 
-       zonelist = node_zonelist(first_online_node, GFP_KERNEL);
-       if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) {
+       zonelist = node_zonelist(first_memory_node, GFP_KERNEL);
+       if (oom_zonelist_trylock(zonelist, GFP_KERNEL)) {
                out_of_memory(NULL, 0, 0, NULL, false);
-               clear_zonelist_oom(zonelist, GFP_KERNEL);
+               oom_zonelist_unlock(zonelist, GFP_KERNEL);
        }
 }
index e0c943014eb74ce3d4cb4c021d6f896740386d6d..91d73ef1744d6fbc5c4bbdf9782beb3b3e22da6b 100644 (file)
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -261,14 +261,11 @@ static unsigned long global_dirtyable_memory(void)
  */
 void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
 {
+       const unsigned long available_memory = global_dirtyable_memory();
        unsigned long background;
        unsigned long dirty;
-       unsigned long uninitialized_var(available_memory);
        struct task_struct *tsk;
 
-       if (!vm_dirty_bytes || !dirty_background_bytes)
-               available_memory = global_dirtyable_memory();
-
        if (vm_dirty_bytes)
                dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
        else
index ef44ad736ca17f79606021439b89a9fba4455564..18cee0d4c8a20705a4b3e7dd73e7d1d8a8b8d595 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -680,9 +680,12 @@ static void free_pcppages_bulk(struct zone *zone, int count,
        int migratetype = 0;
        int batch_free = 0;
        int to_free = count;
+       unsigned long nr_scanned;
 
        spin_lock(&zone->lock);
-       zone->pages_scanned = 0;
+       nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
+       if (nr_scanned)
+               __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
 
        while (to_free) {
                struct page *page;
@@ -731,8 +734,11 @@ static void free_one_page(struct zone *zone,
                                unsigned int order,
                                int migratetype)
 {
+       unsigned long nr_scanned;
        spin_lock(&zone->lock);
-       zone->pages_scanned = 0;
+       nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
+       if (nr_scanned)
+               __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
 
        __free_one_page(page, pfn, zone, order, migratetype);
        if (unlikely(!is_migrate_isolate(migratetype)))
@@ -1257,15 +1263,11 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 {
        unsigned long flags;
-       int to_drain;
-       unsigned long batch;
+       int to_drain, batch;
 
        local_irq_save(flags);
        batch = ACCESS_ONCE(pcp->batch);
-       if (pcp->count >= batch)
-               to_drain = batch;
-       else
-               to_drain = pcp->count;
+       to_drain = min(pcp->count, batch);
        if (to_drain > 0) {
                free_pcppages_bulk(zone, to_drain, pcp);
                pcp->count -= to_drain;
@@ -1610,6 +1612,9 @@ again:
        }
 
        __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+       if (zone_page_state(zone, NR_ALLOC_BATCH) == 0 &&
+           !zone_is_fair_depleted(zone))
+               zone_set_flag(zone, ZONE_FAIR_DEPLETED);
 
        __count_zone_vm_events(PGALLOC, zone, 1 << order);
        zone_statistics(preferred_zone, zone, gfp_flags);
@@ -1712,7 +1717,6 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
 {
        /* free_pages may go negative - that's OK */
        long min = mark;
-       long lowmem_reserve = z->lowmem_reserve[classzone_idx];
        int o;
        long free_cma = 0;
 
@@ -1727,7 +1731,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
                free_cma = zone_page_state(z, NR_FREE_CMA_PAGES);
 #endif
 
-       if (free_pages - free_cma <= min + lowmem_reserve)
+       if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx])
                return false;
        for (o = 0; o < order; o++) {
                /* At the next order, this order's pages become unavailable */
@@ -1922,6 +1926,18 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 
 #endif /* CONFIG_NUMA */
 
+static void reset_alloc_batches(struct zone *preferred_zone)
+{
+       struct zone *zone = preferred_zone->zone_pgdat->node_zones;
+
+       do {
+               mod_zone_page_state(zone, NR_ALLOC_BATCH,
+                       high_wmark_pages(zone) - low_wmark_pages(zone) -
+                       atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
+               zone_clear_flag(zone, ZONE_FAIR_DEPLETED);
+       } while (zone++ != preferred_zone);
+}
+
 /*
  * get_page_from_freelist goes through the zonelist trying to allocate
  * a page.
@@ -1939,8 +1955,12 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
        int did_zlc_setup = 0;          /* just call zlc_setup() one time */
        bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) &&
                                (gfp_mask & __GFP_WRITE);
+       int nr_fair_skipped = 0;
+       bool zonelist_rescan;
 
 zonelist_scan:
+       zonelist_rescan = false;
+
        /*
         * Scan zonelist, looking for a zone with enough free.
         * See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c.
@@ -1964,9 +1984,11 @@ zonelist_scan:
                 */
                if (alloc_flags & ALLOC_FAIR) {
                        if (!zone_local(preferred_zone, zone))
+                               break;
+                       if (zone_is_fair_depleted(zone)) {
+                               nr_fair_skipped++;
                                continue;
-                       if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
-                               continue;
+                       }
                }
                /*
                 * When allocating a page cache page for writing, we
@@ -2072,13 +2094,7 @@ this_zone_full:
                        zlc_mark_zone_full(zonelist, z);
        }
 
-       if (unlikely(IS_ENABLED(CONFIG_NUMA) && page == NULL && zlc_active)) {
-               /* Disable zlc cache for second zonelist scan */
-               zlc_active = 0;
-               goto zonelist_scan;
-       }
-
-       if (page)
+       if (page) {
                /*
                 * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
                 * necessary to allocate the page. The expectation is
@@ -2087,8 +2103,37 @@ this_zone_full:
                 * for !PFMEMALLOC purposes.
                 */
                page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
+               return page;
+       }
 
-       return page;
+       /*
+        * The first pass makes sure allocations are spread fairly within the
+        * local node.  However, the local node might have free pages left
+        * after the fairness batches are exhausted, and remote zones haven't
+        * even been considered yet.  Try once more without fairness, and
+        * include remote zones now, before entering the slowpath and waking
+        * kswapd: prefer spilling to a remote zone over swapping locally.
+        */
+       if (alloc_flags & ALLOC_FAIR) {
+               alloc_flags &= ~ALLOC_FAIR;
+               if (nr_fair_skipped) {
+                       zonelist_rescan = true;
+                       reset_alloc_batches(preferred_zone);
+               }
+               if (nr_online_nodes > 1)
+                       zonelist_rescan = true;
+       }
+
+       if (unlikely(IS_ENABLED(CONFIG_NUMA) && zlc_active)) {
+               /* Disable zlc cache for second zonelist scan */
+               zlc_active = 0;
+               zonelist_rescan = true;
+       }
+
+       if (zonelist_rescan)
+               goto zonelist_scan;
+
+       return NULL;
 }
 
 /*
@@ -2201,8 +2246,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 {
        struct page *page;
 
-       /* Acquire the OOM killer lock for the zones in zonelist */
-       if (!try_set_zonelist_oom(zonelist, gfp_mask)) {
+       /* Acquire the per-zone oom lock for each zone */
+       if (!oom_zonelist_trylock(zonelist, gfp_mask)) {
                schedule_timeout_uninterruptible(1);
                return NULL;
        }
@@ -2240,7 +2285,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
        out_of_memory(zonelist, gfp_mask, order, nodemask, false);
 
 out:
-       clear_zonelist_oom(zonelist, gfp_mask);
+       oom_zonelist_unlock(zonelist, gfp_mask);
        return page;
 }
 
@@ -2409,28 +2454,6 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
        return page;
 }
 
-static void reset_alloc_batches(struct zonelist *zonelist,
-                               enum zone_type high_zoneidx,
-                               struct zone *preferred_zone)
-{
-       struct zoneref *z;
-       struct zone *zone;
-
-       for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
-               /*
-                * Only reset the batches of zones that were actually
-                * considered in the fairness pass, we don't want to
-                * trash fairness information for zones that are not
-                * actually part of this zonelist's round-robin cycle.
-                */
-               if (!zone_local(preferred_zone, zone))
-                       continue;
-               mod_zone_page_state(zone, NR_ALLOC_BATCH,
-                       high_wmark_pages(zone) - low_wmark_pages(zone) -
-                       atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
-       }
-}
-
 static void wake_all_kswapds(unsigned int order,
                             struct zonelist *zonelist,
                             enum zone_type high_zoneidx,
@@ -2615,14 +2638,6 @@ rebalance:
        if (page)
                goto got_pg;
 
-       /*
-        * It can become very expensive to allocate transparent hugepages at
-        * fault, so use asynchronous memory compaction for THP unless it is
-        * khugepaged trying to collapse.
-        */
-       if (!(gfp_mask & __GFP_NO_KSWAPD) || (current->flags & PF_KTHREAD))
-               migration_mode = MIGRATE_SYNC_LIGHT;
-
        /*
         * If compaction is deferred for high-order allocations, it is because
         * sync compaction recently failed. If this is the case and the caller
@@ -2633,6 +2648,15 @@ rebalance:
                                                (gfp_mask & __GFP_NO_KSWAPD))
                goto nopage;
 
+       /*
+        * It can become very expensive to allocate transparent hugepages at
+        * fault, so use asynchronous memory compaction for THP unless it is
+        * khugepaged trying to collapse.
+        */
+       if ((gfp_mask & GFP_TRANSHUGE) != GFP_TRANSHUGE ||
+                                               (current->flags & PF_KTHREAD))
+               migration_mode = MIGRATE_SYNC_LIGHT;
+
        /* Try direct reclaim and then allocating */
        page = __alloc_pages_direct_reclaim(gfp_mask, order,
                                        zonelist, high_zoneidx,
@@ -2766,28 +2790,11 @@ retry_cpuset:
        if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
                alloc_flags |= ALLOC_CMA;
 #endif
-retry:
        /* First allocation attempt */
        page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
                        zonelist, high_zoneidx, alloc_flags,
                        preferred_zone, classzone_idx, migratetype);
        if (unlikely(!page)) {
-               /*
-                * The first pass makes sure allocations are spread
-                * fairly within the local node.  However, the local
-                * node might have free pages left after the fairness
-                * batches are exhausted, and remote zones haven't
-                * even been considered yet.  Try once more without
-                * fairness, and include remote zones now, before
-                * entering the slowpath and waking kswapd: prefer
-                * spilling to a remote zone over swapping locally.
-                */
-               if (alloc_flags & ALLOC_FAIR) {
-                       reset_alloc_batches(zonelist, high_zoneidx,
-                                           preferred_zone);
-                       alloc_flags &= ~ALLOC_FAIR;
-                       goto retry;
-               }
                /*
                 * Runtime PM, block IO and its error handling path
                 * can deadlock because I/O on the device might not
@@ -2962,7 +2969,7 @@ EXPORT_SYMBOL(alloc_pages_exact);
  * Note this is not alloc_pages_exact_node() which allocates on a specific node,
  * but is not exact.
  */
-void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
+void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
 {
        unsigned order = get_order(size);
        struct page *p = alloc_pages_node(nid, gfp_mask, order);
@@ -2970,7 +2977,6 @@ void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
                return NULL;
        return make_alloc_exact((unsigned long)page_address(p), order, size);
 }
-EXPORT_SYMBOL(alloc_pages_exact_nid);
 
 /**
  * free_pages_exact - release memory allocated via alloc_pages_exact()
@@ -3052,7 +3058,7 @@ static inline void show_node(struct zone *zone)
 void si_meminfo(struct sysinfo *val)
 {
        val->totalram = totalram_pages;
-       val->sharedram = 0;
+       val->sharedram = global_page_state(NR_SHMEM);
        val->freeram = global_page_state(NR_FREE_PAGES);
        val->bufferram = nr_blockdev_pages();
        val->totalhigh = totalhigh_pages;
@@ -3072,6 +3078,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
        for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
                managed_pages += pgdat->node_zones[zone_type].managed_pages;
        val->totalram = managed_pages;
+       val->sharedram = node_page_state(nid, NR_SHMEM);
        val->freeram = node_page_state(nid, NR_FREE_PAGES);
 #ifdef CONFIG_HIGHMEM
        val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages;
@@ -3253,12 +3260,12 @@ void show_free_areas(unsigned int filter)
                        K(zone_page_state(zone, NR_BOUNCE)),
                        K(zone_page_state(zone, NR_FREE_CMA_PAGES)),
                        K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
-                       zone->pages_scanned,
+                       K(zone_page_state(zone, NR_PAGES_SCANNED)),
                        (!zone_reclaimable(zone) ? "yes" : "no")
                        );
                printk("lowmem_reserve[]:");
                for (i = 0; i < MAX_NR_ZONES; i++)
-                       printk(" %lu", zone->lowmem_reserve[i]);
+                       printk(" %ld", zone->lowmem_reserve[i]);
                printk("\n");
        }
 
@@ -5579,7 +5586,7 @@ static void calculate_totalreserve_pages(void)
        for_each_online_pgdat(pgdat) {
                for (i = 0; i < MAX_NR_ZONES; i++) {
                        struct zone *zone = pgdat->node_zones + i;
-                       unsigned long max = 0;
+                       long max = 0;
 
                        /* Find valid and maximum lowmem_reserve in the zone */
                        for (j = i; j < MAX_NR_ZONES; j++) {
index 0ca36a7770b1b974eaea70d6357b7cdbd73aad3d..17b9172ec37f1ef05d49b45f244ec23f6d934ed1 100644 (file)
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -326,7 +326,6 @@ static unsigned long get_next_ra_size(struct file_ra_state *ra,
  *     - thrashing threshold in memory tight systems
  */
 static pgoff_t count_history_pages(struct address_space *mapping,
-                                  struct file_ra_state *ra,
                                   pgoff_t offset, unsigned long max)
 {
        pgoff_t head;
@@ -349,7 +348,7 @@ static int try_context_readahead(struct address_space *mapping,
 {
        pgoff_t size;
 
-       size = count_history_pages(mapping, ra, offset, max);
+       size = count_history_pages(mapping, offset, max);
 
        /*
         * not enough history pages:
index af68b15a8fc1f99ede5cf82a38aecfc6b3b6eda6..302d1cf7ad07c385ebfeb381dd42af542b4787a5 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -149,6 +149,19 @@ static inline void shmem_unacct_size(unsigned long flags, loff_t size)
                vm_unacct_memory(VM_ACCT(size));
 }
 
+static inline int shmem_reacct_size(unsigned long flags,
+               loff_t oldsize, loff_t newsize)
+{
+       if (!(flags & VM_NORESERVE)) {
+               if (VM_ACCT(newsize) > VM_ACCT(oldsize))
+                       return security_vm_enough_memory_mm(current->mm,
+                                       VM_ACCT(newsize) - VM_ACCT(oldsize));
+               else if (VM_ACCT(newsize) < VM_ACCT(oldsize))
+                       vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize));
+       }
+       return 0;
+}
+
 /*
  * ... whereas tmpfs objects are accounted incrementally as
  * pages are allocated, in order to allow huge sparse files.
@@ -280,7 +293,7 @@ static bool shmem_confirm_swap(struct address_space *mapping,
  */
 static int shmem_add_to_page_cache(struct page *page,
                                   struct address_space *mapping,
-                                  pgoff_t index, gfp_t gfp, void *expected)
+                                  pgoff_t index, void *expected)
 {
        int error;
 
@@ -549,6 +562,10 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
                loff_t newsize = attr->ia_size;
 
                if (newsize != oldsize) {
+                       error = shmem_reacct_size(SHMEM_I(inode)->flags,
+                                       oldsize, newsize);
+                       if (error)
+                               return error;
                        i_size_write(inode, newsize);
                        inode->i_ctime = inode->i_mtime = CURRENT_TIME;
                }
@@ -649,7 +666,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
         */
        if (!error)
                error = shmem_add_to_page_cache(*pagep, mapping, index,
-                                               GFP_NOWAIT, radswap);
+                                               radswap);
        if (error != -ENOMEM) {
                /*
                 * Truncation and eviction use free_swap_and_cache(), which
@@ -1095,7 +1112,7 @@ repeat:
                                                gfp & GFP_RECLAIM_MASK);
                if (!error) {
                        error = shmem_add_to_page_cache(page, mapping, index,
-                                               gfp, swp_to_radix_entry(swap));
+                                               swp_to_radix_entry(swap));
                        /*
                         * We already confirmed swap under page lock, and make
                         * no memory allocation here, so usually no possibility
@@ -1149,7 +1166,7 @@ repeat:
                __SetPageSwapBacked(page);
                __set_page_locked(page);
                if (sgp == SGP_WRITE)
-                       init_page_accessed(page);
+                       __SetPageReferenced(page);
 
                error = mem_cgroup_charge_file(page, current->mm,
                                                gfp & GFP_RECLAIM_MASK);
@@ -1158,7 +1175,7 @@ repeat:
                error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
                if (!error) {
                        error = shmem_add_to_page_cache(page, mapping, index,
-                                                       gfp, NULL);
+                                                       NULL);
                        radix_tree_preload_end();
                }
                if (error) {
@@ -2932,16 +2949,16 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
        this.len = strlen(name);
        this.hash = 0; /* will go */
        sb = shm_mnt->mnt_sb;
+       path.mnt = mntget(shm_mnt);
        path.dentry = d_alloc_pseudo(sb, &this);
        if (!path.dentry)
                goto put_memory;
        d_set_d_op(path.dentry, &anon_ops);
-       path.mnt = mntget(shm_mnt);
 
        res = ERR_PTR(-ENOSPC);
        inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
        if (!inode)
-               goto put_dentry;
+               goto put_memory;
 
        inode->i_flags |= i_flags;
        d_instantiate(path.dentry, inode);
@@ -2949,19 +2966,19 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
        clear_nlink(inode);     /* It is unlinked */
        res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
        if (IS_ERR(res))
-               goto put_dentry;
+               goto put_path;
 
        res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
                  &shmem_file_operations);
        if (IS_ERR(res))
-               goto put_dentry;
+               goto put_path;
 
        return res;
 
-put_dentry:
-       path_put(&path);
 put_memory:
        shmem_unacct_size(flags, size);
+put_path:
+       path_put(&path);
        return res;
 }
 
index 3070b929a1bfa67778e415525403e9b36e392344..2e60bf3dedbb3925a015e1c66c0c871f03f28f6f 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -191,7 +191,6 @@ struct array_cache {
        unsigned int limit;
        unsigned int batchcount;
        unsigned int touched;
-       spinlock_t lock;
        void *entry[];  /*
                         * Must have this definition in here for the proper
                         * alignment of array_cache. Also simplifies accessing
@@ -203,6 +202,11 @@ struct array_cache {
                         */
 };
 
+struct alien_cache {
+       spinlock_t lock;
+       struct array_cache ac;
+};
+
 #define SLAB_OBJ_PFMEMALLOC    1
 static inline bool is_obj_pfmemalloc(void *objp)
 {
@@ -242,7 +246,8 @@ static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
 static int drain_freelist(struct kmem_cache *cache,
                        struct kmem_cache_node *n, int tofree);
 static void free_block(struct kmem_cache *cachep, void **objpp, int len,
-                       int node);
+                       int node, struct list_head *list);
+static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list);
 static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
 static void cache_reap(struct work_struct *unused);
 
@@ -267,7 +272,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
 #define MAKE_LIST(cachep, listp, slab, nodeid)                         \
        do {                                                            \
                INIT_LIST_HEAD(listp);                                  \
-               list_splice(&(cachep->node[nodeid]->slab), listp);      \
+               list_splice(&get_node(cachep, nodeid)->slab, listp);    \
        } while (0)
 
 #define        MAKE_ALL_LISTS(cachep, ptr, nodeid)                             \
@@ -465,143 +470,6 @@ static struct kmem_cache kmem_cache_boot = {
        .name = "kmem_cache",
 };
 
-#define BAD_ALIEN_MAGIC 0x01020304ul
-
-#ifdef CONFIG_LOCKDEP
-
-/*
- * Slab sometimes uses the kmalloc slabs to store the slab headers
- * for other slabs "off slab".
- * The locking for this is tricky in that it nests within the locks
- * of all other slabs in a few places; to deal with this special
- * locking we put on-slab caches into a separate lock-class.
- *
- * We set lock class for alien array caches which are up during init.
- * The lock annotation will be lost if all cpus of a node goes down and
- * then comes back up during hotplug
- */
-static struct lock_class_key on_slab_l3_key;
-static struct lock_class_key on_slab_alc_key;
-
-static struct lock_class_key debugobj_l3_key;
-static struct lock_class_key debugobj_alc_key;
-
-static void slab_set_lock_classes(struct kmem_cache *cachep,
-               struct lock_class_key *l3_key, struct lock_class_key *alc_key,
-               int q)
-{
-       struct array_cache **alc;
-       struct kmem_cache_node *n;
-       int r;
-
-       n = cachep->node[q];
-       if (!n)
-               return;
-
-       lockdep_set_class(&n->list_lock, l3_key);
-       alc = n->alien;
-       /*
-        * FIXME: This check for BAD_ALIEN_MAGIC
-        * should go away when common slab code is taught to
-        * work even without alien caches.
-        * Currently, non NUMA code returns BAD_ALIEN_MAGIC
-        * for alloc_alien_cache,
-        */
-       if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
-               return;
-       for_each_node(r) {
-               if (alc[r])
-                       lockdep_set_class(&alc[r]->lock, alc_key);
-       }
-}
-
-static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
-{
-       slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node);
-}
-
-static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
-{
-       int node;
-
-       for_each_online_node(node)
-               slab_set_debugobj_lock_classes_node(cachep, node);
-}
-
-static void init_node_lock_keys(int q)
-{
-       int i;
-
-       if (slab_state < UP)
-               return;
-
-       for (i = 1; i <= KMALLOC_SHIFT_HIGH; i++) {
-               struct kmem_cache_node *n;
-               struct kmem_cache *cache = kmalloc_caches[i];
-
-               if (!cache)
-                       continue;
-
-               n = cache->node[q];
-               if (!n || OFF_SLAB(cache))
-                       continue;
-
-               slab_set_lock_classes(cache, &on_slab_l3_key,
-                               &on_slab_alc_key, q);
-       }
-}
-
-static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q)
-{
-       if (!cachep->node[q])
-               return;
-
-       slab_set_lock_classes(cachep, &on_slab_l3_key,
-                       &on_slab_alc_key, q);
-}
-
-static inline void on_slab_lock_classes(struct kmem_cache *cachep)
-{
-       int node;
-
-       VM_BUG_ON(OFF_SLAB(cachep));
-       for_each_node(node)
-               on_slab_lock_classes_node(cachep, node);
-}
-
-static inline void init_lock_keys(void)
-{
-       int node;
-
-       for_each_node(node)
-               init_node_lock_keys(node);
-}
-#else
-static void init_node_lock_keys(int q)
-{
-}
-
-static inline void init_lock_keys(void)
-{
-}
-
-static inline void on_slab_lock_classes(struct kmem_cache *cachep)
-{
-}
-
-static inline void on_slab_lock_classes_node(struct kmem_cache *cachep, int node)
-{
-}
-
-static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
-{
-}
-
-static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
-{
-}
-#endif
-
 static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
 
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -792,13 +660,8 @@ static void start_cpu_timer(int cpu)
        }
 }
 
-static struct array_cache *alloc_arraycache(int node, int entries,
-                                           int batchcount, gfp_t gfp)
+static void init_arraycache(struct array_cache *ac, int limit, int batch)
 {
-       int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
-       struct array_cache *nc = NULL;
-
-       nc = kmalloc_node(memsize, gfp, node);
        /*
         * The array_cache structures contain pointers to free objects.
         * However, when such objects are allocated or transferred to another
@@ -806,15 +669,24 @@ static struct array_cache *alloc_arraycache(int node, int entries,
         * valid references during a kmemleak scan. Therefore, kmemleak must
         * not scan such objects.
         */
-       kmemleak_no_scan(nc);
-       if (nc) {
-               nc->avail = 0;
-               nc->limit = entries;
-               nc->batchcount = batchcount;
-               nc->touched = 0;
-               spin_lock_init(&nc->lock);
+       kmemleak_no_scan(ac);
+       if (ac) {
+               ac->avail = 0;
+               ac->limit = limit;
+               ac->batchcount = batch;
+               ac->touched = 0;
        }
-       return nc;
+}
+
+static struct array_cache *alloc_arraycache(int node, int entries,
+                                           int batchcount, gfp_t gfp)
+{
+       size_t memsize = sizeof(void *) * entries + sizeof(struct array_cache);
+       struct array_cache *ac = NULL;
+
+       ac = kmalloc_node(memsize, gfp, node);
+       init_arraycache(ac, entries, batchcount);
+       return ac;
 }
 
 static inline bool is_slab_pfmemalloc(struct page *page)
@@ -826,7 +698,7 @@ static inline bool is_slab_pfmemalloc(struct page *page)
 static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
                                                struct array_cache *ac)
 {
-       struct kmem_cache_node *n = cachep->node[numa_mem_id()];
+       struct kmem_cache_node *n = get_node(cachep, numa_mem_id());
        struct page *page;
        unsigned long flags;
 
@@ -881,7 +753,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
                 * If there are empty slabs on the slabs_free list and we are
                 * being forced to refill the cache, mark this one !pfmemalloc.
                 */
-               n = cachep->node[numa_mem_id()];
+               n = get_node(cachep, numa_mem_id());
                if (!list_empty(&n->slabs_free) && force_refill) {
                        struct page *page = virt_to_head_page(objp);
                        ClearPageSlabPfmemalloc(page);
@@ -961,12 +833,13 @@ static int transfer_objects(struct array_cache *to,
 #define drain_alien_cache(cachep, alien) do { } while (0)
 #define reap_alien(cachep, n) do { } while (0)
 
-static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
+static inline struct alien_cache **alloc_alien_cache(int node,
+                                               int limit, gfp_t gfp)
 {
-       return (struct array_cache **)BAD_ALIEN_MAGIC;
+       return NULL;
 }
 
-static inline void free_alien_cache(struct array_cache **ac_ptr)
+static inline void free_alien_cache(struct alien_cache **ac_ptr)
 {
 }
 
@@ -992,46 +865,60 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep,
 static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
 static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
 
-static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
+static struct alien_cache *__alloc_alien_cache(int node, int entries,
+                                               int batch, gfp_t gfp)
+{
+       size_t memsize = sizeof(void *) * entries + sizeof(struct alien_cache);
+       struct alien_cache *alc = NULL;
+
+       alc = kmalloc_node(memsize, gfp, node);
+       init_arraycache(&alc->ac, entries, batch);
+       spin_lock_init(&alc->lock);
+       return alc;
+}
+
+static struct alien_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
 {
-       struct array_cache **ac_ptr;
-       int memsize = sizeof(void *) * nr_node_ids;
+       struct alien_cache **alc_ptr;
+       size_t memsize = sizeof(void *) * nr_node_ids;
        int i;
 
        if (limit > 1)
                limit = 12;
-       ac_ptr = kzalloc_node(memsize, gfp, node);
-       if (ac_ptr) {
-               for_each_node(i) {
-                       if (i == node || !node_online(i))
-                               continue;
-                       ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
-                       if (!ac_ptr[i]) {
-                               for (i--; i >= 0; i--)
-                                       kfree(ac_ptr[i]);
-                               kfree(ac_ptr);
-                               return NULL;
-                       }
+       alc_ptr = kzalloc_node(memsize, gfp, node);
+       if (!alc_ptr)
+               return NULL;
+
+       for_each_node(i) {
+               if (i == node || !node_online(i))
+                       continue;
+               alc_ptr[i] = __alloc_alien_cache(node, limit, 0xbaadf00d, gfp);
+               if (!alc_ptr[i]) {
+                       for (i--; i >= 0; i--)
+                               kfree(alc_ptr[i]);
+                       kfree(alc_ptr);
+                       return NULL;
                }
        }
-       return ac_ptr;
+       return alc_ptr;
 }
 
-static void free_alien_cache(struct array_cache **ac_ptr)
+static void free_alien_cache(struct alien_cache **alc_ptr)
 {
        int i;
 
-       if (!ac_ptr)
+       if (!alc_ptr)
                return;
        for_each_node(i)
-           kfree(ac_ptr[i]);
-       kfree(ac_ptr);
+           kfree(alc_ptr[i]);
+       kfree(alc_ptr);
 }
 
 static void __drain_alien_cache(struct kmem_cache *cachep,
-                               struct array_cache *ac, int node)
+                               struct array_cache *ac, int node,
+                               struct list_head *list)
 {
-       struct kmem_cache_node *n = cachep->node[node];
+       struct kmem_cache_node *n = get_node(cachep, node);
 
        if (ac->avail) {
                spin_lock(&n->list_lock);
@@ -1043,7 +930,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
                if (n->shared)
                        transfer_objects(n->shared, ac, ac->limit);
 
-               free_block(cachep, ac->entry, ac->avail, node);
+               free_block(cachep, ac->entry, ac->avail, node, list);
                ac->avail = 0;
                spin_unlock(&n->list_lock);
        }
@@ -1057,28 +944,40 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
        int node = __this_cpu_read(slab_reap_node);
 
        if (n->alien) {
-               struct array_cache *ac = n->alien[node];
+               struct alien_cache *alc = n->alien[node];
+               struct array_cache *ac;
+
+               if (alc) {
+                       ac = &alc->ac;
+                       if (ac->avail && spin_trylock_irq(&alc->lock)) {
+                               LIST_HEAD(list);
 
-               if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
-                       __drain_alien_cache(cachep, ac, node);
-                       spin_unlock_irq(&ac->lock);
+                               __drain_alien_cache(cachep, ac, node, &list);
+                               spin_unlock_irq(&alc->lock);
+                               slabs_destroy(cachep, &list);
+                       }
                }
        }
 }
 
 static void drain_alien_cache(struct kmem_cache *cachep,
-                               struct array_cache **alien)
+                               struct alien_cache **alien)
 {
        int i = 0;
+       struct alien_cache *alc;
        struct array_cache *ac;
        unsigned long flags;
 
        for_each_online_node(i) {
-               ac = alien[i];
-               if (ac) {
-                       spin_lock_irqsave(&ac->lock, flags);
-                       __drain_alien_cache(cachep, ac, i);
-                       spin_unlock_irqrestore(&ac->lock, flags);
+               alc = alien[i];
+               if (alc) {
+                       LIST_HEAD(list);
+
+                       ac = &alc->ac;
+                       spin_lock_irqsave(&alc->lock, flags);
+                       __drain_alien_cache(cachep, ac, i, &list);
+                       spin_unlock_irqrestore(&alc->lock, flags);
+                       slabs_destroy(cachep, &list);
                }
        }
 }
@@ -1087,8 +986,10 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 {
        int nodeid = page_to_nid(virt_to_page(objp));
        struct kmem_cache_node *n;
-       struct array_cache *alien = NULL;
+       struct alien_cache *alien = NULL;
+       struct array_cache *ac;
        int node;
+       LIST_HEAD(list);
 
        node = numa_mem_id();
 
@@ -1099,21 +1000,25 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
        if (likely(nodeid == node))
                return 0;
 
-       n = cachep->node[node];
+       n = get_node(cachep, node);
        STATS_INC_NODEFREES(cachep);
        if (n->alien && n->alien[nodeid]) {
                alien = n->alien[nodeid];
+               ac = &alien->ac;
                spin_lock(&alien->lock);
-               if (unlikely(alien->avail == alien->limit)) {
+               if (unlikely(ac->avail == ac->limit)) {
                        STATS_INC_ACOVERFLOW(cachep);
-                       __drain_alien_cache(cachep, alien, nodeid);
+                       __drain_alien_cache(cachep, ac, nodeid, &list);
                }
-               ac_put_obj(cachep, alien, objp);
+               ac_put_obj(cachep, ac, objp);
                spin_unlock(&alien->lock);
+               slabs_destroy(cachep, &list);
        } else {
-               spin_lock(&(cachep->node[nodeid])->list_lock);
-               free_block(cachep, &objp, 1, nodeid);
-               spin_unlock(&(cachep->node[nodeid])->list_lock);
+               n = get_node(cachep, nodeid);
+               spin_lock(&n->list_lock);
+               free_block(cachep, &objp, 1, nodeid, &list);
+               spin_unlock(&n->list_lock);
+               slabs_destroy(cachep, &list);
        }
        return 1;
 }
@@ -1132,7 +1037,7 @@ static int init_cache_node_node(int node)
 {
        struct kmem_cache *cachep;
        struct kmem_cache_node *n;
-       const int memsize = sizeof(struct kmem_cache_node);
+       const size_t memsize = sizeof(struct kmem_cache_node);
 
        list_for_each_entry(cachep, &slab_caches, list) {
                /*
@@ -1140,7 +1045,8 @@ static int init_cache_node_node(int node)
                 * begin anything. Make sure some other cpu on this
                 * node has not already allocated this
                 */
-               if (!cachep->node[node]) {
+               n = get_node(cachep, node);
+               if (!n) {
                        n = kmalloc_node(memsize, GFP_KERNEL, node);
                        if (!n)
                                return -ENOMEM;
@@ -1156,11 +1062,11 @@ static int init_cache_node_node(int node)
                        cachep->node[node] = n;
                }
 
-               spin_lock_irq(&cachep->node[node]->list_lock);
-               cachep->node[node]->free_limit =
+               spin_lock_irq(&n->list_lock);
+               n->free_limit =
                        (1 + nr_cpus_node(node)) *
                        cachep->batchcount + cachep->num;
-               spin_unlock_irq(&cachep->node[node]->list_lock);
+               spin_unlock_irq(&n->list_lock);
        }
        return 0;
 }
@@ -1181,12 +1087,13 @@ static void cpuup_canceled(long cpu)
        list_for_each_entry(cachep, &slab_caches, list) {
                struct array_cache *nc;
                struct array_cache *shared;
-               struct array_cache **alien;
+               struct alien_cache **alien;
+               LIST_HEAD(list);
 
                /* cpu is dead; no one can alloc from it. */
                nc = cachep->array[cpu];
                cachep->array[cpu] = NULL;
-               n = cachep->node[node];
+               n = get_node(cachep, node);
 
                if (!n)
                        goto free_array_cache;
@@ -1196,7 +1103,7 @@ static void cpuup_canceled(long cpu)
                /* Free limit for this kmem_cache_node */
                n->free_limit -= cachep->batchcount;
                if (nc)
-                       free_block(cachep, nc->entry, nc->avail, node);
+                       free_block(cachep, nc->entry, nc->avail, node, &list);
 
                if (!cpumask_empty(mask)) {
                        spin_unlock_irq(&n->list_lock);
@@ -1206,7 +1113,7 @@ static void cpuup_canceled(long cpu)
                shared = n->shared;
                if (shared) {
                        free_block(cachep, shared->entry,
-                                  shared->avail, node);
+                                  shared->avail, node, &list);
                        n->shared = NULL;
                }
 
@@ -1221,6 +1128,7 @@ static void cpuup_canceled(long cpu)
                        free_alien_cache(alien);
                }
 free_array_cache:
+               slabs_destroy(cachep, &list);
                kfree(nc);
        }
        /*
@@ -1229,7 +1137,7 @@ free_array_cache:
         * shrink each nodelist to its limit.
         */
        list_for_each_entry(cachep, &slab_caches, list) {
-               n = cachep->node[node];
+               n = get_node(cachep, node);
                if (!n)
                        continue;
                drain_freelist(cachep, n, slabs_tofree(cachep, n));
@@ -1260,7 +1168,7 @@ static int cpuup_prepare(long cpu)
        list_for_each_entry(cachep, &slab_caches, list) {
                struct array_cache *nc;
                struct array_cache *shared = NULL;
-               struct array_cache **alien = NULL;
+               struct alien_cache **alien = NULL;
 
                nc = alloc_arraycache(node, cachep->limit,
                                        cachep->batchcount, GFP_KERNEL);
@@ -1284,7 +1192,7 @@ static int cpuup_prepare(long cpu)
                        }
                }
                cachep->array[cpu] = nc;
-               n = cachep->node[node];
+               n = get_node(cachep, node);
                BUG_ON(!n);
 
                spin_lock_irq(&n->list_lock);
@@ -1305,13 +1213,7 @@ static int cpuup_prepare(long cpu)
                spin_unlock_irq(&n->list_lock);
                kfree(shared);
                free_alien_cache(alien);
-               if (cachep->flags & SLAB_DEBUG_OBJECTS)
-                       slab_set_debugobj_lock_classes_node(cachep, node);
-               else if (!OFF_SLAB(cachep) &&
-                        !(cachep->flags & SLAB_DESTROY_BY_RCU))
-                       on_slab_lock_classes_node(cachep, node);
        }
-       init_node_lock_keys(node);
 
        return 0;
 bad:
@@ -1395,7 +1297,7 @@ static int __meminit drain_cache_node_node(int node)
        list_for_each_entry(cachep, &slab_caches, list) {
                struct kmem_cache_node *n;
 
-               n = cachep->node[node];
+               n = get_node(cachep, node);
                if (!n)
                        continue;
 
@@ -1575,10 +1477,6 @@ void __init kmem_cache_init(void)
 
                memcpy(ptr, cpu_cache_get(kmem_cache),
                       sizeof(struct arraycache_init));
-               /*
-                * Do not assume that spinlocks can be initialized via memcpy:
-                */
-               spin_lock_init(&ptr->lock);
 
                kmem_cache->array[smp_processor_id()] = ptr;
 
@@ -1588,10 +1486,6 @@ void __init kmem_cache_init(void)
                       != &initarray_generic.cache);
                memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]),
                       sizeof(struct arraycache_init));
-               /*
-                * Do not assume that spinlocks can be initialized via memcpy:
-                */
-               spin_lock_init(&ptr->lock);
 
                kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr;
        }
@@ -1628,9 +1522,6 @@ void __init kmem_cache_init_late(void)
                        BUG();
        mutex_unlock(&slab_mutex);
 
-       /* Annotate slab for lockdep -- annotate the malloc caches */
-       init_lock_keys();
-
        /* Done! */
        slab_state = FULL;
 
@@ -1690,14 +1581,10 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
        printk(KERN_WARNING "  cache: %s, object size: %d, order: %d\n",
                cachep->name, cachep->size, cachep->gfporder);
 
-       for_each_online_node(node) {
+       for_each_kmem_cache_node(cachep, node, n) {
                unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
                unsigned long active_slabs = 0, num_slabs = 0;
 
-               n = cachep->node[node];
-               if (!n)
-                       continue;
-
                spin_lock_irqsave(&n->list_lock, flags);
                list_for_each_entry(page, &n->slabs_full, lru) {
                        active_objs += cachep->num;
@@ -1724,7 +1611,8 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
 }
 
 /*
- * Interface to system's page allocator. No need to hold the cache-lock.
+ * Interface to system's page allocator. No need to hold the
+ * kmem_cache_node ->list_lock.
  *
  * If we requested dmaable memory, we will get it. Even if we
  * did not request dmaable memory, we might get it, but that
@@ -2026,9 +1914,9 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
  * @cachep: cache pointer being destroyed
  * @page: page pointer being destroyed
  *
- * Destroy all the objs in a slab, and release the mem back to the system.
- * Before calling the slab must have been unlinked from the cache.  The
- * cache-lock is not held/needed.
+ * Destroy all the objs in a slab page, and release the mem back to the system.
+ * Before calling the slab page must have been unlinked from the cache. The
+ * kmem_cache_node ->list_lock is not held/needed.
  */
 static void slab_destroy(struct kmem_cache *cachep, struct page *page)
 {
@@ -2060,6 +1948,16 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page)
                kmem_cache_free(cachep->freelist_cache, freelist);
 }
 
+static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list)
+{
+       struct page *page, *n;
+
+       list_for_each_entry_safe(page, n, list, lru) {
+               list_del(&page->lru);
+               slab_destroy(cachep, page);
+       }
+}
+
 /**
  * calculate_slab_order - calculate size (page order) of slabs
  * @cachep: pointer to the cache that is being created
@@ -2405,17 +2303,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
                return err;
        }
 
-       if (flags & SLAB_DEBUG_OBJECTS) {
-               /*
-                * Would deadlock through slab_destroy()->call_rcu()->
-                * debug_object_activate()->kmem_cache_alloc().
-                */
-               WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);
-
-               slab_set_debugobj_lock_classes(cachep);
-       } else if (!OFF_SLAB(cachep) && !(flags & SLAB_DESTROY_BY_RCU))
-               on_slab_lock_classes(cachep);
-
        return 0;
 }
 
@@ -2434,7 +2321,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep)
 {
 #ifdef CONFIG_SMP
        check_irq_off();
-       assert_spin_locked(&cachep->node[numa_mem_id()]->list_lock);
+       assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
 #endif
 }
 
@@ -2442,7 +2329,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
 {
 #ifdef CONFIG_SMP
        check_irq_off();
-       assert_spin_locked(&cachep->node[node]->list_lock);
+       assert_spin_locked(&get_node(cachep, node)->list_lock);
 #endif
 }
 
@@ -2462,12 +2349,16 @@ static void do_drain(void *arg)
        struct kmem_cache *cachep = arg;
        struct array_cache *ac;
        int node = numa_mem_id();
+       struct kmem_cache_node *n;
+       LIST_HEAD(list);
 
        check_irq_off();
        ac = cpu_cache_get(cachep);
-       spin_lock(&cachep->node[node]->list_lock);
-       free_block(cachep, ac->entry, ac->avail, node);
-       spin_unlock(&cachep->node[node]->list_lock);
+       n = get_node(cachep, node);
+       spin_lock(&n->list_lock);
+       free_block(cachep, ac->entry, ac->avail, node, &list);
+       spin_unlock(&n->list_lock);
+       slabs_destroy(cachep, &list);
        ac->avail = 0;
 }
 
@@ -2478,17 +2369,12 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
 
        on_each_cpu(do_drain, cachep, 1);
        check_irq_on();
-       for_each_online_node(node) {
-               n = cachep->node[node];
-               if (n && n->alien)
+       for_each_kmem_cache_node(cachep, node, n)
+               if (n->alien)
                        drain_alien_cache(cachep, n->alien);
-       }
 
-       for_each_online_node(node) {
-               n = cachep->node[node];
-               if (n)
-                       drain_array(cachep, n, n->shared, 1, node);
-       }
+       for_each_kmem_cache_node(cachep, node, n)
+               drain_array(cachep, n, n->shared, 1, node);
 }
 
 /*
@@ -2534,17 +2420,14 @@ out:
 
 int __kmem_cache_shrink(struct kmem_cache *cachep)
 {
-       int ret = 0, i = 0;
+       int ret = 0;
+       int node;
        struct kmem_cache_node *n;
 
        drain_cpu_caches(cachep);
 
        check_irq_on();
-       for_each_online_node(i) {
-               n = cachep->node[i];
-               if (!n)
-                       continue;
-
+       for_each_kmem_cache_node(cachep, node, n) {
                drain_freelist(cachep, n, slabs_tofree(cachep, n));
 
                ret += !list_empty(&n->slabs_full) ||
@@ -2566,13 +2449,11 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
            kfree(cachep->array[i]);
 
        /* NUMA: free the node structures */
-       for_each_online_node(i) {
-               n = cachep->node[i];
-               if (n) {
-                       kfree(n->shared);
-                       free_alien_cache(n->alien);
-                       kfree(n);
-               }
+       for_each_kmem_cache_node(cachep, i, n) {
+               kfree(n->shared);
+               free_alien_cache(n->alien);
+               kfree(n);
+               cachep->node[i] = NULL;
        }
        return 0;
 }
@@ -2751,7 +2632,7 @@ static int cache_grow(struct kmem_cache *cachep,
 
        /* Take the node list lock to change the colour_next on this node */
        check_irq_off();
-       n = cachep->node[nodeid];
+       n = get_node(cachep, nodeid);
        spin_lock(&n->list_lock);
 
        /* Get colour for the slab, and cal the next value. */
@@ -2920,7 +2801,7 @@ retry:
                 */
                batchcount = BATCHREFILL_LIMIT;
        }
-       n = cachep->node[node];
+       n = get_node(cachep, node);
 
        BUG_ON(ac->avail > 0 || !n);
        spin_lock(&n->list_lock);
@@ -3060,7 +2941,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
 
 static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
 {
-       if (cachep == kmem_cache)
+       if (unlikely(cachep == kmem_cache))
                return false;
 
        return should_failslab(cachep->object_size, flags, cachep->flags);
@@ -3169,8 +3050,8 @@ retry:
                nid = zone_to_nid(zone);
 
                if (cpuset_zone_allowed_hardwall(zone, flags) &&
-                       cache->node[nid] &&
-                       cache->node[nid]->free_objects) {
+                       get_node(cache, nid) &&
+                       get_node(cache, nid)->free_objects) {
                                obj = ____cache_alloc_node(cache,
                                        flags | GFP_THISNODE, nid);
                                if (obj)
@@ -3233,7 +3114,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
        int x;
 
        VM_BUG_ON(nodeid > num_online_nodes());
-       n = cachep->node[nodeid];
+       n = get_node(cachep, nodeid);
        BUG_ON(!n);
 
 retry:
@@ -3304,7 +3185,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
        if (nodeid == NUMA_NO_NODE)
                nodeid = slab_node;
 
-       if (unlikely(!cachep->node[nodeid])) {
+       if (unlikely(!get_node(cachep, nodeid))) {
                /* Node not bootstrapped yet */
                ptr = fallback_alloc(cachep, flags);
                goto out;
@@ -3405,12 +3286,13 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
 
 /*
  * Caller needs to acquire correct kmem_cache_node's list_lock
+ * @list: list of detached free slab pages that should be freed by the caller
  */
-static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
-                      int node)
+static void free_block(struct kmem_cache *cachep, void **objpp,
+                       int nr_objects, int node, struct list_head *list)
 {
        int i;
-       struct kmem_cache_node *n;
+       struct kmem_cache_node *n = get_node(cachep, node);
 
        for (i = 0; i < nr_objects; i++) {
                void *objp;
@@ -3420,7 +3302,6 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
                objp = objpp[i];
 
                page = virt_to_head_page(objp);
-               n = cachep->node[node];
                list_del(&page->lru);
                check_spinlock_acquired_node(cachep, node);
                slab_put_obj(cachep, page, objp, node);
@@ -3431,13 +3312,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
                if (page->active == 0) {
                        if (n->free_objects > n->free_limit) {
                                n->free_objects -= cachep->num;
-                               /* No need to drop any previously held
-                                * lock here, even if we have a off-slab slab
-                                * descriptor it is guaranteed to come from
-                                * a different cache, refer to comments before
-                                * alloc_slabmgmt.
-                                */
-                               slab_destroy(cachep, page);
+                               list_add_tail(&page->lru, list);
                        } else {
                                list_add(&page->lru, &n->slabs_free);
                        }
@@ -3456,13 +3331,14 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
        int batchcount;
        struct kmem_cache_node *n;
        int node = numa_mem_id();
+       LIST_HEAD(list);
 
        batchcount = ac->batchcount;
 #if DEBUG
        BUG_ON(!batchcount || batchcount > ac->avail);
 #endif
        check_irq_off();
-       n = cachep->node[node];
+       n = get_node(cachep, node);
        spin_lock(&n->list_lock);
        if (n->shared) {
                struct array_cache *shared_array = n->shared;
@@ -3477,7 +3353,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
                }
        }
 
-       free_block(cachep, ac->entry, batchcount, node);
+       free_block(cachep, ac->entry, batchcount, node, &list);
 free_done:
 #if STATS
        {
@@ -3498,6 +3374,7 @@ free_done:
        }
 #endif
        spin_unlock(&n->list_lock);
+       slabs_destroy(cachep, &list);
        ac->avail -= batchcount;
        memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
 }
@@ -3754,7 +3631,7 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
        int node;
        struct kmem_cache_node *n;
        struct array_cache *new_shared;
-       struct array_cache **new_alien = NULL;
+       struct alien_cache **new_alien = NULL;
 
        for_each_online_node(node) {
 
@@ -3775,15 +3652,16 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
                        }
                }
 
-               n = cachep->node[node];
+               n = get_node(cachep, node);
                if (n) {
                        struct array_cache *shared = n->shared;
+                       LIST_HEAD(list);
 
                        spin_lock_irq(&n->list_lock);
 
                        if (shared)
                                free_block(cachep, shared->entry,
-                                               shared->avail, node);
+                                               shared->avail, node, &list);
 
                        n->shared = new_shared;
                        if (!n->alien) {
@@ -3793,6 +3671,7 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
                        n->free_limit = (1 + nr_cpus_node(node)) *
                                        cachep->batchcount + cachep->num;
                        spin_unlock_irq(&n->list_lock);
+                       slabs_destroy(cachep, &list);
                        kfree(shared);
                        free_alien_cache(new_alien);
                        continue;
@@ -3820,9 +3699,8 @@ fail:
                /* Cache is not active yet. Roll back what we did */
                node--;
                while (node >= 0) {
-                       if (cachep->node[node]) {
-                               n = cachep->node[node];
-
+                       n = get_node(cachep, node);
+                       if (n) {
                                kfree(n->shared);
                                free_alien_cache(n->alien);
                                kfree(n);
@@ -3883,12 +3761,20 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
        cachep->shared = shared;
 
        for_each_online_cpu(i) {
+               LIST_HEAD(list);
                struct array_cache *ccold = new->new[i];
+               int node;
+               struct kmem_cache_node *n;
+
                if (!ccold)
                        continue;
-               spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
-               free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
-               spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
+
+               node = cpu_to_mem(i);
+               n = get_node(cachep, node);
+               spin_lock_irq(&n->list_lock);
+               free_block(cachep, ccold->entry, ccold->avail, node, &list);
+               spin_unlock_irq(&n->list_lock);
+               slabs_destroy(cachep, &list);
                kfree(ccold);
        }
        kfree(new);
@@ -3996,6 +3882,7 @@ skip_setup:
 static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
                         struct array_cache *ac, int force, int node)
 {
+       LIST_HEAD(list);
        int tofree;
 
        if (!ac || !ac->avail)
@@ -4008,12 +3895,13 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
                        tofree = force ? ac->avail : (ac->limit + 4) / 5;
                        if (tofree > ac->avail)
                                tofree = (ac->avail + 1) / 2;
-                       free_block(cachep, ac->entry, tofree, node);
+                       free_block(cachep, ac->entry, tofree, node, &list);
                        ac->avail -= tofree;
                        memmove(ac->entry, &(ac->entry[tofree]),
                                sizeof(void *) * ac->avail);
                }
                spin_unlock_irq(&n->list_lock);
+               slabs_destroy(cachep, &list);
        }
 }
 
@@ -4048,7 +3936,7 @@ static void cache_reap(struct work_struct *w)
                 * have established with reasonable certainty that
                 * we can do some work if the lock was obtained.
                 */
-               n = searchp->node[node];
+               n = get_node(searchp, node);
 
                reap_alien(searchp, n);
 
@@ -4100,10 +3988,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
 
        active_objs = 0;
        num_slabs = 0;
-       for_each_online_node(node) {
-               n = cachep->node[node];
-               if (!n)
-                       continue;
+       for_each_kmem_cache_node(cachep, node, n) {
 
                check_irq_on();
                spin_lock_irq(&n->list_lock);
@@ -4328,10 +4213,7 @@ static int leaks_show(struct seq_file *m, void *p)
 
        x[1] = 0;
 
-       for_each_online_node(node) {
-               n = cachep->node[node];
-               if (!n)
-                       continue;
+       for_each_kmem_cache_node(cachep, node, n) {
 
                check_irq_on();
                spin_lock_irq(&n->list_lock);
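
The mm/slab.c changes above all follow one pattern: free_block() now detaches
empty slab pages onto a caller-supplied list while the node's list_lock is
held, and the new slabs_destroy() hands them back to the page allocator only
after the lock is dropped, so slab_destroy() never runs under the spinlock.
A minimal sketch of that pattern using the helpers added by this patch
(example_flush itself is hypothetical and not part of the patch):

        static void example_flush(struct kmem_cache *cachep,
                                  struct array_cache *ac, int node)
        {
                struct kmem_cache_node *n = get_node(cachep, node);
                LIST_HEAD(list);        /* detached slab pages to be destroyed */

                spin_lock(&n->list_lock);
                free_block(cachep, ac->entry, ac->avail, node, &list);
                ac->avail = 0;
                spin_unlock(&n->list_lock);

                slabs_destroy(cachep, &list);   /* page-allocator work, lock-free */
        }
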
index 961a3fb1f5a2c69454e123e5a2d7c34b652c2a1c..0e0fdd3658409e0eee9a89fa51b1ea45cfc96466 100644 (file)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -256,13 +256,12 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
                return cachep;
 
        pr_err("%s: Wrong slab cache. %s but object is from %s\n",
-               __FUNCTION__, cachep->name, s->name);
+              __func__, cachep->name, s->name);
        WARN_ON_ONCE(1);
        return s;
 }
-#endif
-
 
+#ifndef CONFIG_SLOB
 /*
  * The slab lists for all objects.
  */
@@ -277,7 +276,7 @@ struct kmem_cache_node {
        unsigned int free_limit;
        unsigned int colour_next;       /* Per-node cache coloring */
        struct array_cache *shared;     /* shared per node */
-       struct array_cache **alien;     /* on other nodes */
+       struct alien_cache **alien;     /* on other nodes */
        unsigned long next_reap;        /* updated without locking */
        int free_touched;               /* updated without locking */
 #endif
@@ -294,5 +293,22 @@ struct kmem_cache_node {
 
 };
 
+static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
+{
+       return s->node[node];
+}
+
+/*
+ * Iterator over all nodes. The body will be executed for each node that has
+ * a kmem_cache_node structure allocated (which is true for all online nodes)
+ */
+#define for_each_kmem_cache_node(__s, __node, __n) \
+       for (__node = 0; __node < nr_node_ids; __node++) \
+                if ((__n = get_node(__s, __node)))
+
+#endif
+
 void *slab_next(struct seq_file *m, void *p, loff_t *pos);
 void slab_stop(struct seq_file *m, void *p);
+
+#endif /* MM_SLAB_H */
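
The get_node() helper and for_each_kmem_cache_node() iterator added here are
what let both slab allocators drop the open-coded for_each_online_node() plus
NULL-check pairs seen throughout this diff. A hedged usage sketch
(count_free_objects is a made-up example; the free_objects counter exists only
in the SLAB flavour of kmem_cache_node):

        static unsigned long count_free_objects(struct kmem_cache *cachep)
        {
                int node;
                struct kmem_cache_node *n;
                unsigned long free = 0;

                /* Visits only nodes with a kmem_cache_node actually allocated. */
                for_each_kmem_cache_node(cachep, node, n) {
                        spin_lock_irq(&n->list_lock);
                        free += n->free_objects;
                        spin_unlock_irq(&n->list_lock);
                }
                return free;
        }
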
index d31c4bacc6a203b0bc555bd76c2a97e90e78fa6c..d319502b24038b7ad0aee1023b88c5c6501fc39e 100644 (file)
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -19,6 +19,8 @@
 #include <asm/tlbflush.h>
 #include <asm/page.h>
 #include <linux/memcontrol.h>
+
+#define CREATE_TRACE_POINTS
 #include <trace/events/kmem.h>
 
 #include "slab.h"
@@ -787,3 +789,102 @@ static int __init slab_proc_init(void)
 }
 module_init(slab_proc_init);
 #endif /* CONFIG_SLABINFO */
+
+static __always_inline void *__do_krealloc(const void *p, size_t new_size,
+                                          gfp_t flags)
+{
+       void *ret;
+       size_t ks = 0;
+
+       if (p)
+               ks = ksize(p);
+
+       if (ks >= new_size)
+               return (void *)p;
+
+       ret = kmalloc_track_caller(new_size, flags);
+       if (ret && p)
+               memcpy(ret, p, ks);
+
+       return ret;
+}
+
+/**
+ * __krealloc - like krealloc() but don't free @p.
+ * @p: object to reallocate memory for.
+ * @new_size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * This function is like krealloc() except it never frees the originally
+ * allocated buffer. Use this if you don't want to free the buffer immediately
+ * like, for example, with RCU.
+ */
+void *__krealloc(const void *p, size_t new_size, gfp_t flags)
+{
+       if (unlikely(!new_size))
+               return ZERO_SIZE_PTR;
+
+       return __do_krealloc(p, new_size, flags);
+
+}
+EXPORT_SYMBOL(__krealloc);
+
+/**
+ * krealloc - reallocate memory. The contents will remain unchanged.
+ * @p: object to reallocate memory for.
+ * @new_size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * The contents of the object pointed to are preserved up to the
+ * lesser of the new and old sizes.  If @p is %NULL, krealloc()
+ * behaves exactly like kmalloc().  If @new_size is 0 and @p is not a
+ * %NULL pointer, the object pointed to is freed.
+ */
+void *krealloc(const void *p, size_t new_size, gfp_t flags)
+{
+       void *ret;
+
+       if (unlikely(!new_size)) {
+               kfree(p);
+               return ZERO_SIZE_PTR;
+       }
+
+       ret = __do_krealloc(p, new_size, flags);
+       if (ret && p != ret)
+               kfree(p);
+
+       return ret;
+}
+EXPORT_SYMBOL(krealloc);
+
+/**
+ * kzfree - like kfree but zero memory
+ * @p: object to free memory of
+ *
+ * The memory of the object @p points to is zeroed before freed.
+ * If @p is %NULL, kzfree() does nothing.
+ *
+ * Note: this function zeroes the whole allocated buffer which can be a good
+ * deal bigger than the requested buffer size passed to kmalloc(). So be
+ * careful when using this function in performance sensitive code.
+ */
+void kzfree(const void *p)
+{
+       size_t ks;
+       void *mem = (void *)p;
+
+       if (unlikely(ZERO_OR_NULL_PTR(mem)))
+               return;
+       ks = ksize(mem);
+       memset(mem, 0, ks);
+       kfree(mem);
+}
+EXPORT_SYMBOL(kzfree);
+
+/* Tracepoints definitions. */
+EXPORT_TRACEPOINT_SYMBOL(kmalloc);
+EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
+EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
+EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
+EXPORT_TRACEPOINT_SYMBOL(kfree);
+EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
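
The krealloc()/__krealloc()/kzfree() implementations moved into this file (and
removed from mm/util.c further down) keep their documented semantics: a zero
new_size yields ZERO_SIZE_PTR, and the old buffer is freed only when a
genuinely new allocation is returned. A hypothetical caller-side sketch that
relies on exactly those semantics (grow_ints is illustrative only):

        /* Grow an int array, zeroing the new tail; never loses the old buffer. */
        static int *grow_ints(int *old, size_t old_n, size_t new_n)
        {
                int *p;

                if (new_n <= old_n)
                        return old;

                p = krealloc(old, new_n * sizeof(*p), GFP_KERNEL);
                if (!p)                 /* on failure 'old' is left untouched */
                        return old;

                memset(p + old_n, 0, (new_n - old_n) * sizeof(*p));
                return p;
        }
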
index 73004808537ea841e05c85e0b68312ea645eb114..3e8afcc07a760c552135cfb3c79ac924ee9e5494 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -233,11 +233,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
  *                     Core slab cache functions
  *******************************************************************/
 
-static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
-{
-       return s->node[node];
-}
-
 /* Verify that a pointer has an address that is valid within a slab page */
 static inline int check_valid_pointer(struct kmem_cache *s,
                                struct page *page, const void *object)
@@ -288,6 +283,10 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
        for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
                        __p += (__s)->size)
 
+#define for_each_object_idx(__p, __idx, __s, __addr, __objects) \
+       for (__p = (__addr), __idx = 1; __idx <= __objects;\
+                       __p += (__s)->size, __idx++)
+
 /* Determine object index from a given position */
 static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
 {
@@ -382,9 +381,9 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
     defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
        if (s->flags & __CMPXCHG_DOUBLE) {
                if (cmpxchg_double(&page->freelist, &page->counters,
-                       freelist_old, counters_old,
-                       freelist_new, counters_new))
-               return 1;
+                                  freelist_old, counters_old,
+                                  freelist_new, counters_new))
+                       return 1;
        } else
 #endif
        {
@@ -418,9 +417,9 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
     defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
        if (s->flags & __CMPXCHG_DOUBLE) {
                if (cmpxchg_double(&page->freelist, &page->counters,
-                       freelist_old, counters_old,
-                       freelist_new, counters_new))
-               return 1;
+                                  freelist_old, counters_old,
+                                  freelist_new, counters_new))
+                       return 1;
        } else
 #endif
        {
@@ -944,60 +943,6 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
        }
 }
 
-/*
- * Hooks for other subsystems that check memory allocations. In a typical
- * production configuration these hooks all should produce no code at all.
- */
-static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
-{
-       kmemleak_alloc(ptr, size, 1, flags);
-}
-
-static inline void kfree_hook(const void *x)
-{
-       kmemleak_free(x);
-}
-
-static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
-{
-       flags &= gfp_allowed_mask;
-       lockdep_trace_alloc(flags);
-       might_sleep_if(flags & __GFP_WAIT);
-
-       return should_failslab(s->object_size, flags, s->flags);
-}
-
-static inline void slab_post_alloc_hook(struct kmem_cache *s,
-                                       gfp_t flags, void *object)
-{
-       flags &= gfp_allowed_mask;
-       kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
-       kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
-}
-
-static inline void slab_free_hook(struct kmem_cache *s, void *x)
-{
-       kmemleak_free_recursive(x, s->flags);
-
-       /*
-        * Trouble is that we may no longer disable interrupts in the fast path
-        * So in order to make the debug calls that expect irqs to be
-        * disabled we need to disable interrupts temporarily.
-        */
-#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
-       {
-               unsigned long flags;
-
-               local_irq_save(flags);
-               kmemcheck_slab_free(s, x, s->object_size);
-               debug_check_no_locks_freed(x, s->object_size);
-               local_irq_restore(flags);
-       }
-#endif
-       if (!(s->flags & SLAB_DEBUG_OBJECTS))
-               debug_check_no_obj_freed(x, s->object_size);
-}
-
 /*
  * Tracking of fully allocated slabs for debugging purposes.
  */
@@ -1282,6 +1227,12 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
 static inline void dec_slabs_node(struct kmem_cache *s, int node,
                                                        int objects) {}
 
+#endif /* CONFIG_SLUB_DEBUG */
+
+/*
+ * Hooks for other subsystems that check memory allocations. In a typical
+ * production configuration these hooks all should produce no code at all.
+ */
 static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
 {
        kmemleak_alloc(ptr, size, 1, flags);
@@ -1293,21 +1244,44 @@ static inline void kfree_hook(const void *x)
 }
 
 static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
-                                                       { return 0; }
+{
+       flags &= gfp_allowed_mask;
+       lockdep_trace_alloc(flags);
+       might_sleep_if(flags & __GFP_WAIT);
+
+       return should_failslab(s->object_size, flags, s->flags);
+}
 
-static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
-               void *object)
+static inline void slab_post_alloc_hook(struct kmem_cache *s,
+                                       gfp_t flags, void *object)
 {
-       kmemleak_alloc_recursive(object, s->object_size, 1, s->flags,
-               flags & gfp_allowed_mask);
+       flags &= gfp_allowed_mask;
+       kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
+       kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
 }
 
 static inline void slab_free_hook(struct kmem_cache *s, void *x)
 {
        kmemleak_free_recursive(x, s->flags);
-}
 
-#endif /* CONFIG_SLUB_DEBUG */
+       /*
+        * Trouble is that we may no longer disable interrupts in the fast path
+        * So in order to make the debug calls that expect irqs to be
+        * disabled we need to disable interrupts temporarily.
+        */
+#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
+       {
+               unsigned long flags;
+
+               local_irq_save(flags);
+               kmemcheck_slab_free(s, x, s->object_size);
+               debug_check_no_locks_freed(x, s->object_size);
+               local_irq_restore(flags);
+       }
+#endif
+       if (!(s->flags & SLAB_DEBUG_OBJECTS))
+               debug_check_no_obj_freed(x, s->object_size);
+}
 
 /*
  * Slab allocation and freeing
@@ -1409,9 +1383,9 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 {
        struct page *page;
        void *start;
-       void *last;
        void *p;
        int order;
+       int idx;
 
        BUG_ON(flags & GFP_SLAB_BUG_MASK);
 
@@ -1432,14 +1406,13 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
        if (unlikely(s->flags & SLAB_POISON))
                memset(start, POISON_INUSE, PAGE_SIZE << order);
 
-       last = start;
-       for_each_object(p, s, start, page->objects) {
-               setup_object(s, page, last);
-               set_freepointer(s, last, p);
-               last = p;
+       for_each_object_idx(p, idx, s, start, page->objects) {
+               setup_object(s, page, p);
+               if (likely(idx < page->objects))
+                       set_freepointer(s, p, p + s->size);
+               else
+                       set_freepointer(s, p, NULL);
        }
-       setup_object(s, page, last);
-       set_freepointer(s, last, NULL);
 
        page->freelist = start;
        page->inuse = page->objects;
@@ -2162,6 +2135,7 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
        static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
                                      DEFAULT_RATELIMIT_BURST);
        int node;
+       struct kmem_cache_node *n;
 
        if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
                return;
@@ -2176,15 +2150,11 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
                pr_warn("  %s debugging increased min order, use slub_debug=O to disable.\n",
                        s->name);
 
-       for_each_online_node(node) {
-               struct kmem_cache_node *n = get_node(s, node);
+       for_each_kmem_cache_node(s, node, n) {
                unsigned long nr_slabs;
                unsigned long nr_objs;
                unsigned long nr_free;
 
-               if (!n)
-                       continue;
-
                nr_free  = count_partial(n, count_free);
                nr_slabs = node_nr_slabs(n);
                nr_objs  = node_nr_objs(n);
@@ -2928,13 +2898,10 @@ static void early_kmem_cache_node_alloc(int node)
 static void free_kmem_cache_nodes(struct kmem_cache *s)
 {
        int node;
+       struct kmem_cache_node *n;
 
-       for_each_node_state(node, N_NORMAL_MEMORY) {
-               struct kmem_cache_node *n = s->node[node];
-
-               if (n)
-                       kmem_cache_free(kmem_cache_node, n);
-
+       for_each_kmem_cache_node(s, node, n) {
+               kmem_cache_free(kmem_cache_node, n);
                s->node[node] = NULL;
        }
 }
@@ -3222,12 +3189,11 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
 static inline int kmem_cache_close(struct kmem_cache *s)
 {
        int node;
+       struct kmem_cache_node *n;
 
        flush_all(s);
        /* Attempt to free all objects */
-       for_each_node_state(node, N_NORMAL_MEMORY) {
-               struct kmem_cache_node *n = get_node(s, node);
-
+       for_each_kmem_cache_node(s, node, n) {
                free_partial(s, n);
                if (n->nr_partial || slabs_node(s, node))
                        return 1;
@@ -3412,9 +3378,7 @@ int __kmem_cache_shrink(struct kmem_cache *s)
                return -ENOMEM;
 
        flush_all(s);
-       for_each_node_state(node, N_NORMAL_MEMORY) {
-               n = get_node(s, node);
-
+       for_each_kmem_cache_node(s, node, n) {
                if (!n->nr_partial)
                        continue;
 
@@ -3586,6 +3550,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
 {
        int node;
        struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+       struct kmem_cache_node *n;
 
        memcpy(s, static_cache, kmem_cache->object_size);
 
@@ -3595,19 +3560,16 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
         * IPIs around.
         */
        __flush_cpu_slab(s, smp_processor_id());
-       for_each_node_state(node, N_NORMAL_MEMORY) {
-               struct kmem_cache_node *n = get_node(s, node);
+       for_each_kmem_cache_node(s, node, n) {
                struct page *p;
 
-               if (n) {
-                       list_for_each_entry(p, &n->partial, lru)
-                               p->slab_cache = s;
+               list_for_each_entry(p, &n->partial, lru)
+                       p->slab_cache = s;
 
 #ifdef CONFIG_SLUB_DEBUG
-                       list_for_each_entry(p, &n->full, lru)
-                               p->slab_cache = s;
+               list_for_each_entry(p, &n->full, lru)
+                       p->slab_cache = s;
 #endif
-               }
        }
        list_add(&s->list, &slab_caches);
        return s;
@@ -3960,16 +3922,14 @@ static long validate_slab_cache(struct kmem_cache *s)
        unsigned long count = 0;
        unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
                                sizeof(unsigned long), GFP_KERNEL);
+       struct kmem_cache_node *n;
 
        if (!map)
                return -ENOMEM;
 
        flush_all(s);
-       for_each_node_state(node, N_NORMAL_MEMORY) {
-               struct kmem_cache_node *n = get_node(s, node);
-
+       for_each_kmem_cache_node(s, node, n)
                count += validate_slab_node(s, n, map);
-       }
        kfree(map);
        return count;
 }
@@ -4123,6 +4083,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
        int node;
        unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
                                     sizeof(unsigned long), GFP_KERNEL);
+       struct kmem_cache_node *n;
 
        if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
                                     GFP_TEMPORARY)) {
@@ -4132,8 +4093,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
        /* Push back cpu slabs */
        flush_all(s);
 
-       for_each_node_state(node, N_NORMAL_MEMORY) {
-               struct kmem_cache_node *n = get_node(s, node);
+       for_each_kmem_cache_node(s, node, n) {
                unsigned long flags;
                struct page *page;
 
@@ -4205,7 +4165,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
 #endif
 
 #ifdef SLUB_RESILIENCY_TEST
-static void resiliency_test(void)
+static void __init resiliency_test(void)
 {
        u8 *p;
 
@@ -4332,8 +4292,9 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
        get_online_mems();
 #ifdef CONFIG_SLUB_DEBUG
        if (flags & SO_ALL) {
-               for_each_node_state(node, N_NORMAL_MEMORY) {
-                       struct kmem_cache_node *n = get_node(s, node);
+               struct kmem_cache_node *n;
+
+               for_each_kmem_cache_node(s, node, n) {
 
                        if (flags & SO_TOTAL)
                                x = atomic_long_read(&n->total_objects);
@@ -4349,9 +4310,9 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
        } else
 #endif
        if (flags & SO_PARTIAL) {
-               for_each_node_state(node, N_NORMAL_MEMORY) {
-                       struct kmem_cache_node *n = get_node(s, node);
+               struct kmem_cache_node *n;
 
+               for_each_kmem_cache_node(s, node, n) {
                        if (flags & SO_TOTAL)
                                x = count_partial(n, count_total);
                        else if (flags & SO_OBJECTS)
@@ -4364,7 +4325,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
        }
        x = sprintf(buf, "%lu", total);
 #ifdef CONFIG_NUMA
-       for_each_node_state(node, N_NORMAL_MEMORY)
+       for (node = 0; node < nr_node_ids; node++)
                if (nodes[node])
                        x += sprintf(buf + x, " N%d=%lu",
                                        node, nodes[node]);
@@ -4378,16 +4339,12 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 static int any_slab_objects(struct kmem_cache *s)
 {
        int node;
+       struct kmem_cache_node *n;
 
-       for_each_online_node(node) {
-               struct kmem_cache_node *n = get_node(s, node);
-
-               if (!n)
-                       continue;
-
+       for_each_kmem_cache_node(s, node, n)
                if (atomic_long_read(&n->total_objects))
                        return 1;
-       }
+
        return 0;
 }
 #endif
@@ -4509,7 +4466,7 @@ SLAB_ATTR_RO(ctor);
 
 static ssize_t aliases_show(struct kmem_cache *s, char *buf)
 {
-       return sprintf(buf, "%d\n", s->refcount - 1);
+       return sprintf(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
 }
 SLAB_ATTR_RO(aliases);
 
@@ -5171,12 +5128,6 @@ static char *create_unique_id(struct kmem_cache *s)
                *p++ = '-';
        p += sprintf(p, "%07d", s->size);
 
-#ifdef CONFIG_MEMCG_KMEM
-       if (!is_root_cache(s))
-               p += sprintf(p, "-%08d",
-                               memcg_cache_id(s->memcg_params->memcg));
-#endif
-
        BUG_ON(p > name + ID_STR_LENGTH - 1);
        return name;
 }
@@ -5342,13 +5293,9 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
        unsigned long nr_objs = 0;
        unsigned long nr_free = 0;
        int node;
+       struct kmem_cache_node *n;
 
-       for_each_online_node(node) {
-               struct kmem_cache_node *n = get_node(s, node);
-
-               if (!n)
-                       continue;
-
+       for_each_kmem_cache_node(s, node, n) {
                nr_slabs += node_nr_slabs(n);
                nr_objs += node_nr_objs(n);
                nr_free += count_partial(n, count_free);
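
Among the SLUB changes, new_slab() now builds the initial freelist with
for_each_object_idx(): each object's free pointer is set to the address of the
next object (p + s->size) and the last one is terminated with NULL, replacing
the old "remember the previous object" loop. A stripped-down, illustrative
sketch of that linking (setup_object() and the s->offset placement done by
set_freepointer() are deliberately omitted):

        /* Link 'objects' slots of 'size' bytes into a NULL-terminated freelist. */
        static void link_freelist(char *start, size_t size, unsigned int objects)
        {
                unsigned int idx;
                char *p = start;

                for (idx = 1; idx <= objects; idx++, p += size) {
                        if (idx < objects)
                                *(void **)p = p + size; /* next free object */
                        else
                                *(void **)p = NULL;     /* end of the freelist */
                }
        }
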
index 9e8e3472248bb8dfa10107fb212974e1343ffa4a..c789d01c9ec31db05803566bc23baee090908366 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -501,7 +501,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec,
                SetPageActive(page);
                lru += LRU_ACTIVE;
                add_page_to_lru_list(page, lruvec, lru);
-               trace_mm_lru_activate(page, page_to_pfn(page));
+               trace_mm_lru_activate(page);
 
                __count_vm_event(PGACTIVATE);
                update_page_reclaim_stat(lruvec, file, 1);
@@ -589,6 +589,9 @@ static void __lru_cache_activate_page(struct page *page)
  * inactive,unreferenced       ->      inactive,referenced
  * inactive,referenced         ->      active,unreferenced
  * active,unreferenced         ->      active,referenced
+ *
+ * When a newly allocated page is not yet visible to others, and is thus safe
+ * for non-atomic ops, __SetPageReferenced(page) may be substituted for
+ * mark_page_accessed(page).
  */
 void mark_page_accessed(struct page *page)
 {
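
The comment added above records the convention that replaces
init_page_accessed(), which is deleted in the next hunk: a page that nobody
else can see yet may use the non-atomic __SetPageReferenced() instead of
mark_page_accessed(). A hedged sketch of such a caller (alloc_referenced_page
is illustrative, not part of the patch):

        static struct page *alloc_referenced_page(gfp_t gfp)
        {
                struct page *page = alloc_page(gfp);

                /*
                 * The page is not yet in the page cache, on any LRU or mapped,
                 * so the non-atomic flag setter is safe and avoids the extra
                 * work mark_page_accessed() would do.
                 */
                if (page)
                        __SetPageReferenced(page);

                return page;
        }
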
@@ -614,17 +617,6 @@ void mark_page_accessed(struct page *page)
 }
 EXPORT_SYMBOL(mark_page_accessed);
 
-/*
- * Used to mark_page_accessed(page) that is not visible yet and when it is
- * still safe to use non-atomic ops
- */
-void init_page_accessed(struct page *page)
-{
-       if (!PageReferenced(page))
-               __SetPageReferenced(page);
-}
-EXPORT_SYMBOL(init_page_accessed);
-
 static void __lru_cache_add(struct page *page)
 {
        struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
@@ -996,7 +988,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
        SetPageLRU(page);
        add_page_to_lru_list(page, lruvec, lru);
        update_page_reclaim_stat(lruvec, file, active);
-       trace_mm_lru_insertion(page, page_to_pfn(page), lru, trace_pagemap_flags(page));
+       trace_mm_lru_insertion(page, lru);
 }
 
 /*
index d5ea733c508265aaba619248d973ec640a73d04a..7b6608df2ee803d9d4345dc11290eb17100199f4 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -16,9 +16,6 @@
 
 #include "internal.h"
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/kmem.h>
-
 /**
  * kstrdup - allocate space for and copy an existing string
  * @s: the string to duplicate
@@ -112,97 +109,6 @@ void *memdup_user(const void __user *src, size_t len)
 }
 EXPORT_SYMBOL(memdup_user);
 
-static __always_inline void *__do_krealloc(const void *p, size_t new_size,
-                                          gfp_t flags)
-{
-       void *ret;
-       size_t ks = 0;
-
-       if (p)
-               ks = ksize(p);
-
-       if (ks >= new_size)
-               return (void *)p;
-
-       ret = kmalloc_track_caller(new_size, flags);
-       if (ret && p)
-               memcpy(ret, p, ks);
-
-       return ret;
-}
-
-/**
- * __krealloc - like krealloc() but don't free @p.
- * @p: object to reallocate memory for.
- * @new_size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
- *
- * This function is like krealloc() except it never frees the originally
- * allocated buffer. Use this if you don't want to free the buffer immediately
- * like, for example, with RCU.
- */
-void *__krealloc(const void *p, size_t new_size, gfp_t flags)
-{
-       if (unlikely(!new_size))
-               return ZERO_SIZE_PTR;
-
-       return __do_krealloc(p, new_size, flags);
-
-}
-EXPORT_SYMBOL(__krealloc);
-
-/**
- * krealloc - reallocate memory. The contents will remain unchanged.
- * @p: object to reallocate memory for.
- * @new_size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
- *
- * The contents of the object pointed to are preserved up to the
- * lesser of the new and old sizes.  If @p is %NULL, krealloc()
- * behaves exactly like kmalloc().  If @new_size is 0 and @p is not a
- * %NULL pointer, the object pointed to is freed.
- */
-void *krealloc(const void *p, size_t new_size, gfp_t flags)
-{
-       void *ret;
-
-       if (unlikely(!new_size)) {
-               kfree(p);
-               return ZERO_SIZE_PTR;
-       }
-
-       ret = __do_krealloc(p, new_size, flags);
-       if (ret && p != ret)
-               kfree(p);
-
-       return ret;
-}
-EXPORT_SYMBOL(krealloc);
-
-/**
- * kzfree - like kfree but zero memory
- * @p: object to free memory of
- *
- * The memory of the object @p points to is zeroed before freed.
- * If @p is %NULL, kzfree() does nothing.
- *
- * Note: this function zeroes the whole allocated buffer which can be a good
- * deal bigger than the requested buffer size passed to kmalloc(). So be
- * careful when using this function in performance sensitive code.
- */
-void kzfree(const void *p)
-{
-       size_t ks;
-       void *mem = (void *)p;
-
-       if (unlikely(ZERO_OR_NULL_PTR(mem)))
-               return;
-       ks = ksize(mem);
-       memset(mem, 0, ks);
-       kfree(mem);
-}
-EXPORT_SYMBOL(kzfree);
-
 /*
  * strndup_user - duplicate an existing string from user space
  * @s: The string to duplicate
@@ -504,11 +410,3 @@ out_mm:
 out:
        return res;
 }
-
-/* Tracepoints definitions. */
-EXPORT_TRACEPOINT_SYMBOL(kmalloc);
-EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
-EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
-EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
-EXPORT_TRACEPOINT_SYMBOL(kfree);
-EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
index f64632b671964a0788b43e8d30ae0edb7b292292..2b0aa5486092dca2745c2ec201cda44db033550c 100644 (file)
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1270,19 +1270,15 @@ void unmap_kernel_range(unsigned long addr, unsigned long size)
 }
 EXPORT_SYMBOL_GPL(unmap_kernel_range);
 
-int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
+int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages)
 {
        unsigned long addr = (unsigned long)area->addr;
        unsigned long end = addr + get_vm_area_size(area);
        int err;
 
-       err = vmap_page_range(addr, end, prot, *pages);
-       if (err > 0) {
-               *pages += err;
-               err = 0;
-       }
+       err = vmap_page_range(addr, end, prot, pages);
 
-       return err;
+       return err > 0 ? 0 : err;
 }
 EXPORT_SYMBOL_GPL(map_vm_area);
 
@@ -1548,7 +1544,7 @@ void *vmap(struct page **pages, unsigned int count,
        if (!area)
                return NULL;
 
-       if (map_vm_area(area, prot, &pages)) {
+       if (map_vm_area(area, prot, pages)) {
                vunmap(area->addr);
                return NULL;
        }
@@ -1566,7 +1562,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
        const int order = 0;
        struct page **pages;
        unsigned int nr_pages, array_size, i;
-       gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
+       const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
+       const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
 
        nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
        array_size = (nr_pages * sizeof(struct page *));
@@ -1589,12 +1586,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 
        for (i = 0; i < area->nr_pages; i++) {
                struct page *page;
-               gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;
 
                if (node == NUMA_NO_NODE)
-                       page = alloc_page(tmp_mask);
+                       page = alloc_page(alloc_mask);
                else
-                       page = alloc_pages_node(node, tmp_mask, order);
+                       page = alloc_pages_node(node, alloc_mask, order);
 
                if (unlikely(!page)) {
                        /* Successfully allocated i pages, free them in __vunmap() */
@@ -1602,9 +1598,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
                        goto fail;
                }
                area->pages[i] = page;
+               if (gfp_mask & __GFP_WAIT)
+                       cond_resched();
        }
 
-       if (map_vm_area(area, prot, &pages))
+       if (map_vm_area(area, prot, pages))
                goto fail;
        return area->addr;
 
@@ -2690,14 +2688,14 @@ void get_vmalloc_info(struct vmalloc_info *vmi)
 
        prev_end = VMALLOC_START;
 
-       spin_lock(&vmap_area_lock);
+       rcu_read_lock();
 
        if (list_empty(&vmap_area_list)) {
                vmi->largest_chunk = VMALLOC_TOTAL;
                goto out;
        }
 
-       list_for_each_entry(va, &vmap_area_list, list) {
+       list_for_each_entry_rcu(va, &vmap_area_list, list) {
                unsigned long addr = va->va_start;
 
                /*
@@ -2724,7 +2722,7 @@ void get_vmalloc_info(struct vmalloc_info *vmi)
                vmi->largest_chunk = VMALLOC_END - prev_end;
 
 out:
-       spin_unlock(&vmap_area_lock);
+       rcu_read_unlock();
 }
 #endif
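
With map_vm_area() now taking a plain struct page ** and no longer advancing
the caller's pointer, callers such as vmap() above simply pass the page array
directly. A hypothetical caller under the new signature (map_pages_example is
not part of the patch; it just mirrors what vmap() does):

        /* Map 'count' pre-allocated pages into a fresh kernel virtual area. */
        static void *map_pages_example(struct page **pages, unsigned int count)
        {
                struct vm_struct *area;

                area = get_vm_area(count * PAGE_SIZE, VM_MAP);
                if (!area)
                        return NULL;

                if (map_vm_area(area, PAGE_KERNEL, pages)) {
                        free_vm_area(area);
                        return NULL;
                }
                return area->addr;
        }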
 
index 0f16ffe8eb67c6fcd0350add4a5a4b6092cb6905..d2f65c856350eb179f83c20cde87d961d92eb28f 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
 #include <trace/events/vmscan.h>
 
 struct scan_control {
-       /* Incremented by the number of inactive pages that were scanned */
-       unsigned long nr_scanned;
-
-       /* Number of pages freed so far during a call to shrink_zones() */
-       unsigned long nr_reclaimed;
-
        /* How many pages shrink_list() should reclaim */
        unsigned long nr_to_reclaim;
 
-       unsigned long hibernation_mode;
-
        /* This context's GFP mask */
        gfp_t gfp_mask;
 
-       int may_writepage;
-
-       /* Can mapped pages be reclaimed? */
-       int may_unmap;
-
-       /* Can pages be swapped as part of reclaim? */
-       int may_swap;
-
+       /* Allocation order */
        int order;
 
-       /* Scan (total_size >> priority) pages at once */
-       int priority;
-
-       /* anon vs. file LRUs scanning "ratio" */
-       int swappiness;
+       /*
+        * Nodemask of nodes allowed by the caller. If NULL, all nodes
+        * are scanned.
+        */
+       nodemask_t      *nodemask;
 
        /*
         * The memory cgroup that hit its limit and as a result is the
@@ -95,11 +80,27 @@ struct scan_control {
         */
        struct mem_cgroup *target_mem_cgroup;
 
-       /*
-        * Nodemask of nodes allowed by the caller. If NULL, all nodes
-        * are scanned.
-        */
-       nodemask_t      *nodemask;
+       /* Scan (total_size >> priority) pages at once */
+       int priority;
+
+       unsigned int may_writepage:1;
+
+       /* Can mapped pages be reclaimed? */
+       unsigned int may_unmap:1;
+
+       /* Can pages be swapped as part of reclaim? */
+       unsigned int may_swap:1;
+
+       unsigned int hibernation_mode:1;
+
+       /* One of the zones is ready for compaction */
+       unsigned int compaction_ready:1;
+
+       /* Incremented by the number of inactive pages that were scanned */
+       unsigned long nr_scanned;
+
+       /* Number of pages freed so far during a call to shrink_zones() */
+       unsigned long nr_reclaimed;
 };
 
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
@@ -136,7 +137,11 @@ struct scan_control {
  * From 0 .. 100.  Higher means more swappy.
  */
 int vm_swappiness = 60;
-unsigned long vm_total_pages;  /* The total number of pages which the VM controls */
+/*
+ * The total number of pages which are beyond the high watermark within all
+ * zones.
+ */
+unsigned long vm_total_pages;
 
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
@@ -169,7 +174,8 @@ static unsigned long zone_reclaimable_pages(struct zone *zone)
 
 bool zone_reclaimable(struct zone *zone)
 {
-       return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
+       return zone_page_state(zone, NR_PAGES_SCANNED) <
+               zone_reclaimable_pages(zone) * 6;
 }
 
 static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
@@ -1503,7 +1509,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
        __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
 
        if (global_reclaim(sc)) {
-               zone->pages_scanned += nr_scanned;
+               __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
                if (current_is_kswapd())
                        __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned);
                else
@@ -1693,7 +1699,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
        nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
                                     &nr_scanned, sc, isolate_mode, lru);
        if (global_reclaim(sc))
-               zone->pages_scanned += nr_scanned;
+               __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
 
        reclaim_stat->recent_scanned[file] += nr_taken;
 
@@ -1750,7 +1756,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
         * Count referenced pages from currently used mappings as rotated,
         * even though only some of them are actually re-activated.  This
         * helps balance scan pressure between file and anonymous pages in
-        * get_scan_ratio.
+        * get_scan_count.
         */
        reclaim_stat->recent_rotated[file] += nr_rotated;
 
@@ -1865,8 +1871,8 @@ enum scan_balance {
  * nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan
  * nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
  */
-static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
-                          unsigned long *nr)
+static void get_scan_count(struct lruvec *lruvec, int swappiness,
+                          struct scan_control *sc, unsigned long *nr)
 {
        struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
        u64 fraction[2];
@@ -1909,7 +1915,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
         * using the memory controller's swap limit feature would be
         * too expensive.
         */
-       if (!global_reclaim(sc) && !sc->swappiness) {
+       if (!global_reclaim(sc) && !swappiness) {
                scan_balance = SCAN_FILE;
                goto out;
        }
@@ -1919,16 +1925,11 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
         * system is close to OOM, scan both anon and file equally
         * (unless the swappiness setting disagrees with swapping).
         */
-       if (!sc->priority && sc->swappiness) {
+       if (!sc->priority && swappiness) {
                scan_balance = SCAN_EQUAL;
                goto out;
        }
 
-       anon  = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
-               get_lru_size(lruvec, LRU_INACTIVE_ANON);
-       file  = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
-               get_lru_size(lruvec, LRU_INACTIVE_FILE);
-
        /*
         * Prevent the reclaimer from falling into the cache trap: as
         * cache pages start out inactive, every cache fault will tip
@@ -1939,9 +1940,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
         * anon pages.  Try to detect this based on file LRU size.
         */
        if (global_reclaim(sc)) {
-               unsigned long free = zone_page_state(zone, NR_FREE_PAGES);
+               unsigned long zonefile;
+               unsigned long zonefree;
 
-               if (unlikely(file + free <= high_wmark_pages(zone))) {
+               zonefree = zone_page_state(zone, NR_FREE_PAGES);
+               zonefile = zone_page_state(zone, NR_ACTIVE_FILE) +
+                          zone_page_state(zone, NR_INACTIVE_FILE);
+
+               if (unlikely(zonefile + zonefree <= high_wmark_pages(zone))) {
                        scan_balance = SCAN_ANON;
                        goto out;
                }
@@ -1962,7 +1968,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
         * With swappiness at 100, anonymous and file have the same priority.
         * This scanning priority is essentially the inverse of IO cost.
         */
-       anon_prio = sc->swappiness;
+       anon_prio = swappiness;
        file_prio = 200 - anon_prio;
 
        /*
@@ -1976,6 +1982,12 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
         *
         * anon in [0], file in [1]
         */
+
+       anon  = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
+               get_lru_size(lruvec, LRU_INACTIVE_ANON);
+       file  = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
+               get_lru_size(lruvec, LRU_INACTIVE_FILE);
+
        spin_lock_irq(&zone->lru_lock);
        if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
                reclaim_stat->recent_scanned[0] /= 2;
@@ -2052,7 +2064,8 @@ out:
 /*
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
  */
-static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
+                         struct scan_control *sc)
 {
        unsigned long nr[NR_LRU_LISTS];
        unsigned long targets[NR_LRU_LISTS];
@@ -2063,7 +2076,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
        struct blk_plug plug;
        bool scan_adjusted;
 
-       get_scan_count(lruvec, sc, nr);
+       get_scan_count(lruvec, swappiness, sc, nr);
 
        /* Record the original scan target for proportional adjustments later */
        memcpy(targets, nr, sizeof(nr));
@@ -2241,9 +2254,10 @@ static inline bool should_continue_reclaim(struct zone *zone,
        }
 }
 
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
+static bool shrink_zone(struct zone *zone, struct scan_control *sc)
 {
        unsigned long nr_reclaimed, nr_scanned;
+       bool reclaimable = false;
 
        do {
                struct mem_cgroup *root = sc->target_mem_cgroup;
@@ -2259,11 +2273,12 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
                memcg = mem_cgroup_iter(root, NULL, &reclaim);
                do {
                        struct lruvec *lruvec;
+                       int swappiness;
 
                        lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+                       swappiness = mem_cgroup_swappiness(memcg);
 
-                       sc->swappiness = mem_cgroup_swappiness(memcg);
-                       shrink_lruvec(lruvec, sc);
+                       shrink_lruvec(lruvec, swappiness, sc);
 
                        /*
                         * Direct reclaim and kswapd have to scan all memory
@@ -2287,20 +2302,21 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
                           sc->nr_scanned - nr_scanned,
                           sc->nr_reclaimed - nr_reclaimed);
 
+               if (sc->nr_reclaimed - nr_reclaimed)
+                       reclaimable = true;
+
        } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
                                         sc->nr_scanned - nr_scanned, sc));
+
+       return reclaimable;
 }
 
 /* Returns true if compaction should go ahead for a high-order request */
-static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
+static inline bool compaction_ready(struct zone *zone, int order)
 {
        unsigned long balance_gap, watermark;
        bool watermark_ok;
 
-       /* Do not consider compaction for orders reclaim is meant to satisfy */
-       if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
-               return false;
-
        /*
         * Compaction takes time to run and there are potentially other
         * callers using the pages just freed. Continue reclaiming until
@@ -2309,18 +2325,18 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
         */
        balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP(
                        zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO));
-       watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
+       watermark = high_wmark_pages(zone) + balance_gap + (2UL << order);
        watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
 
        /*
         * If compaction is deferred, reclaim up to a point where
         * compaction will have a chance of success when re-enabled
         */
-       if (compaction_deferred(zone, sc->order))
+       if (compaction_deferred(zone, order))
                return watermark_ok;
 
        /* If compaction is not ready to start, keep reclaiming */
-       if (!compaction_suitable(zone, sc->order))
+       if (!compaction_suitable(zone, order))
                return false;
 
        return watermark_ok;
@@ -2342,10 +2358,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
  * If a zone is deemed to be full of pinned pages then just give it a light
  * scan then give up on it.
  *
- * This function returns true if a zone is being reclaimed for a costly
- * high-order allocation and compaction is ready to begin. This indicates to
- * the caller that it should consider retrying the allocation instead of
- * further reclaim.
+ * Returns true if a zone was reclaimable.
  */
 static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 {
@@ -2354,13 +2367,13 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
        unsigned long nr_soft_reclaimed;
        unsigned long nr_soft_scanned;
        unsigned long lru_pages = 0;
-       bool aborted_reclaim = false;
        struct reclaim_state *reclaim_state = current->reclaim_state;
        gfp_t orig_mask;
        struct shrink_control shrink = {
                .gfp_mask = sc->gfp_mask,
        };
        enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
+       bool reclaimable = false;
 
        /*
         * If the number of buffer_heads in the machine exceeds the maximum
@@ -2391,22 +2404,24 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                        if (sc->priority != DEF_PRIORITY &&
                            !zone_reclaimable(zone))
                                continue;       /* Let kswapd poll it */
-                       if (IS_ENABLED(CONFIG_COMPACTION)) {
-                               /*
-                                * If we already have plenty of memory free for
-                                * compaction in this zone, don't free any more.
-                                * Even though compaction is invoked for any
-                                * non-zero order, only frequent costly order
-                                * reclamation is disruptive enough to become a
-                                * noticeable problem, like transparent huge
-                                * page allocations.
-                                */
-                               if ((zonelist_zone_idx(z) <= requested_highidx)
-                                   && compaction_ready(zone, sc)) {
-                                       aborted_reclaim = true;
-                                       continue;
-                               }
+
+                       /*
+                        * If we already have plenty of memory free for
+                        * compaction in this zone, don't free any more.
+                        * Even though compaction is invoked for any
+                        * non-zero order, only frequent costly order
+                        * reclamation is disruptive enough to become a
+                        * noticeable problem, like transparent huge
+                        * page allocations.
+                        */
+                       if (IS_ENABLED(CONFIG_COMPACTION) &&
+                           sc->order > PAGE_ALLOC_COSTLY_ORDER &&
+                           zonelist_zone_idx(z) <= requested_highidx &&
+                           compaction_ready(zone, sc->order)) {
+                               sc->compaction_ready = true;
+                               continue;
                        }
+
                        /*
                         * This steals pages from memory cgroups over softlimit
                         * and returns the number of reclaimed pages and
@@ -2419,10 +2434,17 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                                                &nr_soft_scanned);
                        sc->nr_reclaimed += nr_soft_reclaimed;
                        sc->nr_scanned += nr_soft_scanned;
+                       if (nr_soft_reclaimed)
+                               reclaimable = true;
                        /* need some check for avoid more shrink_zone() */
                }
 
-               shrink_zone(zone, sc);
+               if (shrink_zone(zone, sc))
+                       reclaimable = true;
+
+               if (global_reclaim(sc) &&
+                   !reclaimable && zone_reclaimable(zone))
+                       reclaimable = true;
        }
 
        /*
@@ -2445,27 +2467,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
         */
        sc->gfp_mask = orig_mask;
 
-       return aborted_reclaim;
-}
-
-/* All zones in zonelist are unreclaimable? */
-static bool all_unreclaimable(struct zonelist *zonelist,
-               struct scan_control *sc)
-{
-       struct zoneref *z;
-       struct zone *zone;
-
-       for_each_zone_zonelist_nodemask(zone, z, zonelist,
-                       gfp_zone(sc->gfp_mask), sc->nodemask) {
-               if (!populated_zone(zone))
-                       continue;
-               if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
-                       continue;
-               if (zone_reclaimable(zone))
-                       return false;
-       }
-
-       return true;
+       return reclaimable;
 }
 
 /*
@@ -2489,7 +2491,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 {
        unsigned long total_scanned = 0;
        unsigned long writeback_threshold;
-       bool aborted_reclaim;
+       bool zones_reclaimable;
 
        delayacct_freepages_start();
 
@@ -2500,11 +2502,14 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
                                sc->priority);
                sc->nr_scanned = 0;
-               aborted_reclaim = shrink_zones(zonelist, sc);
+               zones_reclaimable = shrink_zones(zonelist, sc);
 
                total_scanned += sc->nr_scanned;
                if (sc->nr_reclaimed >= sc->nr_to_reclaim)
-                       goto out;
+                       break;
+
+               if (sc->compaction_ready)
+                       break;
 
                /*
                 * If we're getting trouble reclaiming, start doing
@@ -2526,28 +2531,19 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                                                WB_REASON_TRY_TO_FREE_PAGES);
                        sc->may_writepage = 1;
                }
-       } while (--sc->priority >= 0 && !aborted_reclaim);
+       } while (--sc->priority >= 0);
 
-out:
        delayacct_freepages_end();
 
        if (sc->nr_reclaimed)
                return sc->nr_reclaimed;
 
-       /*
-        * As hibernation is going on, kswapd is freezed so that it can't mark
-        * the zone into all_unreclaimable. Thus bypassing all_unreclaimable
-        * check.
-        */
-       if (oom_killer_disabled)
-               return 0;
-
        /* Aborted reclaim to try compaction? don't OOM, then */
-       if (aborted_reclaim)
+       if (sc->compaction_ready)
                return 1;
 
-       /* top priority shrink_zones still had more to do? don't OOM, then */
-       if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc))
+       /* Any of the zones still reclaimable?  Don't OOM. */
+       if (zones_reclaimable)
                return 1;
 
        return 0;
@@ -2684,15 +2680,14 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 {
        unsigned long nr_reclaimed;
        struct scan_control sc = {
+               .nr_to_reclaim = SWAP_CLUSTER_MAX,
                .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
+               .order = order,
+               .nodemask = nodemask,
+               .priority = DEF_PRIORITY,
                .may_writepage = !laptop_mode,
-               .nr_to_reclaim = SWAP_CLUSTER_MAX,
                .may_unmap = 1,
                .may_swap = 1,
-               .order = order,
-               .priority = DEF_PRIORITY,
-               .target_mem_cgroup = NULL,
-               .nodemask = nodemask,
        };
 
        /*
@@ -2722,17 +2717,14 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
                                                unsigned long *nr_scanned)
 {
        struct scan_control sc = {
-               .nr_scanned = 0,
                .nr_to_reclaim = SWAP_CLUSTER_MAX,
+               .target_mem_cgroup = memcg,
                .may_writepage = !laptop_mode,
                .may_unmap = 1,
                .may_swap = !noswap,
-               .order = 0,
-               .priority = 0,
-               .swappiness = mem_cgroup_swappiness(memcg),
-               .target_mem_cgroup = memcg,
        };
        struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+       int swappiness = mem_cgroup_swappiness(memcg);
 
        sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                        (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2748,7 +2740,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
         * will pick up pages from other mem cgroup's as well. We hack
         * the priority and make it zero.
         */
-       shrink_lruvec(lruvec, &sc);
+       shrink_lruvec(lruvec, swappiness, &sc);
 
        trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
 
@@ -2764,16 +2756,14 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
        unsigned long nr_reclaimed;
        int nid;
        struct scan_control sc = {
-               .may_writepage = !laptop_mode,
-               .may_unmap = 1,
-               .may_swap = !noswap,
                .nr_to_reclaim = SWAP_CLUSTER_MAX,
-               .order = 0,
-               .priority = DEF_PRIORITY,
-               .target_mem_cgroup = memcg,
-               .nodemask = NULL, /* we don't care the placement */
                .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                                (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
+               .target_mem_cgroup = memcg,
+               .priority = DEF_PRIORITY,
+               .may_writepage = !laptop_mode,
+               .may_unmap = 1,
+               .may_swap = !noswap,
        };
 
        /*
@@ -3031,12 +3021,11 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
        unsigned long nr_soft_scanned;
        struct scan_control sc = {
                .gfp_mask = GFP_KERNEL,
+               .order = order,
                .priority = DEF_PRIORITY,
+               .may_writepage = !laptop_mode,
                .may_unmap = 1,
                .may_swap = 1,
-               .may_writepage = !laptop_mode,
-               .order = order,
-               .target_mem_cgroup = NULL,
        };
        count_vm_event(PAGEOUTRUN);
 
@@ -3417,14 +3406,13 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 {
        struct reclaim_state reclaim_state;
        struct scan_control sc = {
+               .nr_to_reclaim = nr_to_reclaim,
                .gfp_mask = GFP_HIGHUSER_MOVABLE,
-               .may_swap = 1,
-               .may_unmap = 1,
+               .priority = DEF_PRIORITY,
                .may_writepage = 1,
-               .nr_to_reclaim = nr_to_reclaim,
+               .may_unmap = 1,
+               .may_swap = 1,
                .hibernation_mode = 1,
-               .order = 0,
-               .priority = DEF_PRIORITY,
        };
        struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
        struct task_struct *p = current;
@@ -3604,13 +3592,13 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
        struct task_struct *p = current;
        struct reclaim_state reclaim_state;
        struct scan_control sc = {
-               .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
-               .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
-               .may_swap = 1,
                .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
                .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
                .order = order,
                .priority = ZONE_RECLAIM_PRIORITY,
+               .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
+               .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
+               .may_swap = 1,
        };
        struct shrink_control shrink = {
                .gfp_mask = sc.gfp_mask,
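
The net effect of the vmscan changes above is a simpler exit protocol for direct reclaim: the priority loop stops once the reclaim target is met or compaction is ready, and the "don't OOM" decision keys off whether shrink_zones() saw any reclaimable zone instead of the removed all_unreclaimable() walk. The following is a minimal user-space model of that control flow only; all *_model names are invented for illustration and this is not kernel code.

#include <stdbool.h>
#include <stdio.h>

struct scan_control_model {
	unsigned long nr_reclaimed;
	unsigned long nr_to_reclaim;
	bool compaction_ready;
	int priority;
};

/* Stand-in for shrink_zones(): pretend each pass frees a few pages and
 * report whether any zone was reclaimable at all. */
static bool shrink_zones_model(struct scan_control_model *sc, bool zones_have_pages)
{
	if (zones_have_pages)
		sc->nr_reclaimed += 4;
	return zones_have_pages;
}

/* Mirrors the return contract of do_try_to_free_pages() after the rework:
 * pages reclaimed, or 1 ("don't OOM") when compaction is ready or some zone
 * is still reclaimable, or 0 when nothing is left. */
static unsigned long try_to_free_model(struct scan_control_model *sc, bool zones_have_pages)
{
	bool zones_reclaimable = false;

	do {
		zones_reclaimable = shrink_zones_model(sc, zones_have_pages);
		if (sc->nr_reclaimed >= sc->nr_to_reclaim)
			break;
		if (sc->compaction_ready)
			break;
	} while (--sc->priority >= 0);

	if (sc->nr_reclaimed)
		return sc->nr_reclaimed;
	if (sc->compaction_ready)
		return 1;
	if (zones_reclaimable)
		return 1;
	return 0;
}

int main(void)
{
	struct scan_control_model sc = { .nr_to_reclaim = 32, .priority = 12 };

	printf("reclaimed %lu pages\n", try_to_free_model(&sc, true));
	return 0;
}
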
index b37bd49bfd55e206e0845535bc42e7817502ce3b..e9ab104b956f127f79598231863bd62b7f559147 100644 (file)
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -200,7 +200,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat,
                        continue;
 
                threshold = (*calculate_pressure)(zone);
-               for_each_possible_cpu(cpu)
+               for_each_online_cpu(cpu)
                        per_cpu_ptr(zone->pageset, cpu)->stat_threshold
                                                        = threshold;
        }
@@ -763,6 +763,7 @@ const char * const vmstat_text[] = {
        "nr_shmem",
        "nr_dirtied",
        "nr_written",
+       "nr_pages_scanned",
 
 #ifdef CONFIG_NUMA
        "numa_hit",
@@ -1067,7 +1068,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
                   min_wmark_pages(zone),
                   low_wmark_pages(zone),
                   high_wmark_pages(zone),
-                  zone->pages_scanned,
+                  zone_page_state(zone, NR_PAGES_SCANNED),
                   zone->spanned_pages,
                   zone->present_pages,
                   zone->managed_pages);
@@ -1077,10 +1078,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
                                zone_page_state(zone, i));
 
        seq_printf(m,
-                  "\n        protection: (%lu",
+                  "\n        protection: (%ld",
                   zone->lowmem_reserve[0]);
        for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
-               seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
+               seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
        seq_printf(m,
                   ")"
                   "\n  pagesets");
index 01df13a7e2e1e51bdd0ea5c8d06e3aad1d65d066..a05790b1915eb4faba938cd0264c8a5ccfd2aa9e 100644 (file)
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -51,6 +51,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/zbud.h>
+#include <linux/zpool.h>
 
 /*****************
  * Structures
@@ -112,6 +113,90 @@ struct zbud_header {
        bool under_reclaim;
 };
 
+/*****************
+ * zpool
+ ****************/
+
+#ifdef CONFIG_ZPOOL
+
+static int zbud_zpool_evict(struct zbud_pool *pool, unsigned long handle)
+{
+       return zpool_evict(pool, handle);
+}
+
+static struct zbud_ops zbud_zpool_ops = {
+       .evict =        zbud_zpool_evict
+};
+
+static void *zbud_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops)
+{
+       return zbud_create_pool(gfp, &zbud_zpool_ops);
+}
+
+static void zbud_zpool_destroy(void *pool)
+{
+       zbud_destroy_pool(pool);
+}
+
+static int zbud_zpool_malloc(void *pool, size_t size, gfp_t gfp,
+                       unsigned long *handle)
+{
+       return zbud_alloc(pool, size, gfp, handle);
+}
+static void zbud_zpool_free(void *pool, unsigned long handle)
+{
+       zbud_free(pool, handle);
+}
+
+static int zbud_zpool_shrink(void *pool, unsigned int pages,
+                       unsigned int *reclaimed)
+{
+       unsigned int total = 0;
+       int ret = -EINVAL;
+
+       while (total < pages) {
+               ret = zbud_reclaim_page(pool, 8);
+               if (ret < 0)
+                       break;
+               total++;
+       }
+
+       if (reclaimed)
+               *reclaimed = total;
+
+       return ret;
+}
+
+static void *zbud_zpool_map(void *pool, unsigned long handle,
+                       enum zpool_mapmode mm)
+{
+       return zbud_map(pool, handle);
+}
+static void zbud_zpool_unmap(void *pool, unsigned long handle)
+{
+       zbud_unmap(pool, handle);
+}
+
+static u64 zbud_zpool_total_size(void *pool)
+{
+       return zbud_get_pool_size(pool) * PAGE_SIZE;
+}
+
+static struct zpool_driver zbud_zpool_driver = {
+       .type =         "zbud",
+       .owner =        THIS_MODULE,
+       .create =       zbud_zpool_create,
+       .destroy =      zbud_zpool_destroy,
+       .malloc =       zbud_zpool_malloc,
+       .free =         zbud_zpool_free,
+       .shrink =       zbud_zpool_shrink,
+       .map =          zbud_zpool_map,
+       .unmap =        zbud_zpool_unmap,
+       .total_size =   zbud_zpool_total_size,
+};
+
+#endif /* CONFIG_ZPOOL */
+
 /*****************
  * Helpers
 *****************/
@@ -122,7 +207,7 @@ enum buddy {
 };
 
 /* Converts an allocation size in bytes to size in zbud chunks */
-static int size_to_chunks(int size)
+static int size_to_chunks(size_t size)
 {
        return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
 }
@@ -247,7 +332,7 @@ void zbud_destroy_pool(struct zbud_pool *pool)
  * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
  * a new page.
  */
-int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp,
+int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
                        unsigned long *handle)
 {
        int chunks, i, freechunks;
@@ -511,11 +596,20 @@ static int __init init_zbud(void)
        /* Make sure the zbud header will fit in one chunk */
        BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED);
        pr_info("loaded\n");
+
+#ifdef CONFIG_ZPOOL
+       zpool_register_driver(&zbud_zpool_driver);
+#endif
+
        return 0;
 }
 
 static void __exit exit_zbud(void)
 {
+#ifdef CONFIG_ZPOOL
+       zpool_unregister_driver(&zbud_zpool_driver);
+#endif
+
        pr_info("unloaded\n");
 }
 
diff --git a/mm/zpool.c b/mm/zpool.c
new file mode 100644 (file)
index 0000000..e40612a
--- /dev/null
@@ -0,0 +1,364 @@
+/*
+ * zpool memory storage api
+ *
+ * Copyright (C) 2014 Dan Streetman
+ *
+ * This is a common frontend for memory storage pool implementations.
+ * Typically, this is used to store compressed memory.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/zpool.h>
+
+struct zpool {
+       char *type;
+
+       struct zpool_driver *driver;
+       void *pool;
+       struct zpool_ops *ops;
+
+       struct list_head list;
+};
+
+static LIST_HEAD(drivers_head);
+static DEFINE_SPINLOCK(drivers_lock);
+
+static LIST_HEAD(pools_head);
+static DEFINE_SPINLOCK(pools_lock);
+
+/**
+ * zpool_register_driver() - register a zpool implementation.
+ * @driver:    driver to register
+ */
+void zpool_register_driver(struct zpool_driver *driver)
+{
+       spin_lock(&drivers_lock);
+       atomic_set(&driver->refcount, 0);
+       list_add(&driver->list, &drivers_head);
+       spin_unlock(&drivers_lock);
+}
+EXPORT_SYMBOL(zpool_register_driver);
+
+/**
+ * zpool_unregister_driver() - unregister a zpool implementation.
+ * @driver:    driver to unregister.
+ *
+ * Module usage counting is used to prevent using a driver
+ * while/after unloading, so if this is called from module
+ * exit function, this should never fail; if called from
+ * other than the module exit function, and this returns
+ * failure, the driver is in use and must remain available.
+ */
+int zpool_unregister_driver(struct zpool_driver *driver)
+{
+       int ret = 0, refcount;
+
+       spin_lock(&drivers_lock);
+       refcount = atomic_read(&driver->refcount);
+       WARN_ON(refcount < 0);
+       if (refcount > 0)
+               ret = -EBUSY;
+       else
+               list_del(&driver->list);
+       spin_unlock(&drivers_lock);
+
+       return ret;
+}
+EXPORT_SYMBOL(zpool_unregister_driver);
+
+/**
+ * zpool_evict() - evict callback from a zpool implementation.
+ * @pool:      pool to evict from.
+ * @handle:    handle to evict.
+ *
+ * This can be used by zpool implementations to call the
+ * user's evict zpool_ops struct evict callback.
+ */
+int zpool_evict(void *pool, unsigned long handle)
+{
+       struct zpool *zpool;
+
+       spin_lock(&pools_lock);
+       list_for_each_entry(zpool, &pools_head, list) {
+               if (zpool->pool == pool) {
+                       spin_unlock(&pools_lock);
+                       if (!zpool->ops || !zpool->ops->evict)
+                               return -EINVAL;
+                       return zpool->ops->evict(zpool, handle);
+               }
+       }
+       spin_unlock(&pools_lock);
+
+       return -ENOENT;
+}
+EXPORT_SYMBOL(zpool_evict);
+
+static struct zpool_driver *zpool_get_driver(char *type)
+{
+       struct zpool_driver *driver;
+
+       spin_lock(&drivers_lock);
+       list_for_each_entry(driver, &drivers_head, list) {
+               if (!strcmp(driver->type, type)) {
+                       bool got = try_module_get(driver->owner);
+
+                       if (got)
+                               atomic_inc(&driver->refcount);
+                       spin_unlock(&drivers_lock);
+                       return got ? driver : NULL;
+               }
+       }
+
+       spin_unlock(&drivers_lock);
+       return NULL;
+}
+
+static void zpool_put_driver(struct zpool_driver *driver)
+{
+       atomic_dec(&driver->refcount);
+       module_put(driver->owner);
+}
+
+/**
+ * zpool_create_pool() - Create a new zpool
+ * @type       The type of the zpool to create (e.g. zbud, zsmalloc)
+ * @gfp                The GFP flags to use when allocating the pool.
+ * @ops                The optional ops callback.
+ *
+ * This creates a new zpool of the specified type.  The gfp flags will be
+ * used when allocating memory, if the implementation supports it.  If the
+ * ops param is NULL, then the created zpool will not be shrinkable.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: New zpool on success, NULL on failure.
+ */
+struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops)
+{
+       struct zpool_driver *driver;
+       struct zpool *zpool;
+
+       pr_info("creating pool type %s\n", type);
+
+       driver = zpool_get_driver(type);
+
+       if (!driver) {
+               request_module(type);
+               driver = zpool_get_driver(type);
+       }
+
+       if (!driver) {
+               pr_err("no driver for type %s\n", type);
+               return NULL;
+       }
+
+       zpool = kmalloc(sizeof(*zpool), gfp);
+       if (!zpool) {
+               pr_err("couldn't create zpool - out of memory\n");
+               zpool_put_driver(driver);
+               return NULL;
+       }
+
+       zpool->type = driver->type;
+       zpool->driver = driver;
+       zpool->pool = driver->create(gfp, ops);
+       zpool->ops = ops;
+
+       if (!zpool->pool) {
+               pr_err("couldn't create %s pool\n", type);
+               zpool_put_driver(driver);
+               kfree(zpool);
+               return NULL;
+       }
+
+       pr_info("created %s pool\n", type);
+
+       spin_lock(&pools_lock);
+       list_add(&zpool->list, &pools_head);
+       spin_unlock(&pools_lock);
+
+       return zpool;
+}
+
+/**
+ * zpool_destroy_pool() - Destroy a zpool
+ * @pool       The zpool to destroy.
+ *
+ * Implementations must guarantee this to be thread-safe,
+ * however only when destroying different pools.  The same
+ * pool should only be destroyed once, and should not be used
+ * after it is destroyed.
+ *
+ * This destroys an existing zpool.  The zpool should not be in use.
+ */
+void zpool_destroy_pool(struct zpool *zpool)
+{
+       pr_info("destroying pool type %s\n", zpool->type);
+
+       spin_lock(&pools_lock);
+       list_del(&zpool->list);
+       spin_unlock(&pools_lock);
+       zpool->driver->destroy(zpool->pool);
+       zpool_put_driver(zpool->driver);
+       kfree(zpool);
+}
+
+/**
+ * zpool_get_type() - Get the type of the zpool
+ * @pool       The zpool to check
+ *
+ * This returns the type of the pool.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: The type of zpool.
+ */
+char *zpool_get_type(struct zpool *zpool)
+{
+       return zpool->type;
+}
+
+/**
+ * zpool_malloc() - Allocate memory
+ * @pool       The zpool to allocate from.
+ * @size       The amount of memory to allocate.
+ * @gfp                The GFP flags to use when allocating memory.
+ * @handle     Pointer to the handle to set
+ *
+ * This allocates the requested amount of memory from the pool.
+ * The gfp flags will be used when allocating memory, if the
+ * implementation supports it.  The provided @handle will be
+ * set to the allocated object handle.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: 0 on success, negative value on error.
+ */
+int zpool_malloc(struct zpool *zpool, size_t size, gfp_t gfp,
+                       unsigned long *handle)
+{
+       return zpool->driver->malloc(zpool->pool, size, gfp, handle);
+}
+
+/**
+ * zpool_free() - Free previously allocated memory
+ * @pool       The zpool that allocated the memory.
+ * @handle     The handle to the memory to free.
+ *
+ * This frees previously allocated memory.  This does not guarantee
+ * that the pool will actually free memory, only that the memory
+ * in the pool will become available for use by the pool.
+ *
+ * Implementations must guarantee this to be thread-safe,
+ * however only when freeing different handles.  The same
+ * handle should only be freed once, and should not be used
+ * after freeing.
+ */
+void zpool_free(struct zpool *zpool, unsigned long handle)
+{
+       zpool->driver->free(zpool->pool, handle);
+}
+
+/**
+ * zpool_shrink() - Shrink the pool size
+ * @pool       The zpool to shrink.
+ * @pages      The number of pages to shrink the pool.
+ * @reclaimed  The number of pages successfully evicted.
+ *
+ * This attempts to shrink the actual memory size of the pool
+ * by evicting currently used handle(s).  If the pool was
+ * created with no zpool_ops, or the evict call fails for any
+ * of the handles, this will fail.  If non-NULL, the @reclaimed
+ * parameter will be set to the number of pages reclaimed,
+ * which may be more than the number of pages requested.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: 0 on success, negative value on error/failure.
+ */
+int zpool_shrink(struct zpool *zpool, unsigned int pages,
+                       unsigned int *reclaimed)
+{
+       return zpool->driver->shrink(zpool->pool, pages, reclaimed);
+}
+
+/**
+ * zpool_map_handle() - Map a previously allocated handle into memory
+ * @pool       The zpool that the handle was allocated from
+ * @handle     The handle to map
+ * @mm         How the memory should be mapped
+ *
+ * This maps a previously allocated handle into memory.  The @mm
+ * param indicates to the implementation how the memory will be
+ * used, i.e. read-only, write-only, read-write.  If the
+ * implementation does not support it, the memory will be treated
+ * as read-write.
+ *
+ * This may hold locks, disable interrupts, and/or preemption,
+ * and the zpool_unmap_handle() must be called to undo those
+ * actions.  The code that uses the mapped handle should complete
+ * its operations on the mapped handle memory quickly and unmap
+ * as soon as possible.  As the implementation may use per-cpu
+ * data, multiple handles should not be mapped concurrently on
+ * any cpu.
+ *
+ * Returns: A pointer to the handle's mapped memory area.
+ */
+void *zpool_map_handle(struct zpool *zpool, unsigned long handle,
+                       enum zpool_mapmode mapmode)
+{
+       return zpool->driver->map(zpool->pool, handle, mapmode);
+}
+
+/**
+ * zpool_unmap_handle() - Unmap a previously mapped handle
+ * @pool       The zpool that the handle was allocated from
+ * @handle     The handle to unmap
+ *
+ * This unmaps a previously mapped handle.  Any locks or other
+ * actions that the implementation took in zpool_map_handle()
+ * will be undone here.  The memory area returned from
+ * zpool_map_handle() should no longer be used after this.
+ */
+void zpool_unmap_handle(struct zpool *zpool, unsigned long handle)
+{
+       zpool->driver->unmap(zpool->pool, handle);
+}
+
+/**
+ * zpool_get_total_size() - The total size of the pool
+ * @pool       The zpool to check
+ *
+ * This returns the total size in bytes of the pool.
+ *
+ * Returns: Total size of the zpool in bytes.
+ */
+u64 zpool_get_total_size(struct zpool *zpool)
+{
+       return zpool->driver->total_size(zpool->pool);
+}
+
+static int __init init_zpool(void)
+{
+       pr_info("loaded\n");
+       return 0;
+}
+
+static void __exit exit_zpool(void)
+{
+       pr_info("unloaded\n");
+}
+
+module_init(init_zpool);
+module_exit(exit_zpool);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
+MODULE_DESCRIPTION("Common API for compressed memory storage");
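
Read end to end, the new API is used roughly as follows. This is a hypothetical consumer sketch, not code from this series: the pool type, the 64-byte allocation and the no-op evict callback are chosen purely for illustration, and error paths are kept minimal.

#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/string.h>
#include <linux/zpool.h>

/* Hypothetical evict callback; a real consumer would write the object back. */
static int example_evict(struct zpool *pool, unsigned long handle)
{
	return -EINVAL;
}

static struct zpool_ops example_ops = {
	.evict = example_evict,
};

static char example_type[] = "zbud";

static int __init example_init(void)
{
	struct zpool *pool;
	unsigned long handle;
	char *buf;

	pool = zpool_create_pool(example_type, GFP_KERNEL, &example_ops);
	if (!pool)
		return -ENOMEM;

	/* Allocate an object, map it for writing, then release it again. */
	if (zpool_malloc(pool, 64, GFP_KERNEL, &handle)) {
		zpool_destroy_pool(pool);
		return -ENOMEM;
	}

	buf = zpool_map_handle(pool, handle, ZPOOL_MM_WO);
	memset(buf, 0, 64);
	zpool_unmap_handle(pool, handle);

	pr_info("%s pool holds %llu bytes\n",
		zpool_get_type(pool), zpool_get_total_size(pool));

	zpool_free(pool, handle);
	zpool_destroy_pool(pool);
	return 0;
}
module_init(example_init);

MODULE_LICENSE("GPL");
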
index fe78189624cfce58d29a567b6c379b80367775de..4e2fc83cb394b9b53384fdc82288e7b6ab793b3a 100644 (file)
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -92,6 +92,7 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 #include <linux/zsmalloc.h>
+#include <linux/zpool.h>
 
 /*
  * This must be power of 2 and greater than or equal to sizeof(link_free).
@@ -240,6 +241,81 @@ struct mapping_area {
        enum zs_mapmode vm_mm; /* mapping mode */
 };
 
+/* zpool driver */
+
+#ifdef CONFIG_ZPOOL
+
+static void *zs_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops)
+{
+       return zs_create_pool(gfp);
+}
+
+static void zs_zpool_destroy(void *pool)
+{
+       zs_destroy_pool(pool);
+}
+
+static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp,
+                       unsigned long *handle)
+{
+       *handle = zs_malloc(pool, size);
+       return *handle ? 0 : -1;
+}
+static void zs_zpool_free(void *pool, unsigned long handle)
+{
+       zs_free(pool, handle);
+}
+
+static int zs_zpool_shrink(void *pool, unsigned int pages,
+                       unsigned int *reclaimed)
+{
+       return -EINVAL;
+}
+
+static void *zs_zpool_map(void *pool, unsigned long handle,
+                       enum zpool_mapmode mm)
+{
+       enum zs_mapmode zs_mm;
+
+       switch (mm) {
+       case ZPOOL_MM_RO:
+               zs_mm = ZS_MM_RO;
+               break;
+       case ZPOOL_MM_WO:
+               zs_mm = ZS_MM_WO;
+               break;
+       case ZPOOL_MM_RW: /* fallthru */
+       default:
+               zs_mm = ZS_MM_RW;
+               break;
+       }
+
+       return zs_map_object(pool, handle, zs_mm);
+}
+static void zs_zpool_unmap(void *pool, unsigned long handle)
+{
+       zs_unmap_object(pool, handle);
+}
+
+static u64 zs_zpool_total_size(void *pool)
+{
+       return zs_get_total_size_bytes(pool);
+}
+
+static struct zpool_driver zs_zpool_driver = {
+       .type =         "zsmalloc",
+       .owner =        THIS_MODULE,
+       .create =       zs_zpool_create,
+       .destroy =      zs_zpool_destroy,
+       .malloc =       zs_zpool_malloc,
+       .free =         zs_zpool_free,
+       .shrink =       zs_zpool_shrink,
+       .map =          zs_zpool_map,
+       .unmap =        zs_zpool_unmap,
+       .total_size =   zs_zpool_total_size,
+};
+
+#endif /* CONFIG_ZPOOL */
 
 /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
 static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
@@ -690,7 +766,7 @@ static inline void __zs_cpu_down(struct mapping_area *area)
 static inline void *__zs_map_object(struct mapping_area *area,
                                struct page *pages[2], int off, int size)
 {
-       BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages));
+       BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages));
        area->vm_addr = area->vm->addr;
        return area->vm_addr + off;
 }
@@ -814,6 +890,10 @@ static void zs_exit(void)
 {
        int cpu;
 
+#ifdef CONFIG_ZPOOL
+       zpool_unregister_driver(&zs_zpool_driver);
+#endif
+
        cpu_notifier_register_begin();
 
        for_each_online_cpu(cpu)
@@ -840,6 +920,10 @@ static int zs_init(void)
 
        cpu_notifier_register_done();
 
+#ifdef CONFIG_ZPOOL
+       zpool_register_driver(&zs_zpool_driver);
+#endif
+
        return 0;
 fail:
        zs_exit();
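
One consequence of the zsmalloc driver above is that its shrink op always returns -EINVAL, so a zpool consumer has to treat a failed zpool_shrink() as "cannot make room" rather than retrying, which is what zswap does further below. A small hypothetical helper (a sketch only, not code from this series) capturing that pattern:

#include <linux/errno.h>
#include <linux/zpool.h>

/* Hypothetical helper: try to evict one page, tolerating backends such as
 * zsmalloc whose zpool driver does not implement reclaim. */
static int example_make_room(struct zpool *pool)
{
	unsigned int reclaimed = 0;

	if (zpool_shrink(pool, 1, &reclaimed))
		return -ENOMEM;	/* no evict op, or eviction failed */

	return reclaimed ? 0 : -ENOMEM;
}
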
index 008388fe7b0ff923b2300504b3a7717869fe12f8..032c21eeab2b0f05ebe73a9a7d1fd820651f3cef 100644 (file)
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -34,7 +34,7 @@
 #include <linux/swap.h>
 #include <linux/crypto.h>
 #include <linux/mempool.h>
-#include <linux/zbud.h>
+#include <linux/zpool.h>
 
 #include <linux/mm_types.h>
 #include <linux/page-flags.h>
@@ -45,8 +45,8 @@
 /*********************************
 * statistics
 **********************************/
-/* Number of memory pages used by the compressed pool */
-static u64 zswap_pool_pages;
+/* Total bytes used by the compressed storage */
+static u64 zswap_pool_total_size;
 /* The number of compressed pages currently stored in zswap */
 static atomic_t zswap_stored_pages = ATOMIC_INIT(0);
 
@@ -89,8 +89,13 @@ static unsigned int zswap_max_pool_percent = 20;
 module_param_named(max_pool_percent,
                        zswap_max_pool_percent, uint, 0644);
 
-/* zbud_pool is shared by all of zswap backend  */
-static struct zbud_pool *zswap_pool;
+/* Compressed storage to use */
+#define ZSWAP_ZPOOL_DEFAULT "zbud"
+static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
+module_param_named(zpool, zswap_zpool_type, charp, 0444);
+
+/* zpool is shared by all of zswap backend  */
+static struct zpool *zswap_pool;
 
 /*********************************
 * compression functions
@@ -168,7 +173,7 @@ static void zswap_comp_exit(void)
  *            be held while changing the refcount.  Since the lock must
  *            be held, there is no reason to also make refcount atomic.
  * offset - the swap offset for the entry.  Index into the red-black tree.
- * handle - zbud allocation handle that stores the compressed page data
+ * handle - zpool allocation handle that stores the compressed page data
  * length - the length in bytes of the compressed page data.  Needed during
  *          decompression
  */
@@ -284,15 +289,15 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
 }
 
 /*
- * Carries out the common pattern of freeing and entry's zbud allocation,
+ * Carries out the common pattern of freeing an entry's zpool allocation,
  * freeing the entry itself, and decrementing the number of stored pages.
  */
 static void zswap_free_entry(struct zswap_entry *entry)
 {
-       zbud_free(zswap_pool, entry->handle);
+       zpool_free(zswap_pool, entry->handle);
        zswap_entry_cache_free(entry);
        atomic_dec(&zswap_stored_pages);
-       zswap_pool_pages = zbud_get_pool_size(zswap_pool);
+       zswap_pool_total_size = zpool_get_total_size(zswap_pool);
 }
 
 /* caller must hold the tree lock */
@@ -409,7 +414,7 @@ cleanup:
 static bool zswap_is_full(void)
 {
        return totalram_pages * zswap_max_pool_percent / 100 <
-               zswap_pool_pages;
+               DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
 }
 
 /*********************************
@@ -525,7 +530,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
  * the swap cache, the compressed version stored by zswap can be
  * freed.
  */
-static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
+static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
 {
        struct zswap_header *zhdr;
        swp_entry_t swpentry;
@@ -541,9 +546,9 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
        };
 
        /* extract swpentry from data */
-       zhdr = zbud_map(pool, handle);
+       zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
        swpentry = zhdr->swpentry; /* here */
-       zbud_unmap(pool, handle);
+       zpool_unmap_handle(pool, handle);
        tree = zswap_trees[swp_type(swpentry)];
        offset = swp_offset(swpentry);
 
@@ -573,13 +578,13 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
        case ZSWAP_SWAPCACHE_NEW: /* page is locked */
                /* decompress */
                dlen = PAGE_SIZE;
-               src = (u8 *)zbud_map(zswap_pool, entry->handle) +
-                       sizeof(struct zswap_header);
+               src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
+                               ZPOOL_MM_RO) + sizeof(struct zswap_header);
                dst = kmap_atomic(page);
                ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
                                entry->length, dst, &dlen);
                kunmap_atomic(dst);
-               zbud_unmap(zswap_pool, entry->handle);
+               zpool_unmap_handle(zswap_pool, entry->handle);
                BUG_ON(ret);
                BUG_ON(dlen != PAGE_SIZE);
 
@@ -652,7 +657,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
        /* reclaim space if needed */
        if (zswap_is_full()) {
                zswap_pool_limit_hit++;
-               if (zbud_reclaim_page(zswap_pool, 8)) {
+               if (zpool_shrink(zswap_pool, 1, NULL)) {
                        zswap_reject_reclaim_fail++;
                        ret = -ENOMEM;
                        goto reject;
@@ -679,7 +684,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 
        /* store */
        len = dlen + sizeof(struct zswap_header);
-       ret = zbud_alloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
+       ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
                &handle);
        if (ret == -ENOSPC) {
                zswap_reject_compress_poor++;
@@ -689,11 +694,11 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
                zswap_reject_alloc_fail++;
                goto freepage;
        }
-       zhdr = zbud_map(zswap_pool, handle);
+       zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW);
        zhdr->swpentry = swp_entry(type, offset);
        buf = (u8 *)(zhdr + 1);
        memcpy(buf, dst, dlen);
-       zbud_unmap(zswap_pool, handle);
+       zpool_unmap_handle(zswap_pool, handle);
        put_cpu_var(zswap_dstmem);
 
        /* populate entry */
@@ -716,7 +721,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 
        /* update stats */
        atomic_inc(&zswap_stored_pages);
-       zswap_pool_pages = zbud_get_pool_size(zswap_pool);
+       zswap_pool_total_size = zpool_get_total_size(zswap_pool);
 
        return 0;
 
@@ -752,13 +757,13 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
 
        /* decompress */
        dlen = PAGE_SIZE;
-       src = (u8 *)zbud_map(zswap_pool, entry->handle) +
-                       sizeof(struct zswap_header);
+       src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
+                       ZPOOL_MM_RO) + sizeof(struct zswap_header);
        dst = kmap_atomic(page);
        ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
                dst, &dlen);
        kunmap_atomic(dst);
-       zbud_unmap(zswap_pool, entry->handle);
+       zpool_unmap_handle(zswap_pool, entry->handle);
        BUG_ON(ret);
 
        spin_lock(&tree->lock);
@@ -811,7 +816,7 @@ static void zswap_frontswap_invalidate_area(unsigned type)
        zswap_trees[type] = NULL;
 }
 
-static struct zbud_ops zswap_zbud_ops = {
+static struct zpool_ops zswap_zpool_ops = {
        .evict = zswap_writeback_entry
 };
 
@@ -869,8 +874,8 @@ static int __init zswap_debugfs_init(void)
                        zswap_debugfs_root, &zswap_written_back_pages);
        debugfs_create_u64("duplicate_entry", S_IRUGO,
                        zswap_debugfs_root, &zswap_duplicate_entry);
-       debugfs_create_u64("pool_pages", S_IRUGO,
-                       zswap_debugfs_root, &zswap_pool_pages);
+       debugfs_create_u64("pool_total_size", S_IRUGO,
+                       zswap_debugfs_root, &zswap_pool_total_size);
        debugfs_create_atomic_t("stored_pages", S_IRUGO,
                        zswap_debugfs_root, &zswap_stored_pages);
 
@@ -895,16 +900,26 @@ static void __exit zswap_debugfs_exit(void) { }
 **********************************/
 static int __init init_zswap(void)
 {
+       gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
+
        if (!zswap_enabled)
                return 0;
 
        pr_info("loading zswap\n");
 
-       zswap_pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
+       zswap_pool = zpool_create_pool(zswap_zpool_type, gfp, &zswap_zpool_ops);
+       if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
+               pr_info("%s zpool not available\n", zswap_zpool_type);
+               zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
+               zswap_pool = zpool_create_pool(zswap_zpool_type, gfp,
+                                       &zswap_zpool_ops);
+       }
        if (!zswap_pool) {
-               pr_err("zbud pool creation failed\n");
+               pr_err("%s zpool not available\n", zswap_zpool_type);
+               pr_err("zpool creation failed\n");
                goto error;
        }
+       pr_info("using %s pool\n", zswap_zpool_type);
 
        if (zswap_entry_cache_create()) {
                pr_err("entry cache creation failed\n");
@@ -928,7 +943,7 @@ pcpufail:
 compfail:
        zswap_entry_cache_destory();
 cachefail:
-       zbud_destroy_pool(zswap_pool);
+       zpool_destroy_pool(zswap_pool);
 error:
        return -ENOMEM;
 }
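
A detail worth noting in the conversion above: zbud_get_pool_size() counted pages, while zpool_get_total_size() reports bytes, which is why zswap_is_full() now rounds the byte count up to pages before comparing against the percent-of-RAM cap. Restated as standalone arithmetic (a user-space sketch assuming a hard-coded 4 KiB page size; not kernel code):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Mirrors the comparison in zswap_is_full() after the zpool conversion. */
static bool pool_is_full(unsigned long long pool_total_bytes,
			 unsigned long totalram_pages,
			 unsigned int max_pool_percent)
{
	return totalram_pages * max_pool_percent / 100 <
	       DIV_ROUND_UP(pool_total_bytes, PAGE_SIZE);
}

int main(void)
{
	/* 1 GiB of RAM, 20% cap, pool currently holding 300 MiB: full. */
	printf("%d\n", pool_is_full(300ULL << 20, (1UL << 30) / PAGE_SIZE, 20));
	return 0;
}
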
index 022d18ab27a64dabc04ba8e71e1455c10904ae8a..52c43f9042209deaba0be22b549724ad28de1a77 100644 (file)
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -188,7 +188,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
 
        /* Reached the end of the list, so insert after 'frag_entry_last'. */
        if (likely(frag_entry_last)) {
-               hlist_add_after(&frag_entry_last->list, &frag_entry_new->list);
+               hlist_add_behind(&frag_entry_last->list, &frag_entry_new->list);
                chain->size += skb->len - hdr_size;
                chain->timestamp = jiffies;
                ret = true;
index b4845f4b2bb414c7403e225528add6840eb26f74..7751c92c8c57fc24b0c18e4d20a095bfa02e9ff0 100644 (file)
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1174,7 +1174,7 @@ static void br_multicast_add_router(struct net_bridge *br,
        }
 
        if (slot)
-               hlist_add_after_rcu(slot, &port->rlist);
+               hlist_add_behind_rcu(&port->rlist, slot);
        else
                hlist_add_head_rcu(&port->rlist, &br->router_list);
 }
index 5afeb5aa4c7cfd9b0f794a45840f6fbd79b90315..e9cb2588e4161c41613c0e50a17ea91b7416a514 100644 (file)
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -940,7 +940,7 @@ static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
                        last = li;
                }
                if (last)
-                       hlist_add_after_rcu(&last->hlist, &new->hlist);
+                       hlist_add_behind_rcu(&new->hlist, &last->hlist);
                else
                        hlist_add_before_rcu(&new->hlist, &li->hlist);
        }
index 731e1e1722d9b4322f3907e57e3a1fd1994230b5..fd0dc47f471dad23566248fecadd571a1a4c00d9 100644 (file)
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -277,7 +277,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
                last = p;
        }
        if (last)
-               hlist_add_after_rcu(&last->list, &newp->list);
+               hlist_add_behind_rcu(&newp->list, &last->list);
        else
                hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
 out:
index 0525d78ba32866c64c0b31bcf5b9d147855c308a..beeed602aeb379f2ddfbd74a61c798cd371636ba 100644 (file)
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -389,7 +389,7 @@ redo:
                        if (h != h0)
                                continue;
                        hlist_del(&pol->bydst);
-                       hlist_add_after(entry0, &pol->bydst);
+                       hlist_add_behind(&pol->bydst, entry0);
                }
                entry0 = &pol->bydst;
        }
@@ -654,7 +654,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
                        break;
        }
        if (newpos)
-               hlist_add_after(newpos, &policy->bydst);
+               hlist_add_behind(&policy->bydst, newpos);
        else
                hlist_add_head(&policy->bydst, chain);
        xfrm_pol_hold(policy);
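
The conversions above are mechanical, but the argument order deserves a second look: the old hlist_add_after(prev, new) took the existing list node first, whereas hlist_add_behind(new, prev) takes the node being inserted first, matching the rest of the hlist_add_* helpers. Below is a self-contained user-space model of the helper's semantics (the hnode names are invented; this is not the kernel implementation itself):

#include <stdio.h>
#include <stddef.h>

struct hnode {
	struct hnode *next;
	struct hnode **pprev;
	int val;
};

/* Minimal re-implementation of the hlist_add_behind() semantics:
 * insert n after prev, keeping pprev back-pointers consistent. */
static void hnode_add_behind(struct hnode *n, struct hnode *prev)
{
	n->next = prev->next;
	prev->next = n;
	n->pprev = &prev->next;
	if (n->next)
		n->next->pprev = &n->next;
}

int main(void)
{
	struct hnode a = { .val = 1 }, b = { .val = 2 }, c = { .val = 3 };
	struct hnode *head = &a;

	a.pprev = &head;
	hnode_add_behind(&c, &a);	/* list: a, c */
	hnode_add_behind(&b, &a);	/* list: a, b, c */

	for (struct hnode *n = head; n; n = n->next)
		printf("%d ", n->val);
	printf("\n");
	return 0;
}
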
index 182be0f124074c6d508d8a97e498ea7925665a1f..31a731e06f5022afd7c040d5688840491b228e9d 100755 (executable)
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -309,9 +309,12 @@ our $Operators     = qr{
 our $c90_Keywords = qr{do|for|while|if|else|return|goto|continue|switch|default|case|break}x;
 
 our $NonptrType;
+our $NonptrTypeMisordered;
 our $NonptrTypeWithAttr;
 our $Type;
+our $TypeMisordered;
 our $Declare;
+our $DeclareMisordered;
 
 our $NON_ASCII_UTF8    = qr{
        [\xC2-\xDF][\x80-\xBF]               # non-overlong 2-byte
@@ -353,16 +356,36 @@ our $signature_tags = qr{(?xi:
        Cc:
 )};
 
+our @typeListMisordered = (
+       qr{char\s+(?:un)?signed},
+       qr{int\s+(?:(?:un)?signed\s+)?short\s},
+       qr{int\s+short(?:\s+(?:un)?signed)},
+       qr{short\s+int(?:\s+(?:un)?signed)},
+       qr{(?:un)?signed\s+int\s+short},
+       qr{short\s+(?:un)?signed},
+       qr{long\s+int\s+(?:un)?signed},
+       qr{int\s+long\s+(?:un)?signed},
+       qr{long\s+(?:un)?signed\s+int},
+       qr{int\s+(?:un)?signed\s+long},
+       qr{int\s+(?:un)?signed},
+       qr{int\s+long\s+long\s+(?:un)?signed},
+       qr{long\s+long\s+int\s+(?:un)?signed},
+       qr{long\s+long\s+(?:un)?signed\s+int},
+       qr{long\s+long\s+(?:un)?signed},
+       qr{long\s+(?:un)?signed},
+);
+
 our @typeList = (
        qr{void},
-       qr{(?:unsigned\s+)?char},
-       qr{(?:unsigned\s+)?short},
-       qr{(?:unsigned\s+)?int},
-       qr{(?:unsigned\s+)?long},
-       qr{(?:unsigned\s+)?long\s+int},
-       qr{(?:unsigned\s+)?long\s+long},
-       qr{(?:unsigned\s+)?long\s+long\s+int},
-       qr{unsigned},
+       qr{(?:(?:un)?signed\s+)?char},
+       qr{(?:(?:un)?signed\s+)?short\s+int},
+       qr{(?:(?:un)?signed\s+)?short},
+       qr{(?:(?:un)?signed\s+)?int},
+       qr{(?:(?:un)?signed\s+)?long\s+int},
+       qr{(?:(?:un)?signed\s+)?long\s+long\s+int},
+       qr{(?:(?:un)?signed\s+)?long\s+long},
+       qr{(?:(?:un)?signed\s+)?long},
+       qr{(?:un)?signed},
        qr{float},
        qr{double},
        qr{bool},
@@ -372,6 +395,7 @@ our @typeList = (
        qr{${Ident}_t},
        qr{${Ident}_handler},
        qr{${Ident}_handler_fn},
+       @typeListMisordered,
 );
 our @typeListWithAttr = (
        @typeList,
@@ -399,11 +423,6 @@ foreach my $entry (@mode_permission_funcs) {
        $mode_perms_search .= $entry->[0];
 }
 
-our $declaration_macros = qr{(?x:
-       (?:$Storage\s+)?(?:DECLARE|DEFINE)_[A-Z]+\s*\(|
-       (?:$Storage\s+)?LIST_HEAD\s*\(
-)};
-
 our $allowed_asm_includes = qr{(?x:
        irq|
        memory
@@ -413,6 +432,7 @@ our $allowed_asm_includes = qr{(?x:
 sub build_types {
        my $mods = "(?x:  \n" . join("|\n  ", @modifierList) . "\n)";
        my $all = "(?x:  \n" . join("|\n  ", @typeList) . "\n)";
+       my $Misordered = "(?x:  \n" . join("|\n  ", @typeListMisordered) . "\n)";
        my $allWithAttr = "(?x:  \n" . join("|\n  ", @typeListWithAttr) . "\n)";
        $Modifier       = qr{(?:$Attribute|$Sparse|$mods)};
        $NonptrType     = qr{
@@ -424,6 +444,13 @@ sub build_types {
                        )
                        (?:\s+$Modifier|\s+const)*
                  }x;
+       $NonptrTypeMisordered   = qr{
+                       (?:$Modifier\s+|const\s+)*
+                       (?:
+                               (?:${Misordered}\b)
+                       )
+                       (?:\s+$Modifier|\s+const)*
+                 }x;
        $NonptrTypeWithAttr     = qr{
                        (?:$Modifier\s+|const\s+)*
                        (?:
@@ -435,10 +462,16 @@ sub build_types {
                  }x;
        $Type   = qr{
                        $NonptrType
-                       (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*|\[\])+|(?:\s*\[\s*\])+)?
+                       (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*\s*(?:const\s*)?|\[\])+|(?:\s*\[\s*\])+)?
+                       (?:\s+$Inline|\s+$Modifier)*
+                 }x;
+       $TypeMisordered = qr{
+                       $NonptrTypeMisordered
+                       (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*\s*(?:const\s*)?|\[\])+|(?:\s*\[\s*\])+)?
                        (?:\s+$Inline|\s+$Modifier)*
                  }x;
        $Declare        = qr{(?:$Storage\s+(?:$Inline\s+)?)?$Type};
+       $DeclareMisordered      = qr{(?:$Storage\s+(?:$Inline\s+)?)?$TypeMisordered};
 }
 build_types();
 
@@ -452,6 +485,12 @@ our $balanced_parens = qr/(\((?:[^\(\)]++|(?-1))*\))/;
 our $LvalOrFunc        = qr{((?:[\&\*]\s*)?$Lval)\s*($balanced_parens{0,1})\s*};
 our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant)};
 
+our $declaration_macros = qr{(?x:
+       (?:$Storage\s+)?(?:[A-Z_][A-Z0-9]*_){0,2}(?:DEFINE|DECLARE)(?:_[A-Z0-9]+){1,2}\s*\(|
+       (?:$Storage\s+)?LIST_HEAD\s*\(|
+       (?:$Storage\s+)?${Type}\s+uninitialized_var\s*\(
+)};
+
 sub deparenthesize {
        my ($string) = @_;
        return "" if (!defined($string));
@@ -550,11 +589,43 @@ sub seed_camelcase_includes {
        }
 }
 
+sub git_commit_info {
+       my ($commit, $id, $desc) = @_;
+
+       return ($id, $desc) if ((which("git") eq "") || !(-e ".git"));
+
+       my $output = `git log --no-color --format='%H %s' -1 $commit 2>&1`;
+       $output =~ s/^\s*//gm;
+       my @lines = split("\n", $output);
+
+       if ($lines[0] =~ /^error: short SHA1 $commit is ambiguous\./) {
+# Maybe one day convert this block of bash into something that returns
+# all matching commit ids, but it's very slow...
+#
+#              echo "checking commits $1..."
+#              git rev-list --remotes | grep -i "^$1" |
+#              while read line ; do
+#                  git log --format='%H %s' -1 $line |
+#                  echo "commit $(cut -c 1-12,41-)"
+#              done
+       } elsif ($lines[0] =~ /^fatal: ambiguous argument '$commit': unknown revision or path not in the working tree\./) {
+       } else {
+               $id = substr($lines[0], 0, 12);
+               $desc = substr($lines[0], 41);
+       }
+
+       return ($id, $desc);
+}
+
 $chk_signoff = 0 if ($file);
 
 my @rawlines = ();
 my @lines = ();
 my @fixed = ();
+my @fixed_inserted = ();
+my @fixed_deleted = ();
+my $fixlinenr = -1;
+
 my $vname;
 for my $filename (@ARGV) {
        my $FILE;
@@ -583,6 +654,9 @@ for my $filename (@ARGV) {
        @rawlines = ();
        @lines = ();
        @fixed = ();
+       @fixed_inserted = ();
+       @fixed_deleted = ();
+       $fixlinenr = -1;
 }
 
 exit($exit);
@@ -674,6 +748,18 @@ sub format_email {
        return $formatted_email;
 }
 
+sub which {
+       my ($bin) = @_;
+
+       foreach my $path (split(/:/, $ENV{PATH})) {
+               if (-e "$path/$bin") {
+                       return "$path/$bin";
+               }
+       }
+
+       return "";
+}
+
 sub which_conf {
        my ($conf) = @_;
 
@@ -1483,6 +1569,90 @@ sub report_dump {
        our @report;
 }
 
+sub fixup_current_range {
+       my ($lineRef, $offset, $length) = @_;
+
+       if ($$lineRef =~ /^\@\@ -\d+,\d+ \+(\d+),(\d+) \@\@/) {
+               my $o = $1;
+               my $l = $2;
+               my $no = $o + $offset;
+               my $nl = $l + $length;
+               $$lineRef =~ s/\+$o,$l \@\@/\+$no,$nl \@\@/;
+       }
+}
+
+sub fix_inserted_deleted_lines {
+       my ($linesRef, $insertedRef, $deletedRef) = @_;
+
+       my $range_last_linenr = 0;
+       my $delta_offset = 0;
+
+       my $old_linenr = 0;
+       my $new_linenr = 0;
+
+       my $next_insert = 0;
+       my $next_delete = 0;
+
+       my @lines = ();
+
+       my $inserted = @{$insertedRef}[$next_insert++];
+       my $deleted = @{$deletedRef}[$next_delete++];
+
+       foreach my $old_line (@{$linesRef}) {
+               my $save_line = 1;
+               my $line = $old_line;   #don't modify the array
+               if ($line =~ /^(?:\+\+\+|\-\-\-)\s+\S+/) {      #new filename
+                       $delta_offset = 0;
+               } elsif ($line =~ /^\@\@ -\d+,\d+ \+\d+,\d+ \@\@/) {    #new hunk
+                       $range_last_linenr = $new_linenr;
+                       fixup_current_range(\$line, $delta_offset, 0);
+               }
+
+               while (defined($deleted) && ${$deleted}{'LINENR'} == $old_linenr) {
+                       $deleted = @{$deletedRef}[$next_delete++];
+                       $save_line = 0;
+                       fixup_current_range(\$lines[$range_last_linenr], $delta_offset--, -1);
+               }
+
+               while (defined($inserted) && ${$inserted}{'LINENR'} == $old_linenr) {
+                       push(@lines, ${$inserted}{'LINE'});
+                       $inserted = @{$insertedRef}[$next_insert++];
+                       $new_linenr++;
+                       fixup_current_range(\$lines[$range_last_linenr], $delta_offset++, 1);
+               }
+
+               if ($save_line) {
+                       push(@lines, $line);
+                       $new_linenr++;
+               }
+
+               $old_linenr++;
+       }
+
+       return @lines;
+}
+
+sub fix_insert_line {
+       my ($linenr, $line) = @_;
+
+       my $inserted = {
+               LINENR => $linenr,
+               LINE => $line,
+       };
+       push(@fixed_inserted, $inserted);
+}
+
+sub fix_delete_line {
+       my ($linenr, $line) = @_;
+
+       my $deleted = {
+               LINENR => $linenr,
+               LINE => $line,
+       };
+
+       push(@fixed_deleted, $deleted);
+}
+
 sub ERROR {
        my ($type, $msg) = @_;
 
@@ -1637,11 +1807,13 @@ sub process {
        my $signoff = 0;
        my $is_patch = 0;
 
-       my $in_header_lines = 1;
+       my $in_header_lines = $file ? 0 : 1;
        my $in_commit_log = 0;          #Scanning lines before patch
-
+       my $reported_maintainer_file = 0;
        my $non_utf8_charset = 0;
 
+       my $last_blank_line = 0;
+
        our @report = ();
        our $cnt_lines = 0;
        our $cnt_error = 0;
@@ -1759,8 +1931,10 @@ sub process {
 
        $realcnt = 0;
        $linenr = 0;
+       $fixlinenr = -1;
        foreach my $line (@lines) {
                $linenr++;
+               $fixlinenr++;
                my $sline = $line;      #copy of $line
                $sline =~ s/$;/ /g;     #with comments as spaces
 
@@ -1891,7 +2065,7 @@ sub process {
                                if (WARN("BAD_SIGN_OFF",
                                         "Do not use whitespace before $ucfirst_sign_off\n" . $herecurr) &&
                                    $fix) {
-                                       $fixed[$linenr - 1] =
+                                       $fixed[$fixlinenr] =
                                            "$ucfirst_sign_off $email";
                                }
                        }
@@ -1899,7 +2073,7 @@ sub process {
                                if (WARN("BAD_SIGN_OFF",
                                         "'$ucfirst_sign_off' is the preferred signature form\n" . $herecurr) &&
                                    $fix) {
-                                       $fixed[$linenr - 1] =
+                                       $fixed[$fixlinenr] =
                                            "$ucfirst_sign_off $email";
                                }
 
@@ -1908,7 +2082,7 @@ sub process {
                                if (WARN("BAD_SIGN_OFF",
                                         "Use a single space after $ucfirst_sign_off\n" . $herecurr) &&
                                    $fix) {
-                                       $fixed[$linenr - 1] =
+                                       $fixed[$fixlinenr] =
                                            "$ucfirst_sign_off $email";
                                }
                        }
@@ -1956,6 +2130,31 @@ sub process {
                              "Remove Gerrit Change-Id's before submitting upstream.\n" . $herecurr);
                }
 
+# Check for improperly formed commit descriptions
+               if ($in_commit_log &&
+                   $line =~ /\bcommit\s+[0-9a-f]{5,}/i &&
+                   $line !~ /\b[Cc]ommit [0-9a-f]{12,16} \("/) {
+                       $line =~ /\b(c)ommit\s+([0-9a-f]{5,})/i;
+                       my $init_char = $1;
+                       my $orig_commit = lc($2);
+                       my $id = '01234567890ab';
+                       my $desc = 'commit description';
+                       ($id, $desc) = git_commit_info($orig_commit, $id, $desc);
+                       ERROR("GIT_COMMIT_ID",
+                             "Please use 12 to 16 chars for the git commit ID like: '${init_char}ommit $id (\"$desc\")'\n" . $herecurr);
+               }
+
+# Check for added, moved or deleted files
+               if (!$reported_maintainer_file && !$in_commit_log &&
+                   ($line =~ /^(?:new|deleted) file mode\s*\d+\s*$/ ||
+                    $line =~ /^rename (?:from|to) [\w\/\.\-]+\s*$/ ||
+                    ($line =~ /\{\s*([\w\/\.\-]*)\s*\=\>\s*([\w\/\.\-]*)\s*\}/ &&
+                     (defined($1) || defined($2))))) {
+                       $reported_maintainer_file = 1;
+                       WARN("FILE_PATH_CHANGES",
+                            "added, moved or deleted file(s), does MAINTAINERS need updating?\n" . $herecurr);
+               }
+
 # Check for wrappage within a valid hunk of the file
                if ($realcnt != 0 && $line !~ m{^(?:\+|-| |\\ No newline|$)}) {
                        ERROR("CORRUPTED_PATCH",
@@ -1993,7 +2192,8 @@ sub process {
 # Check if it's the start of a commit log
 # (not a header line and we haven't seen the patch filename)
                if ($in_header_lines && $realfile =~ /^$/ &&
-                   $rawline !~ /^(commit\b|from\b|[\w-]+:).+$/i) {
+                   !($rawline =~ /^\s+\S/ ||
+                     $rawline =~ /^(commit\b|from\b|[\w-]+:).*$/i)) {
                        $in_header_lines = 0;
                        $in_commit_log = 1;
                }
@@ -2021,14 +2221,14 @@ sub process {
                        if (ERROR("DOS_LINE_ENDINGS",
                                  "DOS line endings\n" . $herevet) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/[\s\015]+$//;
+                               $fixed[$fixlinenr] =~ s/[\s\015]+$//;
                        }
                } elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) {
                        my $herevet = "$here\n" . cat_vet($rawline) . "\n";
                        if (ERROR("TRAILING_WHITESPACE",
                                  "trailing whitespace\n" . $herevet) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\s+$//;
+                               $fixed[$fixlinenr] =~ s/\s+$//;
                        }
 
                        $rpt_cleaners = 1;
@@ -2049,7 +2249,7 @@ sub process {
 # Only applies when adding the entry originally, after that we do not have
 # sufficient context to determine whether it is indeed long enough.
                if ($realfile =~ /Kconfig/ &&
-                   $line =~ /.\s*config\s+/) {
+                   $line =~ /^\+\s*config\s+/) {
                        my $length = 0;
                        my $cnt = $realcnt;
                        my $ln = $linenr + 1;
@@ -2062,10 +2262,11 @@ sub process {
                                $is_end = $lines[$ln - 1] =~ /^\+/;
 
                                next if ($f =~ /^-/);
+                               last if (!$file && $f =~ /^\@\@/);
 
-                               if ($lines[$ln - 1] =~ /.\s*(?:bool|tristate)\s*\"/) {
+                               if ($lines[$ln - 1] =~ /^\+\s*(?:bool|tristate)\s*\"/) {
                                        $is_start = 1;
-                               } elsif ($lines[$ln - 1] =~ /.\s*(?:---)?help(?:---)?$/) {
+                               } elsif ($lines[$ln - 1] =~ /^\+\s*(?:---)?help(?:---)?$/) {
                                        $length = -1;
                                }
 
@@ -2161,12 +2362,18 @@ sub process {
                             "quoted string split across lines\n" . $hereprev);
                }
 
+# check for a missing space in a string concatenation
+               if ($prevrawline =~ /[^\\]\w"$/ && $rawline =~ /^\+[\t ]+"\w/) {
+                       WARN('MISSING_SPACE',
+                            "break quoted strings at a space character\n" . $hereprev);
+               }
+
 # check for spaces before a quoted newline
                if ($rawline =~ /^.*\".*\s\\n/) {
                        if (WARN("QUOTED_WHITESPACE_BEFORE_NEWLINE",
                                 "unnecessary whitespace before a quoted newline\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/^(\+.*\".*)\s+\\n/$1\\n/;
+                               $fixed[$fixlinenr] =~ s/^(\+.*\".*)\s+\\n/$1\\n/;
                        }
 
                }
@@ -2203,7 +2410,7 @@ sub process {
                        if (ERROR("CODE_INDENT",
                                  "code indent should use tabs where possible\n" . $herevet) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
+                               $fixed[$fixlinenr] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
                        }
                }
 
@@ -2213,9 +2420,9 @@ sub process {
                        if (WARN("SPACE_BEFORE_TAB",
                                "please, no space before tabs\n" . $herevet) &&
                            $fix) {
-                               while ($fixed[$linenr - 1] =~
+                               while ($fixed[$fixlinenr] =~
                                           s/(^\+.*) {8,8}+\t/$1\t\t/) {}
-                               while ($fixed[$linenr - 1] =~
+                               while ($fixed[$fixlinenr] =~
                                           s/(^\+.*) +\t/$1\t/) {}
                        }
                }
@@ -2249,19 +2456,19 @@ sub process {
                                        if (CHK("PARENTHESIS_ALIGNMENT",
                                                "Alignment should match open parenthesis\n" . $hereprev) &&
                                            $fix && $line =~ /^\+/) {
-                                               $fixed[$linenr - 1] =~
+                                               $fixed[$fixlinenr] =~
                                                    s/^\+[ \t]*/\+$goodtabindent/;
                                        }
                                }
                        }
                }
 
-               if ($line =~ /^\+.*\*[ \t]*\)[ \t]+(?!$Assignment|$Arithmetic)/) {
+               if ($line =~ /^\+.*\(\s*$Type\s*\)[ \t]+(?!$Assignment|$Arithmetic|{)/) {
                        if (CHK("SPACING",
-                               "No space is necessary after a cast\n" . $hereprev) &&
+                               "No space is necessary after a cast\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
-                                   s/^(\+.*\*[ \t]*\))[ \t]+/$1/;
+                               $fixed[$fixlinenr] =~
+                                   s/(\(\s*$Type\s*\))[ \t]+/$1/;
                        }
                }
 
@@ -2291,10 +2498,44 @@ sub process {
                             "networking block comments put the trailing */ on a separate line\n" . $herecurr);
                }
 
+# check for missing blank lines after struct/union declarations
+# with exceptions for various attributes and macros
+               if ($prevline =~ /^[\+ ]};?\s*$/ &&
+                   $line =~ /^\+/ &&
+                   !($line =~ /^\+\s*$/ ||
+                     $line =~ /^\+\s*EXPORT_SYMBOL/ ||
+                     $line =~ /^\+\s*MODULE_/i ||
+                     $line =~ /^\+\s*\#\s*(?:end|elif|else)/ ||
+                     $line =~ /^\+[a-z_]*init/ ||
+                     $line =~ /^\+\s*(?:static\s+)?[A-Z_]*ATTR/ ||
+                     $line =~ /^\+\s*DECLARE/ ||
+                     $line =~ /^\+\s*__setup/)) {
+                       if (CHK("LINE_SPACING",
+                               "Please use a blank line after function/struct/union/enum declarations\n" . $hereprev) &&
+                           $fix) {
+                               fix_insert_line($fixlinenr, "\+");
+                       }
+               }
+
+# check for multiple consecutive blank lines
+               if ($prevline =~ /^[\+ ]\s*$/ &&
+                   $line =~ /^\+\s*$/ &&
+                   $last_blank_line != ($linenr - 1)) {
+                       if (CHK("LINE_SPACING",
+                               "Please don't use multiple blank lines\n" . $hereprev) &&
+                           $fix) {
+                               fix_delete_line($fixlinenr, $rawline);
+                       }
+
+                       $last_blank_line = $linenr;
+               }
+
 # check for missing blank lines after declarations
                if ($sline =~ /^\+\s+\S/ &&                     #Not at char 1
                        # actual declarations
                    ($prevline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+                       # function pointer declarations
+                    $prevline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
                        # foo bar; where foo is some local typedef or #define
                     $prevline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
                        # known declaration macros
@@ -2307,6 +2548,8 @@ sub process {
                      $prevline =~ /(?:\{\s*|\\)$/) &&
                        # looks like a declaration
                    !($sline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+                       # function pointer declarations
+                     $sline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
                        # foo bar; where foo is some local typedef or #define
                      $sline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
                        # known declaration macros
@@ -2321,8 +2564,11 @@ sub process {
                      $sline =~ /^\+\s+\(?\s*(?:$Compare|$Assignment|$Operators)/) &&
                        # indentation of previous and current line are the same
                    (($prevline =~ /\+(\s+)\S/) && $sline =~ /^\+$1\S/)) {
-                       WARN("SPACING",
-                            "Missing a blank line after declarations\n" . $hereprev);
+                       if (WARN("LINE_SPACING",
+                                "Missing a blank line after declarations\n" . $hereprev) &&
+                           $fix) {
+                               fix_insert_line($fixlinenr, "\+");
+                       }
                }
 
 # check for spaces at the beginning of a line.
@@ -2335,13 +2581,33 @@ sub process {
                        if (WARN("LEADING_SPACE",
                                 "please, no spaces at the start of a line\n" . $herevet) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
+                               $fixed[$fixlinenr] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
                        }
                }
 
 # check we are in a valid C source file if not then ignore this hunk
                next if ($realfile !~ /\.(h|c)$/);
 
+# check indentation of any line with a bare else
+# if the previous line is a break or return and is indented 1 tab more...
+               if ($sline =~ /^\+([\t]+)(?:}[ \t]*)?else(?:[ \t]*{)?\s*$/) {
+                       my $tabs = length($1) + 1;
+                       if ($prevline =~ /^\+\t{$tabs,$tabs}(?:break|return)\b/) {
+                               WARN("UNNECESSARY_ELSE",
+                                    "else is not generally useful after a break or return\n" . $hereprev);
+                       }
+               }
+
+# check indentation of a line with a break;
+# if the previous line is a goto or return and is indented the same # of tabs
+               if ($sline =~ /^\+([\t]+)break\s*;\s*$/) {
+                       my $tabs = $1;
+                       if ($prevline =~ /^\+$tabs(?:goto|return)\b/) {
+                               WARN("UNNECESSARY_BREAK",
+                                    "break is not useful after a goto or return\n" . $hereprev);
+                       }
+               }
+
 # discourage the addition of CONFIG_EXPERIMENTAL in #if(def).
                if ($line =~ /^\+\s*\#\s*if.*\bCONFIG_EXPERIMENTAL\b/) {
                        WARN("CONFIG_EXPERIMENTAL",
@@ -2477,7 +2743,7 @@ sub process {
 
 # if/while/etc brace do not go on next line, unless defining a do while loop,
 # or if that brace on the next line is for something else
-               if ($line =~ /(.*)\b((?:if|while|for|switch)\s*\(|do\b|else\b)/ && $line !~ /^.\s*\#/) {
+               if ($line =~ /(.*)\b((?:if|while|for|switch|(?:[a-z_]+|)for_each[a-z_]+)\s*\(|do\b|else\b)/ && $line !~ /^.\s*\#/) {
                        my $pre_ctx = "$1$2";
 
                        my ($level, @ctx) = ctx_statement_level($linenr, $realcnt, 0);
@@ -2504,7 +2770,7 @@ sub process {
                        #print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n";
                        #print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n";
 
-                       if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln -1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) {
+                       if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln - 1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) {
                                ERROR("OPEN_BRACE",
                                      "that open brace { should be on the previous line\n" .
                                        "$here\n$ctx\n$rawlines[$ctx_ln - 1]\n");
@@ -2523,7 +2789,7 @@ sub process {
                }
 
 # Check relative indent for conditionals and blocks.
-               if ($line =~ /\b(?:(?:if|while|for)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) {
+               if ($line =~ /\b(?:(?:if|while|for|(?:[a-z_]+|)for_each[a-z_]+)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) {
                        ($stat, $cond, $line_nr_next, $remain_next, $off_next) =
                                ctx_statement_block($linenr, $realcnt, 0)
                                        if (!defined $stat);
@@ -2654,8 +2920,18 @@ sub process {
 # check for initialisation to aggregates open brace on the next line
                if ($line =~ /^.\s*{/ &&
                    $prevline =~ /(?:^|[^=])=\s*$/) {
-                       ERROR("OPEN_BRACE",
-                             "that open brace { should be on the previous line\n" . $hereprev);
+                       if (ERROR("OPEN_BRACE",
+                                 "that open brace { should be on the previous line\n" . $hereprev) &&
+                           $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
+                               fix_delete_line($fixlinenr - 1, $prevrawline);
+                               fix_delete_line($fixlinenr, $rawline);
+                               my $fixedline = $prevrawline;
+                               $fixedline =~ s/\s*=\s*$/ = {/;
+                               fix_insert_line($fixlinenr, $fixedline);
+                               $fixedline = $line;
+                               $fixedline =~ s/^(.\s*){\s*/$1/;
+                               fix_insert_line($fixlinenr, $fixedline);
+                       }
                }
 
 #
@@ -2680,10 +2956,10 @@ sub process {
                        if (ERROR("C99_COMMENTS",
                                  "do not use C99 // comments\n" . $herecurr) &&
                            $fix) {
-                               my $line = $fixed[$linenr - 1];
+                               my $line = $fixed[$fixlinenr];
                                if ($line =~ /\/\/(.*)$/) {
                                        my $comment = trim($1);
-                                       $fixed[$linenr - 1] =~ s@\/\/(.*)$@/\* $comment \*/@;
+                                       $fixed[$fixlinenr] =~ s@\/\/(.*)$@/\* $comment \*/@;
                                }
                        }
                }
@@ -2742,7 +3018,7 @@ sub process {
                                  "do not initialise globals to 0 or NULL\n" .
                                      $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/;
+                               $fixed[$fixlinenr] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/;
                        }
                }
 # check for static initialisers.
@@ -2751,10 +3027,17 @@ sub process {
                                  "do not initialise statics to 0 or NULL\n" .
                                      $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/;
+                               $fixed[$fixlinenr] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/;
                        }
                }
 
+# check for misordered declarations of char/short/int/long with signed/unsigned
+               while ($sline =~ m{(\b$TypeMisordered\b)}g) {
+                       my $tmp = trim($1);
+                       WARN("MISORDERED_TYPE",
+                            "type '$tmp' should be specified in [[un]signed] [short|int|long|long long] order\n" . $herecurr);
+               }
+
 # check for static const char * arrays.
                if ($line =~ /\bstatic\s+const\s+char\s*\*\s*(\w+)\s*\[\s*\]\s*=\s*/) {
                        WARN("STATIC_CONST_CHAR_ARRAY",
@@ -2781,7 +3064,7 @@ sub process {
                        if (ERROR("FUNCTION_WITHOUT_ARGS",
                                  "Bad function definition - $1() should probably be $1(void)\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/(\b($Type)\s+($Ident))\s*\(\s*\)/$2 $3(void)/;
+                               $fixed[$fixlinenr] =~ s/(\b($Type)\s+($Ident))\s*\(\s*\)/$2 $3(void)/;
                        }
                }
 
@@ -2790,7 +3073,7 @@ sub process {
                        if (WARN("DEFINE_PCI_DEVICE_TABLE",
                                 "Prefer struct pci_device_id over deprecated DEFINE_PCI_DEVICE_TABLE\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\b(?:static\s+|)DEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=\s*/static const struct pci_device_id $1\[\] = /;
+                               $fixed[$fixlinenr] =~ s/\b(?:static\s+|)DEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=\s*/static const struct pci_device_id $1\[\] = /;
                        }
                }
 
@@ -2827,7 +3110,7 @@ sub process {
                                        my $sub_from = $ident;
                                        my $sub_to = $ident;
                                        $sub_to =~ s/\Q$from\E/$to/;
-                                       $fixed[$linenr - 1] =~
+                                       $fixed[$fixlinenr] =~
                                            s@\Q$sub_from\E@$sub_to@;
                                }
                        }
@@ -2855,7 +3138,7 @@ sub process {
                                        my $sub_from = $match;
                                        my $sub_to = $match;
                                        $sub_to =~ s/\Q$from\E/$to/;
-                                       $fixed[$linenr - 1] =~
+                                       $fixed[$fixlinenr] =~
                                            s@\Q$sub_from\E@$sub_to@;
                                }
                        }
@@ -2917,7 +3200,7 @@ sub process {
                        if (WARN("PREFER_PR_LEVEL",
                                 "Prefer pr_warn(... to pr_warning(...\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/\bpr_warning\b/pr_warn/;
                        }
                }
@@ -2933,17 +3216,40 @@ sub process {
 
 # function brace can't be on same line, except for #defines of do while,
 # or if closed on same line
-               if (($line=~/$Type\s*$Ident\(.*\).*\s{/) and
+               if (($line=~/$Type\s*$Ident\(.*\).*\s*{/) and
                    !($line=~/\#\s*define.*do\s{/) and !($line=~/}/)) {
-                       ERROR("OPEN_BRACE",
-                             "open brace '{' following function declarations go on the next line\n" . $herecurr);
+                       if (ERROR("OPEN_BRACE",
+                                 "open brace '{' following function declarations go on the next line\n" . $herecurr) &&
+                           $fix) {
+                               fix_delete_line($fixlinenr, $rawline);
+                               my $fixed_line = $rawline;
+                               $fixed_line =~ /(^..*$Type\s*$Ident\(.*\)\s*){(.*)$/;
+                               my $line1 = $1;
+                               my $line2 = $2;
+                               fix_insert_line($fixlinenr, ltrim($line1));
+                               fix_insert_line($fixlinenr, "\+{");
+                               if ($line2 !~ /^\s*$/) {
+                                       fix_insert_line($fixlinenr, "\+\t" . trim($line2));
+                               }
+                       }
                }
 
 # open braces for enum, union and struct go on the same line.
                if ($line =~ /^.\s*{/ &&
                    $prevline =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?\s*$/) {
-                       ERROR("OPEN_BRACE",
-                             "open brace '{' following $1 go on the same line\n" . $hereprev);
+                       if (ERROR("OPEN_BRACE",
+                                 "open brace '{' following $1 go on the same line\n" . $hereprev) &&
+                           $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
+                               fix_delete_line($fixlinenr - 1, $prevrawline);
+                               fix_delete_line($fixlinenr, $rawline);
+                               my $fixedline = rtrim($prevrawline) . " {";
+                               fix_insert_line($fixlinenr, $fixedline);
+                               $fixedline = $rawline;
+                               $fixedline =~ s/^(.\s*){\s*/$1\t/;
+                               if ($fixedline !~ /^\+\s*$/) {
+                                       fix_insert_line($fixlinenr, $fixedline);
+                               }
+                       }
                }
 
 # missing space after union, struct or enum definition
@@ -2951,7 +3257,7 @@ sub process {
                        if (WARN("SPACING",
                                 "missing space after $1 definition\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/^(.\s*(?:typedef\s+)?(?:enum|union|struct)(?:\s+$Ident){1,2})([=\{])/$1 $2/;
                        }
                }
@@ -3021,7 +3327,7 @@ sub process {
                        }
 
                        if (show_type("SPACING") && $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/^(.\s*)$Declare\s*\(\s*\*\s*$Ident\s*\)\s*\(/$1 . $declare . $post_declare_space . '(*' . $funcname . ')('/ex;
                        }
                }
@@ -3038,7 +3344,7 @@ sub process {
                                if (ERROR("BRACKET_SPACE",
                                          "space prohibited before open square bracket '['\n" . $herecurr) &&
                                    $fix) {
-                                   $fixed[$linenr - 1] =~
+                                   $fixed[$fixlinenr] =~
                                        s/^(\+.*?)\s+\[/$1\[/;
                                }
                        }
@@ -3073,7 +3379,7 @@ sub process {
                                if (WARN("SPACING",
                                         "space prohibited between function name and open parenthesis '('\n" . $herecurr) &&
                                             $fix) {
-                                       $fixed[$linenr - 1] =~
+                                       $fixed[$fixlinenr] =~
                                            s/\b$name\s+\(/$name\(/;
                                }
                        }
@@ -3341,8 +3647,8 @@ sub process {
                                $fixed_line = $fixed_line . $fix_elements[$#elements];
                        }
 
-                       if ($fix && $line_fixed && $fixed_line ne $fixed[$linenr - 1]) {
-                               $fixed[$linenr - 1] = $fixed_line;
+                       if ($fix && $line_fixed && $fixed_line ne $fixed[$fixlinenr]) {
+                               $fixed[$fixlinenr] = $fixed_line;
                        }
 
 
@@ -3353,7 +3659,7 @@ sub process {
                        if (WARN("SPACING",
                                 "space prohibited before semicolon\n" . $herecurr) &&
                            $fix) {
-                               1 while $fixed[$linenr - 1] =~
+                               1 while $fixed[$fixlinenr] =~
                                    s/^(\+.*\S)\s+;/$1;/;
                        }
                }
@@ -3386,7 +3692,7 @@ sub process {
                        if (ERROR("SPACING",
                                  "space required before the open brace '{'\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/^(\+.*(?:do|\))){/$1 {/;
+                               $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\))){/$1 {/;
                        }
                }
 
@@ -3404,7 +3710,7 @@ sub process {
                        if (ERROR("SPACING",
                                  "space required after that close brace '}'\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/}((?!(?:,|;|\)))\S)/} $1/;
                        }
                }
@@ -3414,7 +3720,7 @@ sub process {
                        if (ERROR("SPACING",
                                  "space prohibited after that open square bracket '['\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/\[\s+/\[/;
                        }
                }
@@ -3422,7 +3728,7 @@ sub process {
                        if (ERROR("SPACING",
                                  "space prohibited before that close square bracket ']'\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/\s+\]/\]/;
                        }
                }
@@ -3433,7 +3739,7 @@ sub process {
                        if (ERROR("SPACING",
                                  "space prohibited after that open parenthesis '('\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/\(\s+/\(/;
                        }
                }
@@ -3443,18 +3749,27 @@ sub process {
                        if (ERROR("SPACING",
                                  "space prohibited before that close parenthesis ')'\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/\s+\)/\)/;
                        }
                }
 
+# check unnecessary parentheses around addressof/dereference single $Lvals
+# ie: &(foo->bar) should be &foo->bar and *(foo->bar) should be *foo->bar
+
+               while ($line =~ /(?:[^&]&\s*|\*)\(\s*($Ident\s*(?:$Member\s*)+)\s*\)/g) {
+                       CHK("UNNECESSARY_PARENTHESES",
+                           "Unnecessary parentheses around $1\n" . $herecurr);
+                       }
+
 #goto labels aren't indented, allow a single space however
                if ($line=~/^.\s+[A-Za-z\d_]+:(?![0-9]+)/ and
                   !($line=~/^. [A-Za-z\d_]+:/) and !($line=~/^.\s+default:/)) {
                        if (WARN("INDENTED_LABEL",
                                 "labels should not be indented\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/^(.)\s+/$1/;
                        }
                }
@@ -3516,7 +3831,7 @@ sub process {
                        if (ERROR("SPACING",
                                  "space required before the open parenthesis '('\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/\b(if|while|for|switch)\(/$1 \(/;
                        }
                }
@@ -3606,7 +3921,7 @@ sub process {
 # if should not continue a brace
                if ($line =~ /}\s*if\b/) {
                        ERROR("TRAILING_STATEMENTS",
-                             "trailing statements should be on next line\n" .
+                             "trailing statements should be on next line (or did you mean 'else if'?)\n" .
                                $herecurr);
                }
 # case and default should not have general statements after them
@@ -3622,14 +3937,26 @@ sub process {
 
                # Check for }<nl>else {, these must be at the same
                # indent level to be relevant to each other.
-               if ($prevline=~/}\s*$/ and $line=~/^.\s*else\s*/ and
-                                               $previndent == $indent) {
-                       ERROR("ELSE_AFTER_BRACE",
-                             "else should follow close brace '}'\n" . $hereprev);
+               if ($prevline=~/}\s*$/ and $line=~/^.\s*else\s*/ &&
+                   $previndent == $indent) {
+                       if (ERROR("ELSE_AFTER_BRACE",
+                                 "else should follow close brace '}'\n" . $hereprev) &&
+                           $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
+                               fix_delete_line($fixlinenr - 1, $prevrawline);
+                               fix_delete_line($fixlinenr, $rawline);
+                               my $fixedline = $prevrawline;
+                               $fixedline =~ s/}\s*$//;
+                               if ($fixedline !~ /^\+\s*$/) {
+                                       fix_insert_line($fixlinenr, $fixedline);
+                               }
+                               $fixedline = $rawline;
+                               $fixedline =~ s/^(.\s*)else/$1} else/;
+                               fix_insert_line($fixlinenr, $fixedline);
+                       }
                }
 
-               if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ and
-                                               $previndent == $indent) {
+               if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ &&
+                   $previndent == $indent) {
                        my ($s, $c) = ctx_statement_block($linenr, $realcnt, 0);
 
                        # Find out what is on the end of the line after the
@@ -3638,8 +3965,18 @@ sub process {
                        $s =~ s/\n.*//g;
 
                        if ($s =~ /^\s*;/) {
-                               ERROR("WHILE_AFTER_BRACE",
-                                     "while should follow close brace '}'\n" . $hereprev);
+                               if (ERROR("WHILE_AFTER_BRACE",
+                                         "while should follow close brace '}'\n" . $hereprev) &&
+                                   $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
+                                       fix_delete_line($fixlinenr - 1, $prevrawline);
+                                       fix_delete_line($fixlinenr, $rawline);
+                                       my $fixedline = $prevrawline;
+                                       my $trailing = $rawline;
+                                       $trailing =~ s/^\+//;
+                                       $trailing = trim($trailing);
+                                       $fixedline =~ s/}\s*$/} $trailing/;
+                                       fix_insert_line($fixlinenr, $fixedline);
+                               }
                        }
                }
 
@@ -3653,7 +3990,7 @@ sub process {
                                         "Avoid gcc v4.3+ binary constant extension: <$var>\n" . $herecurr) &&
                                    $fix) {
                                        my $hexval = sprintf("0x%x", oct($var));
-                                       $fixed[$linenr - 1] =~
+                                       $fixed[$fixlinenr] =~
                                            s/\b$var\b/$hexval/;
                                }
                        }
@@ -3689,7 +4026,7 @@ sub process {
                        if (WARN("WHITESPACE_AFTER_LINE_CONTINUATION",
                                 "Whitespace after \\ makes next lines useless\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\s+$//;
+                               $fixed[$fixlinenr] =~ s/\s+$//;
                        }
                }
 
@@ -3762,7 +4099,7 @@ sub process {
                            $dstat !~ /^(?:$Ident|-?$Constant),$/ &&                    # 10, // foo(),
                            $dstat !~ /^(?:$Ident|-?$Constant);$/ &&                    # foo();
                            $dstat !~ /^[!~-]?(?:$Lval|$Constant)$/ &&          # 10 // foo() // !foo // ~foo // -foo // foo->bar // foo.bar->baz
-                           $dstat !~ /^'X'$/ &&                                        # character constants
+                           $dstat !~ /^'X'$/ && $dstat !~ /^'XX'$/ &&                  # character constants
                            $dstat !~ /$exceptions/ &&
                            $dstat !~ /^\.$Ident\s*=/ &&                                # .foo =
                            $dstat !~ /^(?:\#\s*$Ident|\#\s*$Constant)\s*$/ &&          # stringification #foo
@@ -4014,6 +4351,23 @@ sub process {
                        }
                }
 
+# check for unnecessary "Out of Memory" messages
+               if ($line =~ /^\+.*\b$logFunctions\s*\(/ &&
+                   $prevline =~ /^[ \+]\s*if\s*\(\s*(\!\s*|NULL\s*==\s*)?($Lval)(\s*==\s*NULL\s*)?\s*\)/ &&
+                   (defined $1 || defined $3) &&
+                   $linenr > 3) {
+                       my $testval = $2;
+                       my $testline = $lines[$linenr - 3];
+
+                       my ($s, $c) = ctx_statement_block($linenr - 3, $realcnt, 0);
+#                      print("line: <$line>\nprevline: <$prevline>\ns: <$s>\nc: <$c>\n\n\n");
+
+                       if ($c =~ /(?:^|\n)[ \+]\s*(?:$Type\s*)?\Q$testval\E\s*=\s*(?:\([^\)]*\)\s*)?\s*(?:devm_)?(?:[kv][czm]alloc(?:_node|_array)?\b|kstrdup|(?:dev_)?alloc_skb)/) {
+                               WARN("OOM_MESSAGE",
+                                    "Possible unnecessary 'out of memory' message\n" . $hereprev);
+                       }
+               }
+
 # check for bad placement of section $InitAttribute (e.g.: __initdata)
                if ($line =~ /(\b$InitAttribute\b)/) {
                        my $attr = $1;
@@ -4027,7 +4381,7 @@ sub process {
                                      WARN("MISPLACED_INIT",
                                           "$attr should be placed after $var\n" . $herecurr))) &&
                                    $fix) {
-                                       $fixed[$linenr - 1] =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*([=;])\s*/"$1" . trim(string_find_replace($2, "\\s*$attr\\s*", " ")) . " " . trim(string_find_replace($3, "\\s*$attr\\s*", "")) . " $attr" . ("$4" eq ";" ? ";" : " = ")/e;
+                                       $fixed[$fixlinenr] =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*([=;])\s*/"$1" . trim(string_find_replace($2, "\\s*$attr\\s*", " ")) . " " . trim(string_find_replace($3, "\\s*$attr\\s*", "")) . " $attr" . ("$4" eq ";" ? ";" : " = ")/e;
                                }
                        }
                }
@@ -4041,7 +4395,7 @@ sub process {
                        if (ERROR("INIT_ATTRIBUTE",
                                  "Use of const init definition must use ${attr_prefix}initconst\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/$InitAttributeData/${attr_prefix}initconst/;
                        }
                }
@@ -4052,12 +4406,12 @@ sub process {
                        if (ERROR("INIT_ATTRIBUTE",
                                  "Use of $attr requires a separate use of const\n" . $herecurr) &&
                            $fix) {
-                               my $lead = $fixed[$linenr - 1] =~
+                               my $lead = $fixed[$fixlinenr] =~
                                    /(^\+\s*(?:static\s+))/;
                                $lead = rtrim($1);
                                $lead = "$lead " if ($lead !~ /^\+$/);
                                $lead = "${lead}const ";
-                               $fixed[$linenr - 1] =~ s/(^\+\s*(?:static\s+))/$lead/;
+                               $fixed[$fixlinenr] =~ s/(^\+\s*(?:static\s+))/$lead/;
                        }
                }
 
@@ -4070,7 +4424,7 @@ sub process {
                        if (WARN("CONSTANT_CONVERSION",
                                 "$constant_func should be $func\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\b$constant_func\b/$func/g;
+                               $fixed[$fixlinenr] =~ s/\b$constant_func\b/$func/g;
                        }
                }
 
@@ -4120,7 +4474,7 @@ sub process {
                        if (ERROR("SPACING",
                                  "exactly one space required after that #$1\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
+                               $fixed[$fixlinenr] =~
                                    s/^(.\s*\#\s*(ifdef|ifndef|elif))\s{2,}/$1 /;
                        }
 
@@ -4168,7 +4522,7 @@ sub process {
                        if (WARN("INLINE",
                                 "plain inline is preferred over $1\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\b(__inline__|__inline)\b/inline/;
+                               $fixed[$fixlinenr] =~ s/\b(__inline__|__inline)\b/inline/;
 
                        }
                }
@@ -4193,7 +4547,7 @@ sub process {
                        if (WARN("PREFER_PRINTF",
                                 "__printf(string-index, first-to-check) is preferred over __attribute__((format(printf, string-index, first-to-check)))\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)\)\s*\)\s*\)/"__printf(" . trim($1) . ")"/ex;
+                               $fixed[$fixlinenr] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)\)\s*\)\s*\)/"__printf(" . trim($1) . ")"/ex;
 
                        }
                }
@@ -4204,7 +4558,7 @@ sub process {
                        if (WARN("PREFER_SCANF",
                                 "__scanf(string-index, first-to-check) is preferred over __attribute__((format(scanf, string-index, first-to-check)))\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\s*,\s*(.*)\)\s*\)\s*\)/"__scanf(" . trim($1) . ")"/ex;
+                               $fixed[$fixlinenr] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\s*,\s*(.*)\)\s*\)\s*\)/"__scanf(" . trim($1) . ")"/ex;
                        }
                }
 
@@ -4219,7 +4573,7 @@ sub process {
                        if (WARN("SIZEOF_PARENTHESIS",
                                 "sizeof $1 should be sizeof($1)\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/"sizeof(" . trim($1) . ")"/ex;
+                               $fixed[$fixlinenr] =~ s/\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/"sizeof(" . trim($1) . ")"/ex;
                        }
                }
 
@@ -4242,7 +4596,7 @@ sub process {
                                if (WARN("PREFER_SEQ_PUTS",
                                         "Prefer seq_puts to seq_printf\n" . $herecurr) &&
                                    $fix) {
-                                       $fixed[$linenr - 1] =~ s/\bseq_printf\b/seq_puts/;
+                                       $fixed[$fixlinenr] =~ s/\bseq_printf\b/seq_puts/;
                                }
                        }
                }
@@ -4271,7 +4625,7 @@ sub process {
                        if (WARN("PREFER_ETHER_ADDR_COPY",
                                 "Prefer ether_addr_copy() over memcpy() if the Ethernet addresses are __aligned(2)\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/ether_addr_copy($2, $7)/;
+                               $fixed[$fixlinenr] =~ s/\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/ether_addr_copy($2, $7)/;
                        }
                }
 
@@ -4359,7 +4713,7 @@ sub process {
                        if (CHK("AVOID_EXTERNS",
                                "extern prototypes should be avoided in .h files\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/(.*)\bextern\b\s*(.*)/$1$2/;
+                               $fixed[$fixlinenr] =~ s/(.*)\bextern\b\s*(.*)/$1$2/;
                        }
                }
 
@@ -4419,23 +4773,24 @@ sub process {
 
 # check for k[mz]alloc with multiplies that could be kmalloc_array/kcalloc
                if ($^V && $^V ge 5.10.0 &&
-                   $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/) {
+                   $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)\s*,/) {
                        my $oldfunc = $3;
                        my $a1 = $4;
                        my $a2 = $10;
                        my $newfunc = "kmalloc_array";
                        $newfunc = "kcalloc" if ($oldfunc eq "kzalloc");
-                       if ($a1 =~ /^sizeof\s*\S/ || $a2 =~ /^sizeof\s*\S/) {
+                       my $r1 = $a1;
+                       my $r2 = $a2;
+                       if ($a1 =~ /^sizeof\s*\S/) {
+                               $r1 = $a2;
+                               $r2 = $a1;
+                       }
+                       if ($r1 !~ /^sizeof\b/ && $r2 =~ /^sizeof\s*\S/ &&
+                           !($r1 =~ /^$Constant$/ || $r1 =~ /^[A-Z_][A-Z0-9_]*$/)) {
                                if (WARN("ALLOC_WITH_MULTIPLY",
                                         "Prefer $newfunc over $oldfunc with multiply\n" . $herecurr) &&
                                    $fix) {
-                                       my $r1 = $a1;
-                                       my $r2 = $a2;
-                                       if ($a1 =~ /^sizeof\s*\S/) {
-                                               $r1 = $a2;
-                                               $r2 = $a1;
-                                       }
-                                       $fixed[$linenr - 1] =~ s/\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/$1 . ' = ' . "$newfunc(" . trim($r1) . ', ' . trim($r2)/e;
+                                       $fixed[$fixlinenr] =~ s/\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/$1 . ' = ' . "$newfunc(" . trim($r1) . ', ' . trim($r2)/e;
 
                                }
                        }
@@ -4459,17 +4814,17 @@ sub process {
                        if (WARN("ONE_SEMICOLON",
                                 "Statements terminations use 1 semicolon\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/(\s*;\s*){2,}$/;/g;
+                               $fixed[$fixlinenr] =~ s/(\s*;\s*){2,}$/;/g;
                        }
                }
 
-# check for case / default statements not preceeded by break/fallthrough/switch
+# check for case / default statements not preceded by break/fallthrough/switch
                if ($line =~ /^.\s*(?:case\s+(?:$Ident|$Constant)\s*|default):/) {
                        my $has_break = 0;
                        my $has_statement = 0;
                        my $count = 0;
                        my $prevline = $linenr;
-                       while ($prevline > 1 && $count < 3 && !$has_break) {
+                       while ($prevline > 1 && ($file || $count < 3) && !$has_break) {
                                $prevline--;
                                my $rline = $rawlines[$prevline - 1];
                                my $fline = $lines[$prevline - 1];
@@ -4507,7 +4862,7 @@ sub process {
                        if (WARN("USE_FUNC",
                                 "__func__ should be used instead of gcc specific __FUNCTION__\n"  . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/\b__FUNCTION__\b/__func__/g;
+                               $fixed[$fixlinenr] =~ s/\b__FUNCTION__\b/__func__/g;
                        }
                }
 
@@ -4750,12 +5105,16 @@ sub process {
        hash_show_words(\%use_type, "Used");
        hash_show_words(\%ignore_type, "Ignored");
 
-       if ($clean == 0 && $fix && "@rawlines" ne "@fixed") {
+       if ($clean == 0 && $fix &&
+           ("@rawlines" ne "@fixed" ||
+            $#fixed_inserted >= 0 || $#fixed_deleted >= 0)) {
                my $newfile = $filename;
                $newfile .= ".EXPERIMENTAL-checkpatch-fixes" if (!$fix_inplace);
                my $linecount = 0;
                my $f;
 
+               @fixed = fix_inserted_deleted_lines(\@fixed, \@fixed_inserted, \@fixed_deleted);
+
                open($f, '>', $newfile)
                    or die "$P: Can't open $newfile for write\n";
                foreach my $fixed_line (@fixed) {
@@ -4763,7 +5122,7 @@ sub process {
                        if ($file) {
                                if ($linecount > 3) {
                                        $fixed_line =~ s/^\+//;
-                                       print $f $fixed_line. "\n";
+                                       print $f $fixed_line . "\n";
                                }
                        } else {
                                print $f $fixed_line . "\n";