mm: add get_dump_page

[linux-block.git] / mm / memory.c
diff --git a/mm/memory.c b/mm/memory.c

index aede2ce3aba4fdf1159946cffc7b6acaf8b534d3..a8430ff138374c3332f3ebf1766be6d95c068883 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -45,6 +45,7 @@
  #include <linux/swap.h>
  #include <linux/highmem.h>
  #include <linux/pagemap.h>
+#include <linux/ksm.h>
  #include <linux/rmap.h>
  #include <linux/module.h>
  #include <linux/delayacct.h>
@@ -56,6 +57,7 @@
  #include <linux/swapops.h>
  #include <linux/elf.h>
  
+#include <asm/io.h>
  #include <asm/pgalloc.h>
  #include <asm/uaccess.h>
  #include <asm/tlb.h>
@@ -596,8 +598,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
         page = vm_normal_page(vma, addr, pte);
         if (page) {
                 get_page(page);
-               page_dup_rmap(page, vma, addr);
-               rss[!!PageAnon(page)]++;
+               page_dup_rmap(page);
+               rss[PageAnon(page)]++;
         }
  
  out_set_pte:
@@ -1215,8 +1217,6 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
         unsigned int vm_flags = 0;
         int write = !!(flags & GUP_FLAGS_WRITE);
         int force = !!(flags & GUP_FLAGS_FORCE);
-       int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
-       int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);
  
         if (nr_pages <= 0)
                 return 0;
@@ -1242,7 +1242,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                         pte_t *pte;
  
                         /* user gate pages are read-only */
-                       if (!ignore && write)
+                       if (write)
                                 return i ? : -EFAULT;
                         if (pg > TASK_SIZE)
                                 pgd = pgd_offset_k(pg);
@@ -1276,7 +1276,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
  
                 if (!vma ||
                     (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
-                   (!ignore && !(vm_flags & vma->vm_flags)))
+                   !(vm_flags & vma->vm_flags))
                         return i ? : -EFAULT;
  
                 if (is_vm_hugetlb_page(vma)) {
@@ -1296,13 +1296,9 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
  
                         /*
                          * If we have a pending SIGKILL, don't keep faulting
-                        * pages and potentially allocating memory, unless
-                        * current is handling munlock--e.g., on exit. In
-                        * that case, we are not allocating memory.  Rather,
-                        * we're only unlocking already resident/mapped pages.
+                        * pages and potentially allocating memory.
                          */
-                       if (unlikely(!ignore_sigkill &&
-                                       fatal_signal_pending(current)))
+                       if (unlikely(fatal_signal_pending(current)))
                                 return i ? i : -ERESTARTSYS;
  
                         if (write)
@@ -1427,9 +1423,40 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
  
         return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
  }
-
  EXPORT_SYMBOL(get_user_pages);
  
+/**
+ * get_dump_page() - pin user page in memory while writing it to core dump
+ * @addr: user address
+ *
+ * Returns the struct page pointer of the user page pinned for dump; the
+ * caller drops that reference with page_cache_release() or put_page().
+ * Returns NULL on any kind of failure - a hole must then be inserted into
+ * the corefile, to preserve alignment with its headers - and likewise where
+ * the ZERO_PAGE, or an anonymous pte_none, has been found, so that a hole
+ * can be left in the corefile to save diskspace.
+ *
+ * Called without mmap_sem, but after all other threads have been killed.
+ */
+#ifdef CONFIG_ELF_CORE
+struct page *get_dump_page(unsigned long addr)
+{
+       struct vm_area_struct *vma;
+       struct page *page;
+       int nr_pinned = __get_user_pages(current, current->mm, addr, 1,
+                                        GUP_FLAGS_FORCE, &page, &vma);
+
+       if (nr_pinned < 1)
+               return NULL;
+       if (page != ZERO_PAGE(0)) {
+               flush_cache_page(vma, addr, page_to_pfn(page));
+               return page;
+       }
+       page_cache_release(page);       /* ZERO_PAGE: unpin, dump a hole */
+       return NULL;
+}
+#endif /* CONFIG_ELF_CORE */
+
  pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
                         spinlock_t **ptl)
  {
@@ -1973,7 +2000,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
          * Take out anonymous pages first, anonymous shared vmas are
          * not dirty accountable.
          */
-       if (PageAnon(old_page)) {
+       if (PageAnon(old_page) && !PageKsm(old_page)) {
                 if (!trylock_page(old_page)) {
                         page_cache_get(old_page);
                         pte_unmap_unlock(page_table, ptl);
@@ -2114,9 +2141,14 @@ gotten:
                  * seen in the presence of one thread doing SMC and another
                  * thread doing COW.
                  */
-               ptep_clear_flush_notify(vma, address, page_table);
+               ptep_clear_flush(vma, address, page_table);
                 page_add_new_anon_rmap(new_page, vma, address);
-               set_pte_at(mm, address, page_table, entry);
+               /*
+                * We call the notify macro here because, when using secondary
+                * mmu page tables (such as kvm shadow page tables), we want the
+                * new page to be mapped directly into the secondary page table.
+                */
+               set_pte_at_notify(mm, address, page_table, entry);
                 update_mmu_cache(vma, address, entry);
                 if (old_page) {
                         /*
@@ -2643,6 +2675,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
         page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
         if (!pte_none(*page_table))
                 goto release;
+
         inc_mm_counter(mm, anon_rss);
         page_add_new_anon_rmap(page, vma, address);
         set_pte_at(mm, address, page_table, entry);