Merge tag 'libnvdimm-for-4.19_dax-memory-failure' of gitolite.kernel.org:pub/scm...
[linux-block.git] / drivers / dax / device.c
index 0a2acd7993f0b3a561cf91beb0b4200838f601f4..6fd46083e62958eea61716225cd9f6c7fe11e748 100644 (file)
@@ -248,13 +248,12 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
        return -1;
 }
 
-static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
+static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
+                               struct vm_fault *vmf, pfn_t *pfn)
 {
        struct device *dev = &dev_dax->dev;
        struct dax_region *dax_region;
-       int rc = VM_FAULT_SIGBUS;
        phys_addr_t phys;
-       pfn_t pfn;
        unsigned int fault_size = PAGE_SIZE;
 
        if (check_vma(dev_dax, vmf->vma, __func__))
@@ -276,26 +275,19 @@ static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
                return VM_FAULT_SIGBUS;
        }
 
-       pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
-
-       rc = vm_insert_mixed(vmf->vma, vmf->address, pfn);
-
-       if (rc == -ENOMEM)
-               return VM_FAULT_OOM;
-       if (rc < 0 && rc != -EBUSY)
-               return VM_FAULT_SIGBUS;
+       *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
 
-       return VM_FAULT_NOPAGE;
+       return vmf_insert_mixed(vmf->vma, vmf->address, *pfn);
 }
 
-static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
+static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
+                               struct vm_fault *vmf, pfn_t *pfn)
 {
        unsigned long pmd_addr = vmf->address & PMD_MASK;
        struct device *dev = &dev_dax->dev;
        struct dax_region *dax_region;
        phys_addr_t phys;
        pgoff_t pgoff;
-       pfn_t pfn;
        unsigned int fault_size = PMD_SIZE;
 
        if (check_vma(dev_dax, vmf->vma, __func__))
@@ -331,21 +323,21 @@ static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
                return VM_FAULT_SIGBUS;
        }
 
-       pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
+       *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
 
-       return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, pfn,
+       return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, *pfn,
                        vmf->flags & FAULT_FLAG_WRITE);
 }
 
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
+static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
+                               struct vm_fault *vmf, pfn_t *pfn)
 {
        unsigned long pud_addr = vmf->address & PUD_MASK;
        struct device *dev = &dev_dax->dev;
        struct dax_region *dax_region;
        phys_addr_t phys;
        pgoff_t pgoff;
-       pfn_t pfn;
        unsigned int fault_size = PUD_SIZE;
 
 
@@ -382,23 +374,26 @@ static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
                return VM_FAULT_SIGBUS;
        }
 
-       pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
+       *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
 
-       return vmf_insert_pfn_pud(vmf->vma, vmf->address, vmf->pud, pfn,
+       return vmf_insert_pfn_pud(vmf->vma, vmf->address, vmf->pud, *pfn,
                        vmf->flags & FAULT_FLAG_WRITE);
 }
 #else
-static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
+static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
+                               struct vm_fault *vmf, pfn_t *pfn)
 {
        return VM_FAULT_FALLBACK;
 }
 #endif /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
 
-static int dev_dax_huge_fault(struct vm_fault *vmf,
+static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
                enum page_entry_size pe_size)
 {
-       int rc, id;
        struct file *filp = vmf->vma->vm_file;
+       unsigned long fault_size;
+       int rc, id;
+       pfn_t pfn;
        struct dev_dax *dev_dax = filp->private_data;
 
        dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
@@ -408,23 +403,49 @@ static int dev_dax_huge_fault(struct vm_fault *vmf,
        id = dax_read_lock();
        switch (pe_size) {
        case PE_SIZE_PTE:
-               rc = __dev_dax_pte_fault(dev_dax, vmf);
+               fault_size = PAGE_SIZE;
+               rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn);
                break;
        case PE_SIZE_PMD:
-               rc = __dev_dax_pmd_fault(dev_dax, vmf);
+               fault_size = PMD_SIZE;
+               rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn);
                break;
        case PE_SIZE_PUD:
-               rc = __dev_dax_pud_fault(dev_dax, vmf);
+               fault_size = PUD_SIZE;
+               rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
                break;
        default:
                rc = VM_FAULT_SIGBUS;
        }
+
+       if (rc == VM_FAULT_NOPAGE) {
+               unsigned long i;
+               pgoff_t pgoff;
+
+               /*
+                * In the device-dax case the only possibility for a
+                * VM_FAULT_NOPAGE result is when device-dax capacity is
+                * mapped. No need to consider the zero page, or racing
+                * conflicting mappings.
+                */
+               pgoff = linear_page_index(vmf->vma, vmf->address
+                               & ~(fault_size - 1));
+               for (i = 0; i < fault_size / PAGE_SIZE; i++) {
+                       struct page *page;
+
+                       page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
+                       if (page->mapping)
+                               continue;
+                       page->mapping = filp->f_mapping;
+                       page->index = pgoff + i;
+               }
+       }
        dax_read_unlock(id);
 
        return rc;
 }
 
-static int dev_dax_fault(struct vm_fault *vmf)
+static vm_fault_t dev_dax_fault(struct vm_fault *vmf)
 {
        return dev_dax_huge_fault(vmf, PE_SIZE_PTE);
 }