Merge branch 'topic/paca' into next
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index ab9db0afd2c899747ef2353fe96822d413858731..7095384344b4654311447de94ea919a5b7ef588f 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -48,20 +48,98 @@ static int native_register_process_table(unsigned long base, unsigned long pg_sz
        return 0;
 }
 
-static __ref void *early_alloc_pgtable(unsigned long size)
+static __ref void *early_alloc_pgtable(unsigned long size, int nid,
+                       unsigned long region_start, unsigned long region_end)
 {
+       unsigned long pa = 0;
        void *pt;
 
-       pt = __va(memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE));
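+       /*
+        * Prefer the caller's region hint, then the node hint; fall back
+        * to allocating anywhere if no hint was given or the hinted
+        * allocation fails.
+        */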
+       if (region_start || region_end) /* has region hint */
+               pa = memblock_alloc_range(size, size, region_start, region_end,
+                                               MEMBLOCK_NONE);
+       else if (nid != -1) /* has node hint */
+               pa = memblock_alloc_base_nid(size, size,
+                                               MEMBLOCK_ALLOC_ANYWHERE,
+                                               nid, MEMBLOCK_NONE);
+
+       if (!pa)
+               pa = memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE);
+
+       BUG_ON(!pa);
+
+       pt = __va(pa);
        memset(pt, 0, size);
 
        return pt;
 }
 
-int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+static int early_map_kernel_page(unsigned long ea, unsigned long pa,
                          pgprot_t flags,
-                         unsigned int map_page_size)
+                         unsigned int map_page_size,
+                         int nid,
+                         unsigned long region_start, unsigned long region_end)
 {
+       unsigned long pfn = pa >> PAGE_SHIFT;
+       pgd_t *pgdp;
+       pud_t *pudp;
+       pmd_t *pmdp;
+       pte_t *ptep;
+
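+       /*
+        * Walk the kernel page table for this address, allocating any
+        * missing intermediate levels from memblock using the hints.
+        */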
+       pgdp = pgd_offset_k(ea);
+       if (pgd_none(*pgdp)) {
+               pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid,
+                                               region_start, region_end);
+               pgd_populate(&init_mm, pgdp, pudp);
+       }
+       pudp = pud_offset(pgdp, ea);
+       if (map_page_size == PUD_SIZE) {
+               ptep = (pte_t *)pudp;
+               goto set_the_pte;
+       }
+       if (pud_none(*pudp)) {
+               pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid,
+                                               region_start, region_end);
+               pud_populate(&init_mm, pudp, pmdp);
+       }
+       pmdp = pmd_offset(pudp, ea);
+       if (map_page_size == PMD_SIZE) {
+               ptep = pmdp_ptep(pmdp);
+               goto set_the_pte;
+       }
+       if (!pmd_present(*pmdp)) {
+               ptep = early_alloc_pgtable(PAGE_SIZE, nid,
+                                               region_start, region_end);
+               pmd_populate_kernel(&init_mm, pmdp, ptep);
+       }
+       ptep = pte_offset_kernel(pmdp, ea);
+
+set_the_pte:
+       set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
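+       /* Make sure the new PTE is visible before the mapping is used. */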
+       smp_wmb();
+       return 0;
+}
+
+/*
+ * nid, region_start and region_end are hints for placing the page table
+ * memory on the same node or in the same region as the memory being mapped.
+ */
+static int __map_kernel_page(unsigned long ea, unsigned long pa,
+                         pgprot_t flags,
+                         unsigned int map_page_size,
+                         int nid,
+                         unsigned long region_start, unsigned long region_end)
+{
+       unsigned long pfn = pa >> PAGE_SHIFT;
        pgd_t *pgdp;
        pud_t *pudp;
        pmd_t *pmdp;
@@ -70,61 +148,49 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa,
         * Make sure task size is correct as per the max addr
         */
        BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
-       if (slab_is_available()) {
-               pgdp = pgd_offset_k(ea);
-               pudp = pud_alloc(&init_mm, pgdp, ea);
-               if (!pudp)
-                       return -ENOMEM;
-               if (map_page_size == PUD_SIZE) {
-                       ptep = (pte_t *)pudp;
-                       goto set_the_pte;
-               }
-               pmdp = pmd_alloc(&init_mm, pudp, ea);
-               if (!pmdp)
-                       return -ENOMEM;
-               if (map_page_size == PMD_SIZE) {
-                       ptep = pmdp_ptep(pmdp);
-                       goto set_the_pte;
-               }
-               ptep = pte_alloc_kernel(pmdp, ea);
-               if (!ptep)
-                       return -ENOMEM;
-       } else {
-               pgdp = pgd_offset_k(ea);
-               if (pgd_none(*pgdp)) {
-                       pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
-                       BUG_ON(pudp == NULL);
-                       pgd_populate(&init_mm, pgdp, pudp);
-               }
-               pudp = pud_offset(pgdp, ea);
-               if (map_page_size == PUD_SIZE) {
-                       ptep = (pte_t *)pudp;
-                       goto set_the_pte;
-               }
-               if (pud_none(*pudp)) {
-                       pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
-                       BUG_ON(pmdp == NULL);
-                       pud_populate(&init_mm, pudp, pmdp);
-               }
-               pmdp = pmd_offset(pudp, ea);
-               if (map_page_size == PMD_SIZE) {
-                       ptep = pmdp_ptep(pmdp);
-                       goto set_the_pte;
-               }
-               if (!pmd_present(*pmdp)) {
-                       ptep = early_alloc_pgtable(PAGE_SIZE);
-                       BUG_ON(ptep == NULL);
-                       pmd_populate_kernel(&init_mm, pmdp, ptep);
-               }
-               ptep = pte_offset_kernel(pmdp, ea);
+
+       if (unlikely(!slab_is_available()))
+               return early_map_kernel_page(ea, pa, flags, map_page_size,
+                                               nid, region_start, region_end);
+
+       /*
+        * TODO: the generic page table allocation functions should be able
+        * to take a node, so we can place kernel page tables on the right
+        * node after boot.
+        */
+       pgdp = pgd_offset_k(ea);
+       pudp = pud_alloc(&init_mm, pgdp, ea);
+       if (!pudp)
+               return -ENOMEM;
+       if (map_page_size == PUD_SIZE) {
+               ptep = (pte_t *)pudp;
+               goto set_the_pte;
        }
+       pmdp = pmd_alloc(&init_mm, pudp, ea);
+       if (!pmdp)
+               return -ENOMEM;
+       if (map_page_size == PMD_SIZE) {
+               ptep = pmdp_ptep(pmdp);
+               goto set_the_pte;
+       }
+       ptep = pte_alloc_kernel(pmdp, ea);
+       if (!ptep)
+               return -ENOMEM;
 
 set_the_pte:
-       set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
+       set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
        smp_wmb();
        return 0;
 }
 
+int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+                         pgprot_t flags,
+                         unsigned int map_page_size)
+{
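+       /* No placement hints: any node (-1) and any region (0, 0). */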
+       return __map_kernel_page(ea, pa, flags, map_page_size, -1, 0, 0);
+}
+
 #ifdef CONFIG_STRICT_KERNEL_RWX
 void radix__change_memory_range(unsigned long start, unsigned long end,
                                unsigned long clear)
@@ -211,7 +277,8 @@ static inline void __meminit print_mapping(unsigned long start,
 }
 
 static int __meminit create_physical_mapping(unsigned long start,
-                                            unsigned long end)
+                                            unsigned long end,
+                                            int nid)
 {
        unsigned long vaddr, addr, mapping_size = 0;
        pgprot_t prot;
@@ -267,7 +334,8 @@ retry:
                else
                        prot = PAGE_KERNEL;
 
-               rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size);
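+               /* Try to place new page tables within the range being mapped. */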
+               rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
                if (rc)
                        return rc;
        }
@@ -276,7 +344,7 @@ retry:
        return 0;
 }
 
-static void __init radix_init_pgtable(void)
+void __init radix_init_pgtable(void)
 {
        unsigned long rts_field;
        struct memblock_region *reg;
@@ -286,9 +354,16 @@ static void __init radix_init_pgtable(void)
        /*
         * Create the linear mapping, using standard page size for now
         */
-       for_each_memblock(memory, reg)
+       for_each_memblock(memory, reg) {
+               /*
+                * The memblock allocator is up at this point, so the
+                * page tables will be allocated within the range. No
+                * need for a node (which we don't have yet).
+                */
                WARN_ON(create_physical_mapping(reg->base,
-                                               reg->base + reg->size));
+                                               reg->base + reg->size,
+                                               -1));
+       }
 
        /* Find out how many PID bits are supported */
        if (cpu_has_feature(CPU_FTR_HVMODE)) {
@@ -317,7 +392,8 @@ static void __init radix_init_pgtable(void)
         * host.
         */
        BUG_ON(PRTB_SIZE_SHIFT > 36);
-       process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
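+       /* No placement hints for the process table allocation. */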
+       process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0);
        /*
         * Fill in the process table.
         */
@@ -705,8 +781,9 @@ static int __meminit stop_machine_change_mapping(void *data)
 
        spin_unlock(&init_mm.page_table_lock);
        pte_clear(&init_mm, params->aligned_start, params->pte);
-       create_physical_mapping(params->aligned_start, params->start);
-       create_physical_mapping(params->end, params->aligned_end);
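+       /* No node preference; the range itself serves as the region hint. */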
+       create_physical_mapping(params->aligned_start, params->start, -1);
+       create_physical_mapping(params->end, params->aligned_end, -1);
        spin_lock(&init_mm.page_table_lock);
        return 0;
 }
@@ -863,9 +940,9 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end)
        radix__flush_tlb_kernel_range(start, end);
 }
 
-int __meminit radix__create_section_mapping(unsigned long start, unsigned long end)
+int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid)
 {
-       return create_physical_mapping(start, end);
+       return create_physical_mapping(start, end, nid);
 }
 
 int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
@@ -876,14 +953,27 @@ int __meminit radix__remove_section_mapping(unsigned long start, unsigned long e
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
+static int __map_kernel_page_nid(unsigned long ea, unsigned long pa,
+                                pgprot_t flags, unsigned int map_page_size,
+                                int nid)
+{
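+       /* Node hint only; no region constraint. */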
+       return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0);
+}
+
 int __meminit radix__vmemmap_create_mapping(unsigned long start,
                                      unsigned long page_size,
                                      unsigned long phys)
 {
        /* Create a PTE encoding */
        unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;
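+       /* Place the page tables on the node that backs this vmemmap range. */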
+       int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
+       int ret;
+
+       ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid);
+       BUG_ON(ret);
 
-       BUG_ON(radix__map_kernel_page(start, phys, __pgprot(flags), page_size));
        return 0;
 }