x86: Remove BIOS data range from e820
[linux-block.git] / arch / x86 / kernel / setup.c
index e09f0e2c14b5e83487b91a9a02049a802a7830ef..f9b1f4e5ab74cb5dc7cf4c0b64c0edfc3e0709af 100644 (file)
 #include <asm/percpu.h>
 #include <asm/topology.h>
 #include <asm/apicdef.h>
+#include <asm/k8.h>
 #ifdef CONFIG_X86_64
 #include <asm/numa_64.h>
 #endif
@@ -486,42 +487,11 @@ static void __init reserve_early_setup_data(void)
 
 #ifdef CONFIG_KEXEC
 
-/**
- * Reserve @size bytes of crashkernel memory at any suitable offset.
- *
- * @size: Size of the crashkernel memory to reserve.
- * Returns the base address on success, and -1ULL on failure.
- */
-static
-unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
-{
-       const unsigned long long alignment = 16<<20;    /* 16M */
-       unsigned long long start = 0LL;
-
-       while (1) {
-               int ret;
-
-               start = find_e820_area(start, ULONG_MAX, size, alignment);
-               if (start == -1ULL)
-                       return start;
-
-               /* try to reserve it */
-               ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE);
-               if (ret >= 0)
-                       return start;
-
-               start += alignment;
-       }
-}
-
 static inline unsigned long long get_total_mem(void)
 {
        unsigned long long total;
 
-       total = max_low_pfn - min_low_pfn;
-#ifdef CONFIG_HIGHMEM
-       total += highend_pfn - highstart_pfn;
-#endif
+       total = max_pfn - min_low_pfn;
 
        return total << PAGE_SHIFT;
 }
@@ -541,21 +511,25 @@ static void __init reserve_crashkernel(void)
 
        /* 0 means: find the address automatically */
        if (crash_base <= 0) {
-               crash_base = find_and_reserve_crashkernel(crash_size);
+               const unsigned long long alignment = 16<<20;    /* 16M */
+
+               crash_base = find_e820_area(alignment, ULONG_MAX, crash_size,
+                                alignment);
                if (crash_base == -1ULL) {
-                       pr_info("crashkernel reservation failed. "
-                               "No suitable area found.\n");
+                       pr_info("crashkernel reservation failed - No suitable area found.\n");
                        return;
                }
        } else {
-               ret = reserve_bootmem_generic(crash_base, crash_size,
-                                       BOOTMEM_EXCLUSIVE);
-               if (ret < 0) {
-                       pr_info("crashkernel reservation failed - "
-                               "memory is in use\n");
+               unsigned long long start;
+
+               start = find_e820_area(crash_base, ULONG_MAX, crash_size,
+                                1<<20);
+               if (start != crash_base) {
+                       pr_info("crashkernel reservation failed - memory is in use.\n");
                        return;
                }
        }
+       reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL");
 
        printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
                        "for crashkernel (System RAM: %ldMB)\n",
@@ -676,6 +650,23 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
        {}
 };
 
+static void __init trim_bios_range(void)
+{
+       /*
+        * A special case is the first 4Kb of memory;
+        * This is a BIOS owned area, not kernel ram, but generally
+        * not listed as such in the E820 table.
+        */
+       e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
+       /*
+        * special case: Some BIOSen report the PC BIOS
+        * area (640->1Mb) as ram even though it is not.
+        * take them out.
+        */
+       e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
+       sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+}
+
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -691,6 +682,9 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
 
 void __init setup_arch(char **cmdline_p)
 {
+       int acpi = 0;
+       int k8 = 0;
+
 #ifdef CONFIG_X86_32
        memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
        visws_early_detect();
@@ -783,21 +777,18 @@ void __init setup_arch(char **cmdline_p)
        strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
        *cmdline_p = command_line;
 
-#ifdef CONFIG_X86_64
        /*
-        * Must call this twice: Once just to detect whether hardware doesn't
-        * support NX (so that the early EHCI debug console setup can safely
-        * call set_fixmap(), and then again after parsing early parameters to
-        * honor the respective command line option.
+        * x86_configure_nx() is called before parse_early_param() to detect
+        * whether hardware doesn't support NX (so that the early EHCI debug
+        * console setup can safely call set_fixmap()). It may then be called
+        * again from within noexec_setup() during parsing early parameters
+        * to honor the respective command line option.
         */
-       check_efer();
-#endif
+       x86_configure_nx();
 
        parse_early_param();
 
-#ifdef CONFIG_X86_64
-       check_efer();
-#endif
+       x86_report_nx();
 
        /* Must be before kernel pagetables are setup */
        vmi_activate();
@@ -839,7 +830,7 @@ void __init setup_arch(char **cmdline_p)
        insert_resource(&iomem_resource, &data_resource);
        insert_resource(&iomem_resource, &bss_resource);
 
-
+       trim_bios_range();
 #ifdef CONFIG_X86_32
        if (ppro_with_ram_bug()) {
                e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
@@ -893,6 +884,13 @@ void __init setup_arch(char **cmdline_p)
 
        reserve_brk();
 
+#ifdef CONFIG_ACPI_SLEEP
+       /*
+        * Reserve low memory region for sleep support.
+        * even before init_memory_mapping
+        */
+       acpi_reserve_wakeup_memory();
+#endif
        init_gbpages();
 
        /* max_pfn_mapped is updated here */
@@ -919,6 +917,8 @@ void __init setup_arch(char **cmdline_p)
 
        reserve_initrd();
 
+       reserve_crashkernel();
+
        vsmp_init();
 
        io_delay_init();
@@ -930,27 +930,24 @@ void __init setup_arch(char **cmdline_p)
 
        early_acpi_boot_init();
 
+       /*
+        * Find and reserve possible boot-time SMP configuration:
+        */
+       find_smp_config();
+
 #ifdef CONFIG_ACPI_NUMA
        /*
         * Parse SRAT to discover nodes.
         */
-       acpi_numa_init();
+       acpi = acpi_numa_init();
 #endif
 
-       initmem_init(0, max_pfn);
-
-#ifdef CONFIG_ACPI_SLEEP
-       /*
-        * Reserve low memory region for sleep support.
-        */
-       acpi_reserve_bootmem();
+#ifdef CONFIG_K8_NUMA
+       if (!acpi)
+               k8 = !k8_numa_init(0, max_pfn);
 #endif
-       /*
-        * Find and reserve possible boot-time SMP configuration:
-        */
-       find_smp_config();
 
-       reserve_crashkernel();
+       initmem_init(0, max_pfn, acpi, k8);
 
 #ifdef CONFIG_X86_64
        /*