Merge x86-64 update from Andi
author    Linus Torvalds <torvalds@g5.osdl.org>
Tue, 15 Nov 2005 03:56:02 +0000 (19:56 -0800)
committer Linus Torvalds <torvalds@g5.osdl.org>
Tue, 15 Nov 2005 03:56:02 +0000 (19:56 -0800)
18 files changed:
arch/i386/kernel/cpu/common.c
arch/i386/kernel/cpu/intel.c
arch/i386/kernel/smpboot.c
arch/ia64/Kconfig
arch/x86_64/Kconfig
arch/x86_64/Kconfig.debug
arch/x86_64/kernel/i8259.c
arch/x86_64/kernel/process.c
arch/x86_64/kernel/smpboot.c
drivers/char/agp/amd64-agp.c
include/asm-i386/processor.h
include/asm-x86_64/desc.h
include/asm-x86_64/pgtable.h
include/asm-x86_64/smp.h
include/linux/gfp.h
include/linux/mm.h
include/linux/mmzone.h
mm/page_alloc.c

diff --combined arch/i386/kernel/cpu/common.c
index c145fb30002ed6894d37ffa03b33227166337243,4e9c2e99b0a52754f5c32aeff7ba430b8c96979c..31e344b26bae824f255baa1446980486d8aff52a
@@@ -30,6 -30,8 +30,6 @@@ static int disable_x86_serial_nr __devi
  
  struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
  
 -extern void mcheck_init(struct cpuinfo_x86 *c);
 -
  extern int disable_pse;
  
  static void default_init(struct cpuinfo_x86 * c)
@@@ -231,10 -233,10 +231,10 @@@ static void __init early_cpu_detect(voi
                cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
                c->x86 = (tfms >> 8) & 15;
                c->x86_model = (tfms >> 4) & 15;
-               if (c->x86 == 0xf) {
+               if (c->x86 == 0xf)
                        c->x86 += (tfms >> 20) & 0xff;
+               if (c->x86 >= 0x6)
                        c->x86_model += ((tfms >> 16) & 0xF) << 4;
-               }
                c->x86_mask = tfms & 15;
                if (cap0 & (1<<19))
                        c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
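
The early_cpu_detect() hunk above moves the extended-model fixup out of the family == 0xf branch, so family 6 and later parts also pick up the extended model bits from CPUID leaf 1. A standalone sketch of the decoding; the EAX value 0x000106a5 is only an illustrative input, not something taken from this commit:

    #include <stdio.h>

    int main(void)
    {
        unsigned int tfms = 0x000106a5;          /* example CPUID.1 EAX */
        unsigned int family = (tfms >> 8) & 15;  /* 0x6 */
        unsigned int model  = (tfms >> 4) & 15;  /* 0xa */

        if (family == 0xf)
            family += (tfms >> 20) & 0xff;       /* extended family */
        if (family >= 0x6)
            model += ((tfms >> 16) & 0xf) << 4;  /* extended model: 0xa -> 0x1a */

        printf("family %#x model %#x\n", family, model);
        return 0;
    }

Before this change the extended model was only applied inside the family == 0xf branch, so a family 6 part like the example would have reported model 0xa instead of 0x1a.
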
@@@ -333,7 -335,7 +333,7 @@@ void __devinit identify_cpu(struct cpui
        c->x86_model = c->x86_mask = 0; /* So far unknown... */
        c->x86_vendor_id[0] = '\0'; /* Unset */
        c->x86_model_id[0] = '\0';  /* Unset */
-       c->x86_num_cores = 1;
+       c->x86_max_cores = 1;
        memset(&c->x86_capability, 0, sizeof c->x86_capability);
  
        if (!have_cpuid_p()) {
        }
  
        /* Init Machine Check Exception if available. */
 -#ifdef CONFIG_X86_MCE
        mcheck_init(c);
 -#endif
 +
        if (c == &boot_cpu_data)
                sysenter_setup();
        enable_sep_cpu();
  void __devinit detect_ht(struct cpuinfo_x86 *c)
  {
        u32     eax, ebx, ecx, edx;
-       int     index_msb, tmp;
+       int     index_msb, core_bits;
        int     cpu = smp_processor_id();
  
+       cpuid(1, &eax, &ebx, &ecx, &edx);
+       c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
        if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
                return;
  
-       cpuid(1, &eax, &ebx, &ecx, &edx);
        smp_num_siblings = (ebx & 0xff0000) >> 16;
  
        if (smp_num_siblings == 1) {
                printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
        } else if (smp_num_siblings > 1 ) {
-               index_msb = 31;
  
                if (smp_num_siblings > NR_CPUS) {
                        printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
                        smp_num_siblings = 1;
                        return;
                }
-               tmp = smp_num_siblings;
-               while ((tmp & 0x80000000 ) == 0) {
-                       tmp <<=1 ;
-                       index_msb--;
-               }
-               if (smp_num_siblings & (smp_num_siblings - 1))
-                       index_msb++;
+               index_msb = get_count_order(smp_num_siblings);
                phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
  
                printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
                       phys_proc_id[cpu]);
  
-               smp_num_siblings = smp_num_siblings / c->x86_num_cores;
+               smp_num_siblings = smp_num_siblings / c->x86_max_cores;
  
-               tmp = smp_num_siblings;
-               index_msb = 31;
-               while ((tmp & 0x80000000) == 0) {
-                       tmp <<=1 ;
-                       index_msb--;
-               }
+               index_msb = get_count_order(smp_num_siblings) ;
  
-               if (smp_num_siblings & (smp_num_siblings - 1))
-                       index_msb++;
+               core_bits = get_count_order(c->x86_max_cores);
  
-               cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+               cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
+                                              ((1 << core_bits) - 1);
  
-               if (c->x86_num_cores > 1)
+               if (c->x86_max_cores > 1)
                        printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
                               cpu_core_id[cpu]);
        }
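
The rewritten detect_ht() above replaces the open-coded find-MSB loops with get_count_order() and masks the core ID out of the initial APIC ID with core_bits. A userspace sketch of the same decomposition, assuming flat APIC IDs (so phys_pkg_id(id, shift) reduces to id >> shift) and a hypothetical package with 2 cores and 2 HT threads per core:

    #include <stdio.h>

    /* ceil(log2(count)), mirroring the kernel's get_count_order() */
    static int get_count_order(unsigned int count)
    {
        int order = 0;

        while ((1u << order) < count)
            order++;
        return order;
    }

    int main(void)
    {
        unsigned int apicid    = 0x7;  /* package 1, core 1, thread 1 */
        unsigned int siblings  = 4;    /* logical CPUs per package, CPUID.1 EBX[23:16] */
        unsigned int max_cores = 2;    /* cores per package */

        int index_msb   = get_count_order(siblings);              /* 2 */
        int package     = apicid >> index_msb;                    /* 1 */
        int thread_bits = get_count_order(siblings / max_cores);  /* 1 */
        int core_bits   = get_count_order(max_cores);             /* 1 */
        int core        = (apicid >> thread_bits) & ((1 << core_bits) - 1);  /* 1 */

        printf("apicid %#x -> package %d core %d\n", apicid, package, core);
        return 0;
    }
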
diff --combined arch/i386/kernel/cpu/intel.c
index c28d26fb5f241891c02b3fd5fd7f508083315334,8d603ba2812695e16f7b7ed9a0d0d5ceb55a51d2..5e2da704f0faa8200fa744e39d8ffb86dd860542
@@@ -6,7 -6,6 +6,7 @@@
  #include <linux/bitops.h>
  #include <linux/smp.h>
  #include <linux/thread_info.h>
 +#include <linux/module.h>
  
  #include <asm/processor.h>
  #include <asm/msr.h>
@@@ -158,7 -157,7 +158,7 @@@ static void __devinit init_intel(struc
        if ( p )
                strcpy(c->x86_model_id, p);
        
-       c->x86_num_cores = num_cpu_cores(c);
+       c->x86_max_cores = num_cpu_cores(c);
  
        detect_ht(c);
  
@@@ -265,52 -264,5 +265,52 @@@ __init int intel_cpu_init(void
        return 0;
  }
  
 +#ifndef CONFIG_X86_CMPXCHG
 +unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
 +{
 +      u8 prev;
 +      unsigned long flags;
 +
 +      /* Poor man's cmpxchg for 386. Unsuitable for SMP */
 +      local_irq_save(flags);
 +      prev = *(u8 *)ptr;
 +      if (prev == old)
 +              *(u8 *)ptr = new;
 +      local_irq_restore(flags);
 +      return prev;
 +}
 +EXPORT_SYMBOL(cmpxchg_386_u8);
 +
 +unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
 +{
 +      u16 prev;
 +      unsigned long flags;
 +
 +      /* Poor man's cmpxchg for 386. Unsuitable for SMP */
 +      local_irq_save(flags);
 +      prev = *(u16 *)ptr;
 +      if (prev == old)
 +              *(u16 *)ptr = new;
 +      local_irq_restore(flags);
 +      return prev;
 +}
 +EXPORT_SYMBOL(cmpxchg_386_u16);
 +
 +unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
 +{
 +      u32 prev;
 +      unsigned long flags;
 +
 +      /* Poor man's cmpxchg for 386. Unsuitable for SMP */
 +      local_irq_save(flags);
 +      prev = *(u32 *)ptr;
 +      if (prev == old)
 +              *(u32 *)ptr = new;
 +      local_irq_restore(flags);
 +      return prev;
 +}
 +EXPORT_SYMBOL(cmpxchg_386_u32);
 +#endif
 +
  // arch_initcall(intel_cpu_init);
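
The cmpxchg_386_u{8,16,32} helpers above give 386-class CPUs, which lack the CMPXCHG instruction, an interrupt-disabled emulation (UP only, as the comments note). A hedged sketch of how a size-dispatching wrapper could route to them; this is illustrative only, not the kernel's actual cmpxchg() macro:

    /* Illustrative dispatcher, not kernel code: pick the fallback by operand size. */
    static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
                                            unsigned long new, int size)
    {
        switch (size) {
        case 1:
            return cmpxchg_386_u8(ptr, old, new);
        case 2:
            return cmpxchg_386_u16(ptr, old, new);
        case 4:
            return cmpxchg_386_u32(ptr, old, new);
        }
        return old;
    }
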
  
diff --combined arch/i386/kernel/smpboot.c
index bc5a9d97466b572cf5e6755fc5e5de0eb828b11f,0a9c6465523694bab79436088ae0d21ba4ed0489..d16520da4550e6d1b5d318a56a2f1ba1b852728c
@@@ -68,13 -68,17 +68,15 @@@ EXPORT_SYMBOL(smp_num_siblings)
  
  /* Package ID of each logical CPU */
  int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
 -EXPORT_SYMBOL(phys_proc_id);
  
  /* Core ID of each logical CPU */
  int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
 -EXPORT_SYMBOL(cpu_core_id);
  
+ /* representing HT siblings of each logical CPU */
  cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
  EXPORT_SYMBOL(cpu_sibling_map);
  
+ /* representing HT and core siblings of each logical CPU */
  cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
  EXPORT_SYMBOL(cpu_core_map);
  
@@@ -442,35 -446,60 +444,60 @@@ static void __devinit smp_callin(void
  
  static int cpucount;
  
+ /* representing cpus for which sibling maps can be computed */
+ static cpumask_t cpu_sibling_setup_map;
  static inline void
  set_cpu_sibling_map(int cpu)
  {
        int i;
+       struct cpuinfo_x86 *c = cpu_data;
+       cpu_set(cpu, cpu_sibling_setup_map);
  
        if (smp_num_siblings > 1) {
-               for (i = 0; i < NR_CPUS; i++) {
-                       if (!cpu_isset(i, cpu_callout_map))
-                               continue;
-                       if (cpu_core_id[cpu] == cpu_core_id[i]) {
+               for_each_cpu_mask(i, cpu_sibling_setup_map) {
+                       if (phys_proc_id[cpu] == phys_proc_id[i] &&
+                           cpu_core_id[cpu] == cpu_core_id[i]) {
                                cpu_set(i, cpu_sibling_map[cpu]);
                                cpu_set(cpu, cpu_sibling_map[i]);
+                               cpu_set(i, cpu_core_map[cpu]);
+                               cpu_set(cpu, cpu_core_map[i]);
                        }
                }
        } else {
                cpu_set(cpu, cpu_sibling_map[cpu]);
        }
  
-       if (current_cpu_data.x86_num_cores > 1) {
-               for (i = 0; i < NR_CPUS; i++) {
-                       if (!cpu_isset(i, cpu_callout_map))
-                               continue;
-                       if (phys_proc_id[cpu] == phys_proc_id[i]) {
-                               cpu_set(i, cpu_core_map[cpu]);
-                               cpu_set(cpu, cpu_core_map[i]);
-                       }
-               }
-       } else {
+       if (current_cpu_data.x86_max_cores == 1) {
                cpu_core_map[cpu] = cpu_sibling_map[cpu];
+               c[cpu].booted_cores = 1;
+               return;
+       }
+       for_each_cpu_mask(i, cpu_sibling_setup_map) {
+               if (phys_proc_id[cpu] == phys_proc_id[i]) {
+                       cpu_set(i, cpu_core_map[cpu]);
+                       cpu_set(cpu, cpu_core_map[i]);
+                       /*
+                        *  Does this new cpu bringup a new core?
+                        */
+                       if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
+                               /*
+                                * for each core in package, increment
+                                * the booted_cores for this new cpu
+                                */
+                               if (first_cpu(cpu_sibling_map[i]) == i)
+                                       c[cpu].booted_cores++;
+                               /*
+                                * increment the core count for all
+                                * the other cpus in this package
+                                */
+                               if (i != cpu)
+                                       c[i].booted_cores++;
+                       } else if (i != cpu && !c[cpu].booted_cores)
+                               c[cpu].booted_cores = c[i].booted_cores;
+               }
        }
  }
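
A key change in set_cpu_sibling_map() above is that HT siblings must now match on both phys_proc_id and cpu_core_id; matching on the core ID alone breaks down with more than one package, because core IDs restart at 0 in each package. A small standalone illustration with a made-up two-package topology:

    #include <stdio.h>

    /* Hypothetical box: two single-core HT packages.
     * CPUs 0,1 sit on package 0; CPUs 2,3 on package 1. */
    static const int phys_proc_id[] = { 0, 0, 1, 1 };
    static const int cpu_core_id[]  = { 0, 0, 0, 0 };

    static int same_core(int a, int b)
    {
        return phys_proc_id[a] == phys_proc_id[b] &&
               cpu_core_id[a] == cpu_core_id[b];
    }

    int main(void)
    {
        /* A core-id-only test would wrongly pair CPU 0 with CPU 2... */
        printf("core id only: %d\n", cpu_core_id[0] == cpu_core_id[2]);  /* 1 */
        /* ...while the package-plus-core test does not. */
        printf("package+core: %d\n", same_core(0, 2));                   /* 0 */
        return 0;
    }

The new booted_cores field is filled in from the same walk: the first thread of each core bumps the count for the CPUs already known in its package, and later threads simply copy the value from a sibling.
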
  
@@@ -485,7 -514,6 +512,7 @@@ static void __devinit start_secondary(v
         * things done here to the most necessary things.
         */
        cpu_init();
 +      preempt_disable();
        smp_callin();
        while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
                rep_nop();
@@@ -611,7 -639,7 +638,7 @@@ static inline void __inquire_remote_api
  
        printk("Inquiring remote APIC #%d...\n", apicid);
  
 -      for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
 +      for (i = 0; i < ARRAY_SIZE(regs); i++) {
                printk("... APIC #%d %s: ", apicid, names[i]);
  
                /*
@@@ -1095,11 -1123,8 +1122,8 @@@ static void __init smp_boot_cpus(unsign
  
        current_thread_info()->cpu = 0;
        smp_tune_scheduling();
-       cpus_clear(cpu_sibling_map[0]);
-       cpu_set(0, cpu_sibling_map[0]);
  
-       cpus_clear(cpu_core_map[0]);
-       cpu_set(0, cpu_core_map[0]);
+       set_cpu_sibling_map(0);
  
        /*
         * If we couldn't find an SMP configuration at boot time,
@@@ -1278,15 -1303,24 +1302,24 @@@ static voi
  remove_siblinginfo(int cpu)
  {
        int sibling;
+       struct cpuinfo_x86 *c = cpu_data;
  
+       for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
+               cpu_clear(cpu, cpu_core_map[sibling]);
+               /*
+                * last thread sibling in this cpu core going down
+                */
+               if (cpus_weight(cpu_sibling_map[cpu]) == 1)
+                       c[sibling].booted_cores--;
+       }
+                       
        for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
                cpu_clear(cpu, cpu_sibling_map[sibling]);
-       for_each_cpu_mask(sibling, cpu_core_map[cpu])
-               cpu_clear(cpu, cpu_core_map[sibling]);
        cpus_clear(cpu_sibling_map[cpu]);
        cpus_clear(cpu_core_map[cpu]);
        phys_proc_id[cpu] = BAD_APICID;
        cpu_core_id[cpu] = BAD_APICID;
+       cpu_clear(cpu, cpu_sibling_setup_map);
  }
  
  int __cpu_disable(void)
diff --combined arch/ia64/Kconfig
index 8796e12c56f3f08b504f3e539249c0767e880870,3dba24c318b7cc7b81a3157169deed2bebd84c83..b76ce1fe2e7f31c04858901fac13dd66e7efff29
@@@ -58,6 -58,10 +58,10 @@@ config IA64_UNCACHED_ALLOCATO
        bool
        select GENERIC_ALLOCATOR
  
+ config ZONE_DMA_IS_DMA32
+       bool
+       default y
  choice
        prompt "System type"
        default IA64_GENERIC
@@@ -164,19 -168,6 +168,19 @@@ config IA64_PAGE_SIZE_64K
  
  endchoice
  
 +choice
 +      prompt "Page Table Levels"
 +      default PGTABLE_3
 +
 +config PGTABLE_3
 +      bool "3 Levels"
 +
 +config PGTABLE_4
 +      depends on !IA64_PAGE_SIZE_64KB
 +      bool "4 Levels"
 +
 +endchoice
 +
  source kernel/Kconfig.hz
  
  config IA64_BRL_EMU
@@@ -204,7 -195,6 +208,7 @@@ config IOSAPI
  
  config IA64_SGI_SN_XP
        tristate "Support communication between SGI SSIs"
 +      depends on IA64_GENERIC || IA64_SGI_SN2
        select IA64_UNCACHED_ALLOCATOR
        help
          An SGI machine can be divided into multiple Single System
@@@ -440,21 -430,8 +444,21 @@@ config GENERIC_PENDING_IR
  
  source "arch/ia64/hp/sim/Kconfig"
  
 +menu "Instrumentation Support"
 +        depends on EXPERIMENTAL
 +
  source "arch/ia64/oprofile/Kconfig"
  
 +config KPROBES
 +      bool "Kprobes (EXPERIMENTAL)"
 +      help
 +        Kprobes allows you to trap at almost any kernel address and
 +        execute a callback function.  register_kprobe() establishes
 +        a probepoint and specifies the callback.  Kprobes is useful
 +        for kernel debugging, non-intrusive instrumentation and testing.
 +        If in doubt, say "N".
 +endmenu
 +
  source "arch/ia64/Kconfig.debug"
  
  source "security/Kconfig"
diff --combined arch/x86_64/Kconfig
index 4cce2f6f170c0712dda1a1e6f60cbf545def6b70,1d6242a5cd0aec3c16be94b00af6383d5c76c15b..6ece645e4dbea691191ccdaa8d49ec42025ab25f
@@@ -226,22 -226,42 +226,42 @@@ config SCHED_SM
  
  source "kernel/Kconfig.preempt"
  
- config K8_NUMA
-        bool "K8 NUMA support"
-        select NUMA
+ config NUMA
+        bool "Non Uniform Memory Access (NUMA) Support"
         depends on SMP
         help
-         Enable NUMA (Non Unified Memory Architecture) support for
-         AMD Opteron Multiprocessor systems. The kernel will try to allocate
-         memory used by a CPU on the local memory controller of the CPU
-         and add some more NUMA awareness to the kernel.
-         This code is recommended on all multiprocessor Opteron systems
-         and normally doesn't hurt on others.
+        Enable NUMA (Non Uniform Memory Access) support. The kernel 
+        will try to allocate memory used by a CPU on the local memory 
+        controller of the CPU and add some more NUMA awareness to the kernel.
+        This code is recommended on all multiprocessor Opteron systems.
+        If the system is EM64T, you should say N unless your system is EM64T 
+        NUMA. 
+ config K8_NUMA
+        bool "Old style AMD Opteron NUMA detection"
+        depends on NUMA
+        default y
+        help
+        Enable K8 NUMA node topology detection.  You should say Y here if
+        you have a multi processor AMD K8 system. This uses an old
 +        method to read the NUMA configuration directly from the builtin
+        Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA
+        instead, which also takes priority if both are compiled in.   
+ # Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig.
+ config X86_64_ACPI_NUMA
+        bool "ACPI NUMA detection"
+        depends on NUMA
+        select ACPI 
+        select ACPI_NUMA
+        default y
+        help
+        Enable ACPI SRAT based node topology detection.
  
  config NUMA_EMU
-       bool "NUMA emulation support"
-       select NUMA
-       depends on SMP
+       bool "NUMA emulation"
+       depends on NUMA
        help
          Enable NUMA emulation. A flat machine will be split
          into virtual nodes when booted with "numa=fake=N", where N is the
@@@ -252,9 -272,6 +272,6 @@@ config ARCH_DISCONTIGMEM_ENABL
         depends on NUMA
         default y
  
- config NUMA
-        bool
-        default n
  
  config ARCH_DISCONTIGMEM_ENABLE
        def_bool y
@@@ -374,6 -391,14 +391,14 @@@ config X86_MCE_INTE
           Additional support for intel specific MCE features such as
           the thermal monitor.
  
+ config X86_MCE_AMD
+       bool "AMD MCE features"
+       depends on X86_MCE && X86_LOCAL_APIC
+       default y
+       help
+          Additional support for AMD specific MCE features such as
+          the DRAM Error Threshold.
  config PHYSICAL_START
        hex "Physical address where the kernel is loaded" if EMBEDDED
        default "0x100000"
@@@ -502,7 -527,7 +527,7 @@@ config IA32_EMULATIO
          left.
  
  config IA32_AOUT
-        bool "IA32 a.out support"
+        tristate "IA32 a.out support"
         depends on IA32_EMULATION
         help
           Support old a.out binaries in the 32bit emulation.
@@@ -532,21 -557,8 +557,21 @@@ source "drivers/firmware/Kconfig
  
  source fs/Kconfig
  
 +menu "Instrumentation Support"
 +        depends on EXPERIMENTAL
 +
  source "arch/x86_64/oprofile/Kconfig"
  
 +config KPROBES
 +      bool "Kprobes (EXPERIMENTAL)"
 +      help
 +        Kprobes allows you to trap at almost any kernel address and
 +        execute a callback function.  register_kprobe() establishes
 +        a probepoint and specifies the callback.  Kprobes is useful
 +        for kernel debugging, non-intrusive instrumentation and testing.
 +        If in doubt, say "N".
 +endmenu
 +
  source "arch/x86_64/Kconfig.debug"
  
  source "security/Kconfig"
diff --combined arch/x86_64/Kconfig.debug
index d584ecc27ea1902e038329335486a60b8e72f9eb,3ccf6f4d1068523876634bf36e37f2e1f5699624..e2c6e64a85ec27318d0a76d51e421301e121dd3e
@@@ -2,15 -2,6 +2,6 @@@ menu "Kernel hacking
  
  source "lib/Kconfig.debug"
  
- # !SMP for now because the context switch early causes GPF in segment reloading
- # and the GS base checking does the wrong thing then, causing a hang.
- config CHECKING
-       bool "Additional run-time checks"
-       depends on DEBUG_KERNEL && !SMP
-       help
-         Enables some internal consistency checks for kernel debugging.
-         You should normally say N.
  config INIT_DEBUG
        bool "Debug __init statements"
        depends on DEBUG_KERNEL
@@@ -33,6 -24,16 +24,6 @@@ config IOMMU_DEBU
         options. See Documentation/x86_64/boot-options.txt for more
         details.
  
 -config KPROBES
 -      bool "Kprobes"
 -      depends on DEBUG_KERNEL
 -      help
 -        Kprobes allows you to trap at almost any kernel address and
 -        execute a callback function.  register_kprobe() establishes
 -        a probepoint and specifies the callback.  Kprobes is useful
 -        for kernel debugging, non-intrusive instrumentation and testing.
 -        If in doubt, say "N".
 -
  config IOMMU_LEAK
         bool "IOMMU leak tracing"
         depends on DEBUG_KERNEL
diff --combined arch/x86_64/kernel/i8259.c
index a9368d4c4aba1ceba5df433b9fdfb90713f1f543,5de30035e54bedaa5d420c2469e7a74db13718ed..6e5101ad3d1a075290e3acb298b78e0f5e430c8d
@@@ -492,6 -492,7 +492,7 @@@ void invalidate_interrupt5(void)
  void invalidate_interrupt6(void);
  void invalidate_interrupt7(void);
  void thermal_interrupt(void);
+ void threshold_interrupt(void);
  void i8254_timer_resume(void);
  
  static void setup_timer_hardware(void)
@@@ -515,7 -516,7 +516,7 @@@ void i8254_timer_resume(void
  }
  
  static struct sysdev_class timer_sysclass = {
 -      set_kset_name("timer"),
 +      set_kset_name("timer_pit"),
        .resume         = timer_resume,
  };
  
@@@ -580,6 -581,7 +581,7 @@@ void __init init_IRQ(void
        set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
  #endif        
        set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+       set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
  
  #ifdef CONFIG_X86_LOCAL_APIC
        /* self generated IPI for local APIC timer */
diff --combined arch/x86_64/kernel/process.c
index 59be85d9a4bc166284ac102aea770d52c77a69cf,bc44c92ca1981102ce239889b14ab9e1f71b31f7..5afd63e8cef7ab9c9a71bb1f35df69a738ce90d5
@@@ -86,22 -86,12 +86,22 @@@ EXPORT_SYMBOL(enable_hlt)
   */
  void default_idle(void)
  {
 +      local_irq_enable();
 +
        if (!atomic_read(&hlt_counter)) {
 -              local_irq_disable();
 -              if (!need_resched())
 -                      safe_halt();
 -              else
 -                      local_irq_enable();
 +              clear_thread_flag(TIF_POLLING_NRFLAG);
 +              smp_mb__after_clear_bit();
 +              while (!need_resched()) {
 +                      local_irq_disable();
 +                      if (!need_resched())
 +                              safe_halt();
 +                      else
 +                              local_irq_enable();
 +              }
 +              set_thread_flag(TIF_POLLING_NRFLAG);
 +      } else {
 +              while (!need_resched())
 +                      cpu_relax();
        }
  }
  
   */
  static void poll_idle (void)
  {
 -      int oldval;
 -
        local_irq_enable();
  
 -      /*
 -       * Deal with another CPU just having chosen a thread to
 -       * run here:
 -       */
 -      oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
 -
 -      if (!oldval) {
 -              set_thread_flag(TIF_POLLING_NRFLAG); 
 -              asm volatile(
 -                      "2:"
 -                      "testl %0,%1;"
 -                      "rep; nop;"
 -                      "je 2b;"
 -                      : :
 -                      "i" (_TIF_NEED_RESCHED), 
 -                      "m" (current_thread_info()->flags));
 -              clear_thread_flag(TIF_POLLING_NRFLAG);
 -      } else {
 -              set_need_resched();
 -      }
 +      asm volatile(
 +              "2:"
 +              "testl %0,%1;"
 +              "rep; nop;"
 +              "je 2b;"
 +              : :
 +              "i" (_TIF_NEED_RESCHED),
 +              "m" (current_thread_info()->flags));
  }
  
  void cpu_idle_wait(void)
        do {
                ssleep(1);
                for_each_online_cpu(cpu) {
-                       if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+                       if (cpu_isset(cpu, map) &&
+                                       !per_cpu(cpu_idle_state, cpu))
                                cpu_clear(cpu, map);
                }
                cpus_and(map, map, cpu_online_map);
@@@ -183,8 -188,6 +184,8 @@@ static inline void play_dead(void
   */
  void cpu_idle (void)
  {
 +      set_thread_flag(TIF_POLLING_NRFLAG);
 +
        /* endless idle loop with no priority at all */
        while (1) {
                while (!need_resched()) {
                        idle();
                }
  
 +              preempt_enable_no_resched();
                schedule();
 +              preempt_disable();
        }
  }
  
@@@ -219,12 -220,15 +220,12 @@@ static void mwait_idle(void
  {
        local_irq_enable();
  
 -      if (!need_resched()) {
 -              set_thread_flag(TIF_POLLING_NRFLAG);
 -              do {
 -                      __monitor((void *)&current_thread_info()->flags, 0, 0);
 -                      if (need_resched())
 -                              break;
 -                      __mwait(0, 0);
 -              } while (!need_resched());
 -              clear_thread_flag(TIF_POLLING_NRFLAG);
 +      while (!need_resched()) {
 +              __monitor((void *)&current_thread_info()->flags, 0, 0);
 +              smp_mb();
 +              if (need_resched())
 +                      break;
 +              __mwait(0, 0);
        }
  }
  
@@@ -275,7 -279,8 +276,8 @@@ void __show_regs(struct pt_regs * regs
                system_utsname.version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
        printk_address(regs->rip); 
-       printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
+       printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
+               regs->eflags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
               regs->rax, regs->rbx, regs->rcx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
@@@ -427,15 -432,14 +429,14 @@@ int copy_thread(int nr, unsigned long c
        struct pt_regs * childregs;
        struct task_struct *me = current;
  
-       childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
+       childregs = ((struct pt_regs *)
+                       (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
        *childregs = *regs;
  
        childregs->rax = 0;
        childregs->rsp = rsp;
-       if (rsp == ~0UL) {
+       if (rsp == ~0UL)
                childregs->rsp = (unsigned long)childregs;
-       }
  
        p->thread.rsp = (unsigned long) childregs;
        p->thread.rsp0 = (unsigned long) (childregs+1);
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
-               memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES);
+               memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
+                               IO_BITMAP_BYTES);
        } 
  
        /*
@@@ -494,7 -499,8 +496,8 @@@ out
   * - fold all the options into a flag word and test it with a single test.
   * - could test fs/gs bitsliced
   */
- struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+ struct task_struct *
+ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
  {
        struct thread_struct *prev = &prev_p->thread,
                                 *next = &next_p->thread;
        prev->userrsp = read_pda(oldrsp); 
        write_pda(oldrsp, next->userrsp); 
        write_pda(pcurrent, next_p); 
-       write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
+       write_pda(kernelstack,
+           (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
  
        /*
         * Now maybe reload the debug registers
@@@ -646,7 -653,9 +650,9 @@@ asmlinkage long sys_fork(struct pt_reg
        return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
  }
  
- asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
+ asmlinkage long
+ sys_clone(unsigned long clone_flags, unsigned long newsp,
+         void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
  {
        if (!newsp)
                newsp = regs->rsp;
@@@ -682,7 -691,8 +688,8 @@@ unsigned long get_wchan(struct task_str
                return 0;
        fp = *(u64 *)(p->thread.rsp);
        do { 
-               if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE)
+               if (fp < (unsigned long)stack ||
+                   fp > (unsigned long)stack+THREAD_SIZE)
                        return 0; 
                rip = *(u64 *)(fp+8); 
                if (!in_sched_functions(rip))
@@@ -717,8 -727,8 +724,8 @@@ long do_arch_prctl(struct task_struct *
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
-               load_gs_index(0);
-               ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); 
+                               load_gs_index(0);
+                               ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
                        } 
                }
                put_cpu();
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) { 
                                load_TLS(&task->thread, cpu); 
-                               asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL));
+                               asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
-               asm volatile("movl %0,%%fs" :: "r" (0));
-               ret = checking_wrmsrl(MSR_FS_BASE, addr); 
+                               asm volatile("movl %0,%%fs" :: "r" (0));
+                               ret = checking_wrmsrl(MSR_FS_BASE, addr);
                        }
                }
                put_cpu();
                unsigned long base; 
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
-               else if (doit) {
+               else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
-               else
+               else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr); 
                break; 
                unsigned long base;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
-               else if (doit) {
+               else if (doit)
                        rdmsrl(MSR_KERNEL_GS_BASE, base);
-               else
+               else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr); 
                break;
diff --combined arch/x86_64/kernel/smpboot.c
index c4e59bbdc1872b4ce599f452b125183603cf8f8e,2b9ddba61b37a9e675e830b2172854ea565303b3..683c33f7b96753219a2c1043246dcdfac12b999e
  int smp_num_siblings = 1;
  /* Package ID of each logical CPU */
  u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
+ /* core ID of each logical CPU */
  u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
 -EXPORT_SYMBOL(phys_proc_id);
 -EXPORT_SYMBOL(cpu_core_id);
  
  /* Bitmask of currently online CPUs */
  cpumask_t cpu_online_map __read_mostly;
@@@ -87,7 -90,10 +88,10 @@@ struct cpuinfo_x86 cpu_data[NR_CPUS] __
  /* Set when the idlers are all forked */
  int smp_threads_ready;
  
+ /* representing HT siblings of each logical CPU */
  cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
+ /* representing HT and core siblings of each logical CPU */
  cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
  EXPORT_SYMBOL(cpu_core_map);
  
@@@ -434,30 -440,59 +438,59 @@@ void __cpuinit smp_callin(void
        cpu_set(cpuid, cpu_callin_map);
  }
  
+ /* representing cpus for which sibling maps can be computed */
+ static cpumask_t cpu_sibling_setup_map;
  static inline void set_cpu_sibling_map(int cpu)
  {
        int i;
+       struct cpuinfo_x86 *c = cpu_data;
+       cpu_set(cpu, cpu_sibling_setup_map);
  
        if (smp_num_siblings > 1) {
-               for_each_cpu(i) {
-                       if (cpu_core_id[cpu] == cpu_core_id[i]) {
+               for_each_cpu_mask(i, cpu_sibling_setup_map) {
+                       if (phys_proc_id[cpu] == phys_proc_id[i] &&
+                           cpu_core_id[cpu] == cpu_core_id[i]) {
                                cpu_set(i, cpu_sibling_map[cpu]);
                                cpu_set(cpu, cpu_sibling_map[i]);
+                               cpu_set(i, cpu_core_map[cpu]);
+                               cpu_set(cpu, cpu_core_map[i]);
                        }
                }
        } else {
                cpu_set(cpu, cpu_sibling_map[cpu]);
        }
  
-       if (current_cpu_data.x86_num_cores > 1) {
-               for_each_cpu(i) {
-                       if (phys_proc_id[cpu] == phys_proc_id[i]) {
-                               cpu_set(i, cpu_core_map[cpu]);
-                               cpu_set(cpu, cpu_core_map[i]);
-                       }
-               }
-       } else {
+       if (current_cpu_data.x86_max_cores == 1) {
                cpu_core_map[cpu] = cpu_sibling_map[cpu];
+               c[cpu].booted_cores = 1;
+               return;
+       }
+       for_each_cpu_mask(i, cpu_sibling_setup_map) {
+               if (phys_proc_id[cpu] == phys_proc_id[i]) {
+                       cpu_set(i, cpu_core_map[cpu]);
+                       cpu_set(cpu, cpu_core_map[i]);
+                       /*
+                        *  Does this new cpu bringup a new core?
+                        */
+                       if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
+                               /*
+                                * for each core in package, increment
+                                * the booted_cores for this new cpu
+                                */
+                               if (first_cpu(cpu_sibling_map[i]) == i)
+                                       c[cpu].booted_cores++;
+                               /*
+                                * increment the core count for all
+                                * the other cpus in this package
+                                */
+                               if (i != cpu)
+                                       c[i].booted_cores++;
+                       } else if (i != cpu && !c[cpu].booted_cores)
+                               c[cpu].booted_cores = c[i].booted_cores;
+               }
        }
  }
  
@@@ -472,7 -507,6 +505,7 @@@ void __cpuinit start_secondary(void
         * things done here to the most necessary things.
         */
        cpu_init();
 +      preempt_disable();
        smp_callin();
  
        /* otherwise gcc will move up the smp_processor_id before the cpu_init */
@@@ -879,6 -913,9 +912,9 @@@ static __init void disable_smp(void
  }
  
  #ifdef CONFIG_HOTPLUG_CPU
+ int additional_cpus __initdata = -1;
  /*
   * cpu_possible_map should be static, it cannot change as cpu's
   * are onlined, or offlined. The reason is per-cpu data-structures
   * cpu_present_map on the other hand can change dynamically.
   * In case when cpu_hotplug is not compiled, then we resort to current
   * behaviour, which is cpu_possible == cpu_present.
-  * If cpu-hotplug is supported, then we need to preallocate for all
-  * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range.
   * - Ashok Raj
+  *
+  * Three ways to find out the number of additional hotplug CPUs:
+  * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
+  * - otherwise use half of the available CPUs or 2, whatever is more.
+  * - The user can overwrite it with additional_cpus=NUM
+  * We do this because additional CPUs waste a lot of memory.
+  * -AK
   */
  __init void prefill_possible_map(void)
  {
        int i;
-       for (i = 0; i < NR_CPUS; i++)
+       int possible;
+       if (additional_cpus == -1) {
+               if (disabled_cpus > 0) {
+                       additional_cpus = disabled_cpus;
+               } else {
+                       additional_cpus = num_processors / 2;
+                       if (additional_cpus == 0)
+                               additional_cpus = 2;
+               }
+       }
+       possible = num_processors + additional_cpus;
+       if (possible > NR_CPUS) 
+               possible = NR_CPUS;
+       printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
+               possible,
+               max_t(int, possible - num_processors, 0));
+       for (i = 0; i < possible; i++)
                cpu_set(i, cpu_possible_map);
  }
  #endif
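
prefill_possible_map() above now sizes cpu_possible_map as the detected CPUs plus a hotplug headroom instead of the full NR_CPUS, saving the per-cpu memory that would otherwise be reserved for CPUs that can never appear. A standalone run of the heuristic with made-up numbers (4 detected CPUs, none disabled by the BIOS, no additional_cpus= override):

    #include <stdio.h>

    int main(void)
    {
        int num_processors  = 4;   /* CPUs found in the ACPI/MP tables */
        int disabled_cpus   = 0;   /* CPUs the BIOS marked disabled */
        int additional_cpus = -1;  /* no additional_cpus= on the command line */
        int nr_cpus         = 32;  /* stand-in for NR_CPUS */
        int possible;

        if (additional_cpus == -1) {
            if (disabled_cpus > 0) {
                additional_cpus = disabled_cpus;
            } else {
                additional_cpus = num_processors / 2;
                if (additional_cpus == 0)
                    additional_cpus = 2;
            }
        }
        possible = num_processors + additional_cpus;
        if (possible > nr_cpus)
            possible = nr_cpus;
        printf("SMP: Allowing %d CPUs, %d hotplug CPUs\n",
               possible, possible - num_processors);   /* 6, 2 */
        return 0;
    }

Booting with additional_cpus=8 instead would allow 12 CPUs in this example.
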
@@@ -965,6 -1026,7 +1025,7 @@@ void __init smp_prepare_cpus(unsigned i
        nmi_watchdog_default();
        current_cpu_data = boot_cpu_data;
        current_thread_info()->cpu = 0;  /* needed? */
+       set_cpu_sibling_map(0);
  
        if (smp_sanity_check(max_cpus) < 0) {
                printk(KERN_INFO "SMP disabled\n");
@@@ -1008,8 -1070,6 +1069,6 @@@ void __init smp_prepare_boot_cpu(void
        int me = smp_processor_id();
        cpu_set(me, cpu_online_map);
        cpu_set(me, cpu_callout_map);
-       cpu_set(0, cpu_sibling_map[0]);
-       cpu_set(0, cpu_core_map[0]);
        per_cpu(cpu_state, me) = CPU_ONLINE;
  }
  
@@@ -1062,9 -1122,6 +1121,6 @@@ int __cpuinit __cpu_up(unsigned int cpu
   */
  void __init smp_cpus_done(unsigned int max_cpus)
  {
- #ifndef CONFIG_HOTPLUG_CPU
-       zap_low_mappings();
- #endif
        smp_cleanup_boot();
  
  #ifdef CONFIG_X86_IO_APIC
  static void remove_siblinginfo(int cpu)
  {
        int sibling;
+       struct cpuinfo_x86 *c = cpu_data;
  
+       for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
+               cpu_clear(cpu, cpu_core_map[sibling]);
+               /*
+                * last thread sibling in this cpu core going down
+                */
+               if (cpus_weight(cpu_sibling_map[cpu]) == 1)
+                       c[sibling].booted_cores--;
+       }
+                       
        for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
                cpu_clear(cpu, cpu_sibling_map[sibling]);
-       for_each_cpu_mask(sibling, cpu_core_map[cpu])
-               cpu_clear(cpu, cpu_core_map[sibling]);
        cpus_clear(cpu_sibling_map[cpu]);
        cpus_clear(cpu_core_map[cpu]);
        phys_proc_id[cpu] = BAD_APICID;
        cpu_core_id[cpu] = BAD_APICID;
+       cpu_clear(cpu, cpu_sibling_setup_map);
  }
  
  void remove_cpu_from_maps(void)
@@@ -1153,6 -1219,12 +1218,12 @@@ void __cpu_die(unsigned int cpu
        printk(KERN_ERR "CPU %u didn't die...\n", cpu);
  }
  
+ static __init int setup_additional_cpus(char *s)
+ {
+       return get_option(&s, &additional_cpus);
+ }
+ __setup("additional_cpus=", setup_additional_cpus);
  #else /* ... !CONFIG_HOTPLUG_CPU */
  
  int __cpu_disable(void)
diff --combined drivers/char/agp/amd64-agp.c
index 78ce98a69f37044dfe17c98d6f264f3989535a23,49996c692a734cdb8784cc247871d8c73cb99bb7..76589782adcbf2f2f2372b919c6b49490bc4d593
@@@ -13,7 -13,6 +13,7 @@@
  #include <linux/pci.h>
  #include <linux/init.h>
  #include <linux/agp_backend.h>
 +#include <linux/mmzone.h>
  #include <asm/page.h>         /* PAGE_SIZE */
  #include "agp.h"
  
@@@ -57,9 -56,8 +57,8 @@@ static int nr_garts
  static struct pci_dev * hammers[MAX_HAMMER_GARTS];
  
  static struct resource *aperture_resource;
- static int __initdata agp_try_unsupported;
+ static int __initdata agp_try_unsupported = 1;
  
- static int gart_iterator;
  #define for_each_nb() for(gart_iterator=0;gart_iterator<nr_garts;gart_iterator++)
  
  static void flush_amd64_tlb(struct pci_dev *dev)
@@@ -73,6 -71,7 +72,7 @@@
  
  static void amd64_tlbflush(struct agp_memory *temp)
  {
+       int gart_iterator;
        for_each_nb()
                flush_amd64_tlb(hammers[gart_iterator]);
  }
@@@ -222,6 -221,7 +222,7 @@@ static struct aper_size_info_32 amd_815
  static int amd_8151_configure(void)
  {
        unsigned long gatt_bus = virt_to_gart(agp_bridge->gatt_table_real);
+       int gart_iterator;
  
        /* Configure AGP regs in each x86-64 host bridge. */
        for_each_nb() {
  static void amd64_cleanup(void)
  {
        u32 tmp;
+       int gart_iterator;
        for_each_nb() {
                /* disable gart translation */
                pci_read_config_dword (hammers[gart_iterator], AMD64_GARTAPERTURECTL, &tmp);
@@@ -697,6 -697,16 +698,16 @@@ static struct pci_device_id agp_amd64_p
        .subvendor      = PCI_ANY_ID,
        .subdevice      = PCI_ANY_ID,
        },
+       /* ALI/ULI M1695 */
+       {
+       .class          = (PCI_CLASS_BRIDGE_HOST << 8),
+       .class_mask     = ~0,
+       .vendor         = PCI_VENDOR_ID_AL,
+       .device         = 0x1689,
+       .subvendor      = PCI_ANY_ID,
+       .subdevice      = PCI_ANY_ID,
+       },
        { }
  };
  
diff --combined include/asm-i386/processor.h
index 8c02b0318703b4c9cf8d68e665949215f97455d6,9cd4a05234a16a2037df6575684b43d84186aa90..5c96cf6dcb3916f5735cdf6005b2ac7b3755cd4d
@@@ -65,7 -65,9 +65,9 @@@ struct cpuinfo_x86 
        int     f00f_bug;
        int     coma_bug;
        unsigned long loops_per_jiffy;
-       unsigned char x86_num_cores;
+       unsigned char x86_max_cores;    /* cpuid returned max cores value */
+       unsigned char booted_cores;     /* number of cores as seen by OS */
+       unsigned char apicid;
  } __attribute__((__aligned__(SMP_CACHE_BYTES)));
  
  #define X86_VENDOR_INTEL 0
@@@ -718,10 -720,4 +720,10 @@@ extern void mtrr_bp_init(void)
  #define mtrr_bp_init() do {} while (0)
  #endif
  
 +#ifdef CONFIG_X86_MCE
 +extern void mcheck_init(struct cpuinfo_x86 *c);
 +#else
 +#define mcheck_init(c) do {} while(0)
 +#endif
 +
  #endif /* __ASM_I386_PROCESSOR_H */
diff --combined include/asm-x86_64/desc.h
index b837820c90733c51534d7eb100a160f19e251fe5,1a3d380f9d5df87335a16abd6923d26f6f6792f0..33764869387bc1c5ad9a707df503e5eef0c8bedf
@@@ -98,16 -98,19 +98,19 @@@ static inline void _set_gate(void *adr
  
  static inline void set_intr_gate(int nr, void *func) 
  { 
+       BUG_ON((unsigned)nr > 0xFF);
        _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, 0); 
  } 
  
  static inline void set_intr_gate_ist(int nr, void *func, unsigned ist) 
  { 
+       BUG_ON((unsigned)nr > 0xFF);
        _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, ist); 
  } 
  
  static inline void set_system_gate(int nr, void *func) 
  { 
+       BUG_ON((unsigned)nr > 0xFF);
        _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0); 
  } 
  
@@@ -129,16 -132,9 +132,16 @@@ static inline void set_tssldt_descripto
  
  static inline void set_tss_desc(unsigned cpu, void *addr)
  { 
 -      set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (unsigned long)addr, 
 -                            DESC_TSS,
 -                            sizeof(struct tss_struct) - 1);
 +      /*
 +       * sizeof(unsigned long) coming from an extra "long" at the end
 +       * of the iobitmap. See tss_struct definition in processor.h
 +       *
 +       * -1? seg base+limit should be pointing to the address of the
 +       * last valid byte
 +       */
 +      set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_TSS],
 +              (unsigned long)addr, DESC_TSS,
 +              IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
  } 
  
  static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
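
The set_tss_desc() change above derives the descriptor limit from the I/O bitmap rather than sizeof(struct tss_struct), so that base+limit really points at the last valid byte of the bitmap plus the extra terminating long mentioned in the comment. A worked computation; the 0x68 offset and the 65536-bit bitmap are assumptions about the usual x86-64 headers, not values shown in this diff:

    #include <stdio.h>

    int main(void)
    {
        unsigned long io_bitmap_offset = 0x68;       /* offsetof(struct tss_struct, io_bitmap) */
        unsigned long io_bitmap_bytes  = 65536 / 8;  /* one bit per I/O port */
        unsigned long limit = io_bitmap_offset + io_bitmap_bytes
                              + sizeof(unsigned long) - 1;

        printf("TSS limit = %#lx\n", limit);         /* 0x206f on LP64 */
        return 0;
    }
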
diff --combined include/asm-x86_64/pgtable.h
index 7309fffeec9a04fe0b45abd0b7a39060a9abe19d,f8e87a57f3a7084879f516424b66f6230a2f9c5a..ecf58c7c16500908f40d93f76e5f00959d3ac9ca
@@@ -16,6 -16,7 +16,7 @@@ extern pud_t level3_physmem_pgt[512]
  extern pud_t level3_ident_pgt[512];
  extern pmd_t level2_kernel_pgt[512];
  extern pgd_t init_level4_pgt[];
+ extern pgd_t boot_level4_pgt[];
  extern unsigned long __supported_pte_mask;
  
  #define swapper_pg_dir init_level4_pgt
@@@ -105,8 -106,6 +106,8 @@@ static inline void pgd_clear (pgd_t * p
  
  #define ptep_get_and_clear(mm,addr,xp)        __pte(xchg(&(xp)->pte, 0))
  
 +struct mm_struct;
 +
  static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
  {
        pte_t pte;
@@@ -247,7 -246,7 +248,7 @@@ static inline unsigned long pud_bad(pud
  #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))       /* FIXME: is this
                                                   right? */
  #define pte_page(x)   pfn_to_page(pte_pfn(x))
- #define pte_pfn(x)  ((pte_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK)
+ #define pte_pfn(x)  ((pte_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
  
  static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
  {
@@@ -354,7 -353,7 +355,7 @@@ static inline pud_t *__pud_offset_k(pud
  #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
  #define       pmd_bad(x)      ((pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE )
  #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
- #define pmd_pfn(x)  ((pmd_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK)
+ #define pmd_pfn(x)  ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
  
  #define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
  #define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE })
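
The pte_pfn()/pmd_pfn() change above masks with __PHYSICAL_MASK before shifting. With the old shift-then-mask order, bits set above the physical-address field of the PTE (for example software-available bits) could land inside the mask and corrupt the returned page frame number. A userspace illustration, assuming 4 KiB pages and a 46-bit physical mask; the PTE value is made up:

    #include <stdio.h>

    int main(void)
    {
        unsigned long page_shift = 12;
        unsigned long phys_mask  = (1UL << 46) - 1;  /* assumed __PHYSICAL_MASK */
        /* hypothetical PTE: pfn 0x1234, present/dirty flags, software bit 52 set */
        unsigned long pte = (0x1234UL << page_shift) | 0x63 | (1UL << 52);

        unsigned long buggy = (pte >> page_shift) & phys_mask;  /* bit 52 lands at bit 40 */
        unsigned long fixed = (pte & phys_mask) >> page_shift;

        printf("shift then mask: %#lx\n", buggy);  /* 0x10000001234 */
        printf("mask then shift: %#lx\n", fixed);  /* 0x1234 */
        return 0;
    }
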
diff --combined include/asm-x86_64/smp.h
index b9fb2173ef99ee5368860a298a2d9a5c06162c5c,cf8f969f90209941ff56f5d9e504b9d1895fa07b..d030409a8fb5c61a2663249894834384d106f7f1
@@@ -47,7 -47,6 +47,6 @@@ extern void lock_ipi_call_lock(void)
  extern void unlock_ipi_call_lock(void);
  extern int smp_num_siblings;
  extern void smp_send_reschedule(int cpu);
- extern void zap_low_mappings(void);
  void smp_stop_cpu(void);
  extern int smp_call_function_single(int cpuid, void (*func) (void *info),
                                void *info, int retry, int wait);
@@@ -82,6 -81,8 +81,8 @@@ extern int safe_smp_processor_id(void)
  extern int __cpu_disable(void);
  extern void __cpu_die(unsigned int cpu);
  extern void prefill_possible_map(void);
+ extern unsigned num_processors;
+ extern unsigned disabled_cpus;
  
  #endif /* !ASSEMBLY */
  
@@@ -135,11 -136,5 +136,11 @@@ static __inline int logical_smp_process
  }
  #endif
  
 +#ifdef CONFIG_SMP
 +#define cpu_physical_id(cpu)          x86_cpu_to_apicid[cpu]
 +#else
 +#define cpu_physical_id(cpu)          boot_cpu_id
 +#endif
 +
  #endif
  
diff --combined include/linux/gfp.h
index 23279d8f19b1a05cd5a09b9403bea9a17ed8baca,4351e6bb5a799033fb0218ba507bddfa774d64c1..313dfe9b443abb0d98b890aae74158e580db3ba8
@@@ -14,6 -14,13 +14,13 @@@ struct vm_area_struct
  /* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */
  #define __GFP_DMA     ((__force gfp_t)0x01u)
  #define __GFP_HIGHMEM ((__force gfp_t)0x02u)
+ #ifdef CONFIG_DMA_IS_DMA32
+ #define __GFP_DMA32   ((__force gfp_t)0x01)   /* ZONE_DMA is ZONE_DMA32 */
+ #elif BITS_PER_LONG < 64
+ #define __GFP_DMA32   ((__force gfp_t)0x00)   /* ZONE_NORMAL is ZONE_DMA32 */
+ #else
+ #define __GFP_DMA32   ((__force gfp_t)0x04)   /* Has own ZONE_DMA32 */
+ #endif
  
  /*
   * Action modifiers - doesn't change the zoning
@@@ -39,7 -46,8 +46,7 @@@
  #define __GFP_COMP    ((__force gfp_t)0x4000u)/* Add compound page metadata */
  #define __GFP_ZERO    ((__force gfp_t)0x8000u)/* Return zeroed page on success */
  #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
 -#define __GFP_NORECLAIM  ((__force gfp_t)0x20000u) /* No realy zone reclaim during allocation */
 -#define __GFP_HARDWALL   ((__force gfp_t)0x40000u) /* Enforce hardwall cpuset memory allocs */
 +#define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
  
  #define __GFP_BITS_SHIFT 20   /* Room for 20 __GFP_FOO bits */
  #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@@ -48,7 -56,7 +55,7 @@@
  #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
                        __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
                        __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
 -                      __GFP_NOMEMALLOC|__GFP_NORECLAIM|__GFP_HARDWALL)
 +                      __GFP_NOMEMALLOC|__GFP_HARDWALL)
  
  #define GFP_ATOMIC    (__GFP_HIGH)
  #define GFP_NOIO      (__GFP_WAIT)
  
  #define GFP_DMA               __GFP_DMA
  
+ /* 4GB DMA on some platforms */
+ #define GFP_DMA32     __GFP_DMA32
  #define gfp_zone(mask) ((__force int)((mask) & (__force gfp_t)GFP_ZONEMASK))
  
  /*
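
Per the hunk above, __GFP_DMA32 resolves differently by configuration: it aliases __GFP_DMA where ZONE_DMA already is the 32-bit zone, becomes 0 on other 32-bit builds, and selects the new ZONE_DMA32 on 64-bit. A hedged usage sketch for in-kernel code that needs pages a 32-bit-DMA device can reach; the function name is made up for illustration:

    #include <linux/gfp.h>

    /* Illustrative only: allocate 2^order pages that sit below 4 GB. */
    static struct page *alloc_dma32_pages(unsigned int order)
    {
        return alloc_pages(GFP_KERNEL | GFP_DMA32, order);
    }
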
diff --combined include/linux/mm.h
index 7b115feca4df23064dfd3ee80a66a9e638b0dcbd,23fad4dae23cd5973844573e0aaa0aa40ef4413d..1013a42d10b15c1e82d9a7cc5d30658e55284b19
@@@ -206,12 -206,6 +206,6 @@@ struct vm_operations_struct 
  struct mmu_gather;
  struct inode;
  
- #ifdef ARCH_HAS_ATOMIC_UNSIGNED
- typedef unsigned page_flags_t;
- #else
- typedef unsigned long page_flags_t;
- #endif
  /*
   * Each physical page in the system has a struct page associated with
   * it to keep track of whatever it is we are using the page for at the
   * a page.
   */
  struct page {
-       page_flags_t flags;             /* Atomic flags, some possibly
+       unsigned long flags;            /* Atomic flags, some possibly
                                         * updated asynchronously */
        atomic_t _count;                /* Usage count, see below. */
        atomic_t _mapcount;             /* Count of ptes mapped in mms,
@@@ -435,7 -429,7 +429,7 @@@ static inline void put_page(struct pag
  #endif
  
  /* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */
- #define SECTIONS_PGOFF                ((sizeof(page_flags_t)*8) - SECTIONS_WIDTH)
+ #define SECTIONS_PGOFF                ((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
  #define NODES_PGOFF           (SECTIONS_PGOFF - NODES_WIDTH)
  #define ZONES_PGOFF           (NODES_PGOFF - ZONES_WIDTH)
  
@@@ -932,13 -926,13 +926,13 @@@ int write_one_page(struct page *page, i
                                         * turning readahead off */
  
  int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 -                      unsigned long offset, unsigned long nr_to_read);
 +                      pgoff_t offset, unsigned long nr_to_read);
  int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 -                      unsigned long offset, unsigned long nr_to_read);
 -unsigned long  page_cache_readahead(struct address_space *mapping,
 +                      pgoff_t offset, unsigned long nr_to_read);
 +unsigned long page_cache_readahead(struct address_space *mapping,
                          struct file_ra_state *ra,
                          struct file *filp,
 -                        unsigned long offset,
 +                        pgoff_t offset,
                          unsigned long size);
  void handle_ra_miss(struct address_space *mapping, 
                    struct file_ra_state *ra, pgoff_t offset);
diff --combined include/linux/mmzone.h
index 6cfb114a0c34444756bda25bdc362dd673585dd2,f3cffc354deac72ae6e9d3a4aa5565696a93c5d7..2c8edad5dccf3796337dcb989f55815cd53faf51
@@@ -71,10 -71,11 +71,11 @@@ struct per_cpu_pageset 
  #endif
  
  #define ZONE_DMA              0
- #define ZONE_NORMAL           1
- #define ZONE_HIGHMEM          2
+ #define ZONE_DMA32            1
+ #define ZONE_NORMAL           2
+ #define ZONE_HIGHMEM          3
  
- #define MAX_NR_ZONES          3       /* Sync this with ZONES_SHIFT */
+ #define MAX_NR_ZONES          4       /* Sync this with ZONES_SHIFT */
  #define ZONES_SHIFT           2       /* ceil(log2(MAX_NR_ZONES)) */
  
  
  
  /*
   * On machines where it is needed (eg PCs) we divide physical memory
-  * into multiple physical zones. On a PC we have 3 zones:
+  * into multiple physical zones. On a PC we have 4 zones:
   *
   * ZONE_DMA     < 16 MB       ISA DMA capable memory
+  * ZONE_DMA32      0 MB       Empty
   * ZONE_NORMAL        16-896 MB       direct mapped by the kernel
   * ZONE_HIGHMEM        > 896 MB       only page cache and user processes
   */
@@@ -329,7 -331,7 +331,7 @@@ void get_zone_counts(unsigned long *act
  void build_all_zonelists(void);
  void wakeup_kswapd(struct zone *zone, int order);
  int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 -              int alloc_type, int can_try_harder, gfp_t gfp_high);
 +              int classzone_idx, int alloc_flags);
  
  #ifdef CONFIG_HAVE_MEMORY_PRESENT
  void memory_present(int nid, unsigned long start, unsigned long end);
@@@ -433,7 -435,9 +435,9 @@@ int lowmem_reserve_ratio_sysctl_handler
  
  #include <linux/topology.h>
  /* Returns the number of the current Node. */
+ #ifndef numa_node_id
  #define numa_node_id()                (cpu_to_node(raw_smp_processor_id()))
+ #endif
  
  #ifndef CONFIG_NEED_MULTIPLE_NODES
  
@@@ -453,12 -457,12 +457,12 @@@ extern struct pglist_data contig_page_d
  #include <asm/sparsemem.h>
  #endif
  
- #if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED)
+ #if BITS_PER_LONG == 32
  /*
-  * with 32 bit page->flags field, we reserve 8 bits for node/zone info.
-  * there are 3 zones (2 bits) and this leaves 8-2=6 bits for nodes.
+  * with 32 bit page->flags field, we reserve 9 bits for node/zone info.
+  * there are 4 zones (3 bits) and this leaves 9-3=6 bits for nodes.
   */
- #define FLAGS_RESERVED                8
+ #define FLAGS_RESERVED                9
  
  #elif BITS_PER_LONG == 64
  /*
diff --combined mm/page_alloc.c
index 3c5cf664abd2eca14d2613a57023dcfcac6b6fde,259a71bacca40f0995f31d92ba73ba9f1cdb20dc..104e69ca55e0117c5ed596e9c96a01a5336a3fd8
@@@ -60,10 -60,14 +60,13 @@@ long nr_swap_pages
   *    NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA
   *    HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL
   *    HIGHMEM allocation will (224M+784M)/256 of ram reserved in ZONE_DMA
+  *
+  * TBD: should special case ZONE_DMA32 machines here - in those we normally
+  * don't need any ZONE_NORMAL reservation
   */
- int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 };
+ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 };
  
  EXPORT_SYMBOL(totalram_pages);
 -EXPORT_SYMBOL(nr_swap_pages);
  
  /*
   * Used by page_zone() to look up the address of the struct zone whose
@@@ -72,7 -76,7 +75,7 @@@
  struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly;
  EXPORT_SYMBOL(zone_table);
  
- static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
+ static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" };
  int min_free_kbytes = 1024;
  
  unsigned long __initdata nr_kernel_pages;
@@@ -124,7 -128,7 +127,7 @@@ static void bad_page(const char *functi
        printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
                function, current->comm, page);
        printk(KERN_EMERG "flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
-               (int)(2*sizeof(page_flags_t)), (unsigned long)page->flags,
+               (int)(2*sizeof(unsigned long)), (unsigned long)page->flags,
                page->mapping, page_mapcount(page), page_count(page));
        printk(KERN_EMERG "Backtrace:\n");
        dump_stack();
@@@ -732,7 -736,9 +735,7 @@@ buffered_rmqueue(struct zone *zone, in
                }
                local_irq_restore(flags);
                put_cpu();
 -      }
 -
 -      if (page == NULL) {
 +      } else {
                spin_lock_irqsave(&zone->lock, flags);
                page = __rmqueue(zone, order);
                spin_unlock_irqrestore(&zone->lock, flags);
        return page;
  }
  
 +#define ALLOC_NO_WATERMARKS   0x01 /* don't check watermarks at all */
 +#define ALLOC_HARDER          0x02 /* try to alloc harder */
 +#define ALLOC_HIGH            0x04 /* __GFP_HIGH set */
 +#define ALLOC_CPUSET          0x08 /* check for correct cpuset */
 +
  /*
   * Return 1 if free pages are above 'mark'. This takes into account the order
   * of the allocation.
   */
  int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 -                    int classzone_idx, int can_try_harder, gfp_t gfp_high)
 +                    int classzone_idx, int alloc_flags)
  {
        /* free_pages my go negative - that's OK */
        long min = mark, free_pages = z->free_pages - (1 << order) + 1;
        int o;
  
 -      if (gfp_high)
 +      if (alloc_flags & ALLOC_HIGH)
                min -= min / 2;
 -      if (can_try_harder)
 +      if (alloc_flags & ALLOC_HARDER)
                min -= min / 4;
  
        if (free_pages <= min + z->lowmem_reserve[classzone_idx])
        return 1;
  }
  
 -static inline int
 -should_reclaim_zone(struct zone *z, gfp_t gfp_mask)
 +/*
 + * get_page_from_freelist goes through the zonelist trying to allocate
 + * a page.
 + */
 +static struct page *
 +get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 +              struct zonelist *zonelist, int alloc_flags)
  {
 -      if (!z->reclaim_pages)
 -              return 0;
 -      if (gfp_mask & __GFP_NORECLAIM)
 -              return 0;
 -      return 1;
 +      struct zone **z = zonelist->zones;
 +      struct page *page = NULL;
 +      int classzone_idx = zone_idx(*z);
 +
 +      /*
 +       * Go through the zonelist once, looking for a zone with enough free.
 +       * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 +       */
 +      do {
 +              if ((alloc_flags & ALLOC_CPUSET) &&
 +                              !cpuset_zone_allowed(*z, gfp_mask))
 +                      continue;
 +
 +              if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
 +                      if (!zone_watermark_ok(*z, order, (*z)->pages_low,
 +                                  classzone_idx, alloc_flags))
 +                              continue;
 +              }
 +
 +              page = buffered_rmqueue(*z, order, gfp_mask);
 +              if (page) {
 +                      zone_statistics(zonelist, *z);
 +                      break;
 +              }
 +      } while (*(++z) != NULL);
 +      return page;
  }
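
The walk above iterates a NULL-terminated array of zone pointers and takes the
first zone that passes the cpuset and watermark checks. A toy userspace model of
that pattern follows; toy_zone, first_fit and the figures are invented for
illustration (the free-page comparison stands in for the watermark test) and are
not the kernel's types.

#include <stddef.h>
#include <stdio.h>

struct toy_zone { const char *name; long free_pages; };

/* Walk a NULL-terminated zone array, first fit wins. */
static struct toy_zone *first_fit(struct toy_zone **z, long want)
{
        do {
                if ((*z)->free_pages >= want)
                        return *z;
        } while (*(++z) != NULL);
        return NULL;
}

int main(void)
{
        struct toy_zone highmem = { "HighMem", 10 }, normal = { "Normal", 1000 };
        struct toy_zone *zonelist[] = { &highmem, &normal, NULL };
        struct toy_zone *hit = first_fit(zonelist, 100);

        printf("allocated from %s\n", hit ? hit->name : "(nothing)");
        return 0;
}
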
  
  /*
@@@ -832,75 -807,105 +835,75 @@@ __alloc_pages(gfp_t gfp_mask, unsigned 
                struct zonelist *zonelist)
  {
        const gfp_t wait = gfp_mask & __GFP_WAIT;
 -      struct zone **zones, *z;
 +      struct zone **z;
        struct page *page;
        struct reclaim_state reclaim_state;
        struct task_struct *p = current;
 -      int i;
 -      int classzone_idx;
        int do_retry;
 -      int can_try_harder;
 +      int alloc_flags;
        int did_some_progress;
  
        might_sleep_if(wait);
  
 -      /*
 -       * The caller may dip into page reserves a bit more if the caller
 -       * cannot run direct reclaim, or is the caller has realtime scheduling
 -       * policy
 -       */
 -      can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;
 +      z = zonelist->zones;  /* the list of zones suitable for gfp_mask */
  
 -      zones = zonelist->zones;  /* the list of zones suitable for gfp_mask */
 -
 -      if (unlikely(zones[0] == NULL)) {
 +      if (unlikely(*z == NULL)) {
                /* Should this ever happen?? */
                return NULL;
        }
 +restart:
 +      page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
 +                              zonelist, ALLOC_CPUSET);
 +      if (page)
 +              goto got_pg;
  
 -      classzone_idx = zone_idx(zones[0]);
 +      do
 +              wakeup_kswapd(*z, order);
 +      while (*(++z));
  
 -restart:
        /*
 -       * Go through the zonelist once, looking for a zone with enough free.
 -       * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 +       * OK, we're below the kswapd watermark and have kicked background
 +       * reclaim. Now things get more complex, so set up alloc_flags according
 +       * to how we want to proceed.
 +       *
 +       * The caller may dip into page reserves a bit more if the caller
 +       * cannot run direct reclaim, or if the caller has realtime scheduling
 +       * policy.
         */
 -      for (i = 0; (z = zones[i]) != NULL; i++) {
 -              int do_reclaim = should_reclaim_zone(z, gfp_mask);
 -
 -              if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
 -                      continue;
 -
 -              /*
 -               * If the zone is to attempt early page reclaim then this loop
 -               * will try to reclaim pages and check the watermark a second
 -               * time before giving up and falling back to the next zone.
 -               */
 -zone_reclaim_retry:
 -              if (!zone_watermark_ok(z, order, z->pages_low,
 -                                     classzone_idx, 0, 0)) {
 -                      if (!do_reclaim)
 -                              continue;
 -                      else {
 -                              zone_reclaim(z, gfp_mask, order);
 -                              /* Only try reclaim once */
 -                              do_reclaim = 0;
 -                              goto zone_reclaim_retry;
 -                      }
 -              }
 -
 -              page = buffered_rmqueue(z, order, gfp_mask);
 -              if (page)
 -                      goto got_pg;
 -      }
 -
 -      for (i = 0; (z = zones[i]) != NULL; i++)
 -              wakeup_kswapd(z, order);
 +      alloc_flags = 0;
 +      if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait)
 +              alloc_flags |= ALLOC_HARDER;
 +      if (gfp_mask & __GFP_HIGH)
 +              alloc_flags |= ALLOC_HIGH;
 +      if (wait)
 +              alloc_flags |= ALLOC_CPUSET;
  
        /*
         * Go through the zonelist again. Let __GFP_HIGH and allocations
 -       * coming from realtime tasks to go deeper into reserves
 +       * coming from realtime tasks go deeper into reserves.
         *
         * This is the last chance, in general, before the goto nopage.
         * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
         * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
         */
 -      for (i = 0; (z = zones[i]) != NULL; i++) {
 -              if (!zone_watermark_ok(z, order, z->pages_min,
 -                                     classzone_idx, can_try_harder,
 -                                     gfp_mask & __GFP_HIGH))
 -                      continue;
 -
 -              if (wait && !cpuset_zone_allowed(z, gfp_mask))
 -                      continue;
 -
 -              page = buffered_rmqueue(z, order, gfp_mask);
 -              if (page)
 -                      goto got_pg;
 -      }
 +      page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
 +      if (page)
 +              goto got_pg;
  
        /* This allocation should allow future memory freeing. */
  
        if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
                        && !in_interrupt()) {
                if (!(gfp_mask & __GFP_NOMEMALLOC)) {
 +nofail_alloc:
                        /* go through the zonelist yet again, ignoring mins */
 -                      for (i = 0; (z = zones[i]) != NULL; i++) {
 -                              if (!cpuset_zone_allowed(z, gfp_mask))
 -                                      continue;
 -                              page = buffered_rmqueue(z, order, gfp_mask);
 -                              if (page)
 -                                      goto got_pg;
 +                      page = get_page_from_freelist(gfp_mask, order,
 +                              zonelist, ALLOC_NO_WATERMARKS|ALLOC_CPUSET);
 +                      if (page)
 +                              goto got_pg;
 +                      if (gfp_mask & __GFP_NOFAIL) {
 +                              blk_congestion_wait(WRITE, HZ/50);
 +                              goto nofail_alloc;
                        }
                }
                goto nopage;
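
To make the slow-path policy in the hunk above easier to follow, here is a
standalone sketch of how alloc_flags is chosen once the first watermark-respecting
pass has failed. pick_alloc_flags() and its boolean parameters are invented; they
stand in for rt_task(p), in_interrupt(), the __GFP_WAIT bit and __GFP_HIGH, and
the flag values are the ALLOC_* #defines introduced earlier in this patch.

#include <stdbool.h>
#include <stdio.h>

#define ALLOC_HARDER 0x02
#define ALLOC_HIGH   0x04
#define ALLOC_CPUSET 0x08

static int pick_alloc_flags(bool rt_task, bool in_irq, bool can_wait, bool gfp_high)
{
        int alloc_flags = 0;

        if ((rt_task && !in_irq) || !can_wait)
                alloc_flags |= ALLOC_HARDER;    /* may dip further into reserves */
        if (gfp_high)
                alloc_flags |= ALLOC_HIGH;      /* __GFP_HIGH halves the watermark */
        if (can_wait)
                alloc_flags |= ALLOC_CPUSET;    /* only honour cpusets when we may sleep */
        return alloc_flags;
}

int main(void)
{
        /* e.g. a GFP_ATOMIC-style caller: cannot wait, __GFP_HIGH set */
        printf("atomic flags: 0x%x\n", pick_alloc_flags(false, false, false, true));
        return 0;
}
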
@@@ -918,7 -923,7 +921,7 @@@ rebalance
        reclaim_state.reclaimed_slab = 0;
        p->reclaim_state = &reclaim_state;
  
 -      did_some_progress = try_to_free_pages(zones, gfp_mask);
 +      did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);
  
        p->reclaim_state = NULL;
        p->flags &= ~PF_MEMALLOC;
        cond_resched();
  
        if (likely(did_some_progress)) {
 -              for (i = 0; (z = zones[i]) != NULL; i++) {
 -                      if (!zone_watermark_ok(z, order, z->pages_min,
 -                                             classzone_idx, can_try_harder,
 -                                             gfp_mask & __GFP_HIGH))
 -                              continue;
 -
 -                      if (!cpuset_zone_allowed(z, gfp_mask))
 -                              continue;
 -
 -                      page = buffered_rmqueue(z, order, gfp_mask);
 -                      if (page)
 -                              goto got_pg;
 -              }
 +              page = get_page_from_freelist(gfp_mask, order,
 +                                              zonelist, alloc_flags);
 +              if (page)
 +                      goto got_pg;
        } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
                /*
                 * Go through the zonelist yet one more time, keep
                 * a parallel oom killing, we must fail if we're still
                 * under heavy pressure.
                 */
 -              for (i = 0; (z = zones[i]) != NULL; i++) {
 -                      if (!zone_watermark_ok(z, order, z->pages_high,
 -                                             classzone_idx, 0, 0))
 -                              continue;
 -
 -                      if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
 -                              continue;
 -
 -                      page = buffered_rmqueue(z, order, gfp_mask);
 -                      if (page)
 -                              goto got_pg;
 -              }
 +              page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
 +                                              zonelist, ALLOC_CPUSET);
 +              if (page)
 +                      goto got_pg;
  
                out_of_memory(gfp_mask, order);
                goto restart;
@@@ -973,7 -995,9 +976,7 @@@ nopage
                dump_stack();
                show_mem();
        }
 -      return NULL;
  got_pg:
 -      zone_statistics(zonelist, z);
        return page;
  }
  
@@@ -1310,7 -1334,7 +1313,7 @@@ void show_free_areas(void
                } else
                        printk("\n");
  
 -              for_each_cpu(cpu) {
 +              for_each_online_cpu(cpu) {
                        struct per_cpu_pageset *pageset;
  
                        pageset = zone_pcp(zone, cpu);
@@@ -1421,6 -1445,10 +1424,10 @@@ static int __init build_zonelists_node(
                zone = pgdat->node_zones + ZONE_NORMAL;
                if (zone->present_pages)
                        zonelist->zones[j++] = zone;
+       case ZONE_DMA32:
+               zone = pgdat->node_zones + ZONE_DMA32;
+               if (zone->present_pages)
+                       zonelist->zones[j++] = zone;
        case ZONE_DMA:
                zone = pgdat->node_zones + ZONE_DMA;
                if (zone->present_pages)
@@@ -1435,6 -1463,8 +1442,8 @@@ static inline int highest_zone(int zone
        int res = ZONE_NORMAL;
        if (zone_bits & (__force int)__GFP_HIGHMEM)
                res = ZONE_HIGHMEM;
+       if (zone_bits & (__force int)__GFP_DMA32)
+               res = ZONE_DMA32;
        if (zone_bits & (__force int)__GFP_DMA)
                res = ZONE_DMA;
        return res;
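
Taken together, the two hunks above slot ZONE_DMA32 between DMA and Normal:
build_zonelists_node() falls through from the requested class downwards, and
highest_zone() now maps __GFP_DMA32 to the new zone. The sketch below shows the
resulting fallback order for a __GFP_HIGHMEM request; the enum ordering is my
reading of this era's include/linux/mmzone.h (it matches the zone_names[] order
earlier in this file), and the code itself is illustrative only.

#include <stdio.h>

enum { ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, ZONE_HIGHMEM };

static const char *zone_names[] = { "DMA", "DMA32", "Normal", "HighMem" };

int main(void)
{
        int k = ZONE_HIGHMEM;   /* what highest_zone() returns for __GFP_HIGHMEM */

        printf("fallback order:");
        for (; k >= ZONE_DMA; k--)      /* same order as the case fallthrough */
                printf(" %s", zone_names[k]);
        printf("\n");                   /* HighMem Normal DMA32 DMA */
        return 0;
}
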
@@@ -1846,11 -1876,10 +1855,10 @@@ static int __devinit pageset_cpuup_call
                        if (process_zones(cpu))
                                ret = NOTIFY_BAD;
                        break;
- #ifdef CONFIG_HOTPLUG_CPU
+               case CPU_UP_CANCELED:
                case CPU_DEAD:
                        free_zone_pagesets(cpu);
                        break;
- #endif
                default:
                        break;
        }
@@@ -1955,7 -1984,7 +1963,7 @@@ static void __init free_area_init_core(
                if (zholes_size)
                        realsize -= zholes_size[j];
  
-               if (j == ZONE_DMA || j == ZONE_NORMAL)
+               if (j < ZONE_HIGHMEM)
                        nr_kernel_pages += realsize;
                nr_all_pages += realsize;
  
@@@ -2397,18 -2426,13 +2405,18 @@@ void setup_per_zone_pages_min(void
        }
  
        for_each_zone(zone) {
 +              unsigned long tmp;
                spin_lock_irqsave(&zone->lru_lock, flags);
 +              tmp = (pages_min * zone->present_pages) / lowmem_pages;
                if (is_highmem(zone)) {
                        /*
 -                       * Often, highmem doesn't need to reserve any pages.
 -                       * But the pages_min/low/high values are also used for
 -                       * batching up page reclaim activity so we need a
 -                       * decent value here.
 +                       * __GFP_HIGH and PF_MEMALLOC allocations usually don't
 +                       * need highmem pages, so cap pages_min to a small
 +                       * value here.
 +                       *
 +                       * The (pages_high-pages_low) and (pages_low-pages_min)
 +                       * deltas control async page reclaim, and so should
 +                       * not be capped for highmem.
                         */
                        int min_pages;
  
                                min_pages = 128;
                        zone->pages_min = min_pages;
                } else {
 -                      /* if it's a lowmem zone, reserve a number of pages
 +                      /*
 +                       * If it's a lowmem zone, reserve a number of pages
                         * proportionate to the zone's size.
                         */
 -                      zone->pages_min = (pages_min * zone->present_pages) /
 -                                         lowmem_pages;
 +                      zone->pages_min = tmp;
                }
  
 -              /*
 -               * When interpreting these watermarks, just keep in mind that:
 -               * zone->pages_min == (zone->pages_min * 4) / 4;
 -               */
 -              zone->pages_low   = (zone->pages_min * 5) / 4;
 -              zone->pages_high  = (zone->pages_min * 6) / 4;
 +              zone->pages_low   = zone->pages_min + tmp / 4;
 +              zone->pages_high  = zone->pages_min + tmp / 2;
                spin_unlock_irqrestore(&zone->lru_lock, flags);
        }
  }
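
A worked example of the new watermark spacing set up above, with invented numbers:
a highmem zone keeps a small, capped pages_min, but its pages_low/pages_high
deltas still scale with the zone's proportional share (tmp), so kswapd batching
behaves the same as on lowmem.

#include <stdio.h>

int main(void)
{
        unsigned long tmp = 4096;       /* hypothetical proportional share for this zone */

        /* lowmem zone: pages_min follows the proportional share directly */
        unsigned long low_min = tmp;
        printf("lowmem : min=%lu low=%lu high=%lu\n",
               low_min, low_min + tmp / 4, low_min + tmp / 2);

        /* highmem zone: pages_min capped (e.g. to 128) but deltas still use tmp */
        unsigned long high_min = 128;
        printf("highmem: min=%lu low=%lu high=%lu\n",
               high_min, high_min + tmp / 4, high_min + tmp / 2);
        return 0;
}
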