Merge branch 'linus' into perf/core
author Ingo Molnar <mingo@elte.hu>
Fri, 23 Apr 2010 09:10:28 +0000 (11:10 +0200)
committer Ingo Molnar <mingo@elte.hu>
Fri, 23 Apr 2010 09:10:30 +0000 (11:10 +0200)
Merge reason: merge the latest fixes, update to latest -rc.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
MAINTAINERS
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
kernel/sched.c

diff --combined MAINTAINERS
index c3e9c3633b75ca89bee21c126339be1053717770,183887518fe3a04e137c192186348ba59f043543..693c2fe17dadd3567fc7306358fa590552943ff5
@@@ -485,8 -485,8 +485,8 @@@ S: Maintaine
  F:    drivers/input/mouse/bcm5974.c
  
  APPLE SMC DRIVER
- M:    Nicolas Boichat <nicolas@boichat.ch>
- L:    mactel-linux-devel@lists.sourceforge.net
+ M:    Henrik Rydberg <rydberg@euromail.se>
+ L:    lm-sensors@lm-sensors.org
  S:    Maintained
  F:    drivers/hwmon/applesmc.c
  
@@@ -971,6 -971,16 +971,16 @@@ L:       linux-arm-kernel@lists.infradead.or
  W:    http://www.mcuos.com
  S:    Maintained
  
+ ARM/U300 MACHINE SUPPORT
+ M:    Linus Walleij <linus.walleij@stericsson.com>
+ L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+ S:    Supported
+ F:    arch/arm/mach-u300/
+ F:    drivers/i2c/busses/i2c-stu300.c
+ F:    drivers/rtc/rtc-coh901331.c
+ F:    drivers/watchdog/coh901327_wdt.c
+ F:    drivers/dma/coh901318*
+
  ARM/U8500 ARM ARCHITECTURE
  M:    Srinidhi Kasagar <srinidhi.kasagar@stericsson.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@@ -1950,7 -1960,7 +1960,7 @@@ F:      lib/kobj
  
  DRM DRIVERS
  M:    David Airlie <airlied@linux.ie>
- L:    dri-devel@lists.sourceforge.net
+ L:    dri-devel@lists.freedesktop.org
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/airlied/drm-2.6.git
  S:    Maintained
  F:    drivers/gpu/drm/
@@@ -4343,13 -4353,13 +4353,13 @@@ M:   Paul Mackerras <paulus@samba.org
  M:    Ingo Molnar <mingo@elte.hu>
  M:    Arnaldo Carvalho de Melo <acme@redhat.com>
  S:    Supported
 -F:    kernel/perf_event.c
 +F:    kernel/perf_event*.c
  F:    include/linux/perf_event.h
 -F:    arch/*/kernel/perf_event.c
 -F:    arch/*/kernel/*/perf_event.c
 -F:    arch/*/kernel/*/*/perf_event.c
 +F:    arch/*/kernel/perf_event*.c
 +F:    arch/*/kernel/*/perf_event*.c
 +F:    arch/*/kernel/*/*/perf_event*.c
  F:    arch/*/include/asm/perf_event.h
 -F:    arch/*/lib/perf_event.c
 +F:    arch/*/lib/perf_event*.c
  F:    arch/*/kernel/perf_callchain.c
  F:    tools/perf/
  
@@@ -4781,12 -4791,11 +4791,11 @@@ F:   drivers/s390/crypto
  
  S390 ZFCP DRIVER
  M:    Christof Schmitt <christof.schmitt@de.ibm.com>
- M:    Martin Peschke <mp3@de.ibm.com>
+ M:    Swen Schillig <swen@vnet.ibm.com>
  M:    linux390@de.ibm.com
  L:    linux-s390@vger.kernel.org
  W:    http://www.ibm.com/developerworks/linux/linux390/
  S:    Supported
- F:    Documentation/s390/zfcpdump.txt
  F:    drivers/s390/scsi/zfcp_*
  
  S390 IUCV NETWORK LAYER
diff --combined arch/x86/kvm/vmx.c
index 82be6dac3d25f034b3c829af7d0fb7560af70b50,bc933cfb4e66d7fff8af14a8a4d8beac2ce6fed3..32022a8a5c3b342e9a69350e7241c010e3803268
@@@ -77,6 -77,8 +77,8 @@@ module_param(emulate_invalid_guest_stat
  #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
  #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
  
+ #define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
+
  /*
   * These 2 parameters are used to config the controls for Pause-Loop Exiting:
   * ple_gap:    upper bound on the amount of time between two successive
@@@ -131,7 -133,7 +133,7 @@@ struct vcpu_vmx 
        } host_state;
        struct {
                int vm86_active;
-               u8 save_iopl;
+               ulong save_rflags;
                struct kvm_save_segment {
                        u16 selector;
                        unsigned long base;
@@@ -818,18 -820,23 +820,23 @@@ static void vmx_fpu_deactivate(struct k
  
  static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
  {
-       unsigned long rflags;
+       unsigned long rflags, save_rflags;
  
        rflags = vmcs_readl(GUEST_RFLAGS);
-       if (to_vmx(vcpu)->rmode.vm86_active)
-               rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
+       if (to_vmx(vcpu)->rmode.vm86_active) {
+               rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
+               save_rflags = to_vmx(vcpu)->rmode.save_rflags;
+               rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
+       }
        return rflags;
  }
  
  static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
  {
-       if (to_vmx(vcpu)->rmode.vm86_active)
+       if (to_vmx(vcpu)->rmode.vm86_active) {
+               to_vmx(vcpu)->rmode.save_rflags = rflags;
                rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
+       }
        vmcs_writel(GUEST_RFLAGS, rflags);
  }
  
@@@ -1483,8 -1490,8 +1490,8 @@@ static void enter_pmode(struct kvm_vcp
        vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
  
        flags = vmcs_readl(GUEST_RFLAGS);
-       flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
-       flags |= (vmx->rmode.save_iopl << IOPL_SHIFT);
+       flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
+       flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
        vmcs_writel(GUEST_RFLAGS, flags);
  
        vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
@@@ -1557,8 -1564,7 +1564,7 @@@ static void enter_rmode(struct kvm_vcp
        vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
  
        flags = vmcs_readl(GUEST_RFLAGS);
-       vmx->rmode.save_iopl
-               = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+       vmx->rmode.save_rflags = flags;
  
        flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
  
@@@ -3654,11 -3660,8 +3660,11 @@@ static void vmx_complete_interrupts(str
  
        /* We need to handle NMIs before interrupts are enabled */
        if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
 -          (exit_intr_info & INTR_INFO_VALID_MASK))
 +          (exit_intr_info & INTR_INFO_VALID_MASK)) {
 +              kvm_before_handle_nmi(&vmx->vcpu);
                asm("int $2");
 +              kvm_after_handle_nmi(&vmx->vcpu);
 +      }
  
        idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
  
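A note on the RFLAGS handling in the vmx.c hunks above: while real-mode emulation is active, the hardware view still has IOPL and VM forced on, but the guest's full RFLAGS is now preserved in rmode.save_rflags instead of only the IOPL field. A minimal standalone sketch of that save/mask/merge pattern (the globals and main() harness are hypothetical; only the bit masking mirrors the diff):

#include <stdio.h>

#define X86_EFLAGS_IOPL 0x3000UL   /* bits 12-13 */
#define X86_EFLAGS_VM   0x20000UL  /* bit 17 */
#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))

static unsigned long save_rflags;   /* stands in for vmx->rmode.save_rflags */
static unsigned long guest_rflags;  /* stands in for the GUEST_RFLAGS VMCS field */

/* On entering emulated real mode: remember the guest view, force IOPL=3 and VM=1. */
static void set_rflags_rmode(unsigned long rflags)
{
	save_rflags = rflags;
	guest_rflags = rflags | X86_EFLAGS_IOPL | X86_EFLAGS_VM;
}

/* When the guest reads RFLAGS: hand back its own IOPL/VM, not the forced ones. */
static unsigned long get_rflags_rmode(void)
{
	unsigned long rflags = guest_rflags & RMODE_GUEST_OWNED_EFLAGS_BITS;

	return rflags | (save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS);
}

int main(void)
{
	set_rflags_rmode(0x202);                /* IF set, IOPL=0, VM=0 */
	printf("%#lx\n", get_rflags_rmode());   /* prints 0x202, not 0x23202 */
	return 0;
}
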
diff --combined arch/x86/kvm/x86.c
index 21b9b6aa3e88512667a41dfaffb20ce2ed201da7,3c4ca98ad27fe26fabf8d8529fc75be0a6971c67..73d854c36e39ce1a9fbf748b99ade29ecc14489c
@@@ -40,7 -40,6 +40,7 @@@
  #include <linux/user-return-notifier.h>
  #include <linux/srcu.h>
  #include <linux/slab.h>
 +#include <linux/perf_event.h>
  #include <trace/events/kvm.h>
  #undef TRACE_INCLUDE_FILE
  #define CREATE_TRACE_POINTS
@@@ -434,8 -433,6 +434,6 @@@ void kvm_set_cr0(struct kvm_vcpu *vcpu
  
  #ifdef CONFIG_X86_64
        if (cr0 & 0xffffffff00000000UL) {
-               printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
-                      cr0, kvm_read_cr0(vcpu));
                kvm_inject_gp(vcpu, 0);
                return;
        }
        cr0 &= ~CR0_RESERVED_BITS;
  
        if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
-               printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
                kvm_inject_gp(vcpu, 0);
                return;
        }
  
        if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
-               printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
-                      "and a clear PE flag\n");
                kvm_inject_gp(vcpu, 0);
                return;
        }
                        int cs_db, cs_l;
  
                        if (!is_pae(vcpu)) {
-                               printk(KERN_DEBUG "set_cr0: #GP, start paging "
-                                      "in long mode while PAE is disabled\n");
                                kvm_inject_gp(vcpu, 0);
                                return;
                        }
                        kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
                        if (cs_l) {
-                               printk(KERN_DEBUG "set_cr0: #GP, start paging "
-                                      "in long mode while CS.L == 1\n");
                                kvm_inject_gp(vcpu, 0);
                                return;
  
                } else
  #endif
                if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
-                       printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
-                              "reserved bits\n");
                        kvm_inject_gp(vcpu, 0);
                        return;
                }
@@@ -506,28 -494,23 +495,23 @@@ void kvm_set_cr4(struct kvm_vcpu *vcpu
        unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
  
        if (cr4 & CR4_RESERVED_BITS) {
-               printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
                kvm_inject_gp(vcpu, 0);
                return;
        }
  
        if (is_long_mode(vcpu)) {
                if (!(cr4 & X86_CR4_PAE)) {
-                       printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
-                              "in long mode\n");
                        kvm_inject_gp(vcpu, 0);
                        return;
                }
        } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
                   && ((cr4 ^ old_cr4) & pdptr_bits)
                   && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
-               printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
                kvm_inject_gp(vcpu, 0);
                return;
        }
  
        if (cr4 & X86_CR4_VMXE) {
-               printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
                kvm_inject_gp(vcpu, 0);
                return;
        }
@@@ -548,21 -531,16 +532,16 @@@ void kvm_set_cr3(struct kvm_vcpu *vcpu
  
        if (is_long_mode(vcpu)) {
                if (cr3 & CR3_L_MODE_RESERVED_BITS) {
-                       printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
                        kvm_inject_gp(vcpu, 0);
                        return;
                }
        } else {
                if (is_pae(vcpu)) {
                        if (cr3 & CR3_PAE_RESERVED_BITS) {
-                               printk(KERN_DEBUG
-                                      "set_cr3: #GP, reserved bits\n");
                                kvm_inject_gp(vcpu, 0);
                                return;
                        }
                        if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
-                               printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
-                                      "reserved bits\n");
                                kvm_inject_gp(vcpu, 0);
                                return;
                        }
@@@ -594,7 -572,6 +573,6 @@@ EXPORT_SYMBOL_GPL(kvm_set_cr3)
  void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
  {
        if (cr8 & CR8_RESERVED_BITS) {
-               printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
                kvm_inject_gp(vcpu, 0);
                return;
        }
@@@ -650,15 -627,12 +628,12 @@@ static u32 emulated_msrs[] = 
  static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
  {
        if (efer & efer_reserved_bits) {
-               printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
-                      efer);
                kvm_inject_gp(vcpu, 0);
                return;
        }
  
        if (is_paging(vcpu)
            && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) {
-               printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
                kvm_inject_gp(vcpu, 0);
                return;
        }
  
                feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
                if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
-                       printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
                        kvm_inject_gp(vcpu, 0);
                        return;
                }
  
                feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
                if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
-                       printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
                        kvm_inject_gp(vcpu, 0);
                        return;
                }
@@@ -968,9 -940,13 +941,13 @@@ static int set_msr_mce(struct kvm_vcpu 
                if (msr >= MSR_IA32_MC0_CTL &&
                    msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
                        u32 offset = msr - MSR_IA32_MC0_CTL;
-                       /* only 0 or all 1s can be written to IA32_MCi_CTL */
+                       /* only 0 or all 1s can be written to IA32_MCi_CTL
+                        * some Linux kernels though clear bit 10 in bank 4 to
+                        * workaround a BIOS/GART TBL issue on AMD K8s, ignore
+                        * this to avoid an uncatched #GP in the guest
+                        */
                        if ((offset & 0x3) == 0 &&
-                           data != 0 && data != ~(u64)0)
+                           data != 0 && (data | (1 << 10)) != ~(u64)0)
                                return -1;
                        vcpu->arch.mce_banks[offset] = data;
                        break;
@@@ -2636,8 -2612,9 +2613,9 @@@ static int kvm_vm_ioctl_reinject(struc
  int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                                      struct kvm_dirty_log *log)
  {
-       int r, n, i;
+       int r, i;
        struct kvm_memory_slot *memslot;
+       unsigned long n;
        unsigned long is_dirty = 0;
        unsigned long *dirty_bitmap = NULL;
  
        if (!memslot->dirty_bitmap)
                goto out;
  
-       n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+       n = kvm_dirty_bitmap_bytes(memslot);
  
        r = -ENOMEM;
        dirty_bitmap = vmalloc(n);
@@@ -3766,51 -3743,6 +3744,51 @@@ static void kvm_timer_init(void
        }
  }
  
 +static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
 +
 +static int kvm_is_in_guest(void)
 +{
 +      return percpu_read(current_vcpu) != NULL;
 +}
 +
 +static int kvm_is_user_mode(void)
 +{
 +      int user_mode = 3;
 +
 +      if (percpu_read(current_vcpu))
 +              user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu));
 +
 +      return user_mode != 0;
 +}
 +
 +static unsigned long kvm_get_guest_ip(void)
 +{
 +      unsigned long ip = 0;
 +
 +      if (percpu_read(current_vcpu))
 +              ip = kvm_rip_read(percpu_read(current_vcpu));
 +
 +      return ip;
 +}
 +
 +static struct perf_guest_info_callbacks kvm_guest_cbs = {
 +      .is_in_guest            = kvm_is_in_guest,
 +      .is_user_mode           = kvm_is_user_mode,
 +      .get_guest_ip           = kvm_get_guest_ip,
 +};
 +
 +void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
 +{
 +      percpu_write(current_vcpu, vcpu);
 +}
 +EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
 +
 +void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
 +{
 +      percpu_write(current_vcpu, NULL);
 +}
 +EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
 +
  int kvm_arch_init(void *opaque)
  {
        int r;
  
        kvm_timer_init();
  
 +      perf_register_guest_info_callbacks(&kvm_guest_cbs);
 +
        return 0;
  
  out:
  
  void kvm_arch_exit(void)
  {
 +      perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
 +
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
                cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
                                            CPUFREQ_TRANSITION_NOTIFIER);
@@@ -4533,7 -4461,9 +4511,9 @@@ int kvm_arch_vcpu_ioctl_run(struct kvm_
                kvm_set_cr8(vcpu, kvm_run->cr8);
  
        if (vcpu->arch.pio.cur_count) {
+               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = complete_pio(vcpu);
+               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
                if (r)
                        goto out;
        }
@@@ -5196,6 -5126,7 +5176,7 @@@ int kvm_task_switch(struct kvm_vcpu *vc
        int ret = 0;
        u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
        u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
+       u32 desc_limit;
  
        old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
  
                }
        }
  
-       if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) {
+       desc_limit = get_desc_limit(&nseg_desc);
+       if (!nseg_desc.p ||
+           ((desc_limit < 0x67 && (nseg_desc.type & 8)) ||
+            desc_limit < 0x2b)) {
                kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
                return 1;
        }
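
The largest addition in the x86.c diff above registers KVM with the new perf guest-info callbacks and brackets NMI handling with kvm_before_handle_nmi()/kvm_after_handle_nmi(), so a PMU NMI landing in that window can be attributed to the guest. A hedged standalone sketch of the bracket-and-query idea (plain globals stand in for the kernel's per-CPU current_vcpu and for struct perf_guest_info_callbacks; all names in the sketch are illustrative):

#include <stdio.h>
#include <stddef.h>

/* Stand-in for the kernel's per-CPU current_vcpu pointer. */
static void *current_vcpu;

struct guest_info_callbacks {
	int (*is_in_guest)(void);
	unsigned long (*get_guest_ip)(void);
};

static int is_in_guest(void)
{
	return current_vcpu != NULL;
}

static unsigned long get_guest_ip(void)
{
	/* The real code reads the vCPU's RIP; a fixed value keeps the sketch small. */
	return current_vcpu ? 0xdeadbeefUL : 0;
}

static struct guest_info_callbacks guest_cbs = {
	.is_in_guest  = is_in_guest,
	.get_guest_ip = get_guest_ip,
};

/* Bracket the window in which an NMI must be attributed to the guest. */
static void before_handle_nmi(void *vcpu) { current_vcpu = vcpu; }
static void after_handle_nmi(void *vcpu)  { (void)vcpu; current_vcpu = NULL; }

int main(void)
{
	int fake_vcpu;

	before_handle_nmi(&fake_vcpu);
	printf("in guest: %d, ip: %#lx\n",
	       guest_cbs.is_in_guest(), guest_cbs.get_guest_ip());
	after_handle_nmi(&fake_vcpu);
	printf("in guest: %d\n", guest_cbs.is_in_guest());
	return 0;
}
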
diff --combined kernel/sched.c
index 8cafe3ff558fec69a0c3c4676aa61191bb6e84da,6af210a7de70d394015617863ee303059c9dd4d6..b0bbadc2495506d874846636eaf16588f3760d6d
@@@ -2077,6 -2077,49 +2077,6 @@@ migrate_task(struct task_struct *p, in
        return 1;
  }
  
 -/*
 - * wait_task_context_switch - wait for a thread to complete at least one
 - *                            context switch.
 - *
 - * @p must not be current.
 - */
 -void wait_task_context_switch(struct task_struct *p)
 -{
 -      unsigned long nvcsw, nivcsw, flags;
 -      int running;
 -      struct rq *rq;
 -
 -      nvcsw   = p->nvcsw;
 -      nivcsw  = p->nivcsw;
 -      for (;;) {
 -              /*
 -               * The runqueue is assigned before the actual context
 -               * switch. We need to take the runqueue lock.
 -               *
 -               * We could check initially without the lock but it is
 -               * very likely that we need to take the lock in every
 -               * iteration.
 -               */
 -              rq = task_rq_lock(p, &flags);
 -              running = task_running(rq, p);
 -              task_rq_unlock(rq, &flags);
 -
 -              if (likely(!running))
 -                      break;
 -              /*
 -               * The switch count is incremented before the actual
 -               * context switch. We thus wait for two switches to be
 -               * sure at least one completed.
 -               */
 -              if ((p->nvcsw - nvcsw) > 1)
 -                      break;
 -              if ((p->nivcsw - nivcsw) > 1)
 -                      break;
 -
 -              cpu_relax();
 -      }
 -}
 -
  /*
   * wait_task_inactive - wait for a thread to unschedule.
   *
@@@ -4860,7 -4903,7 +4860,7 @@@ SYSCALL_DEFINE3(sched_getaffinity, pid_
        int ret;
        cpumask_var_t mask;
  
-       if (len < nr_cpu_ids)
+       if ((len * BITS_PER_BYTE) < nr_cpu_ids)
                return -EINVAL;
        if (len & (sizeof(unsigned long)-1))
                return -EINVAL;
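
The sched_getaffinity() hunk above compares the user buffer size in bits rather than bytes against nr_cpu_ids, so a buffer that is smaller than nr_cpu_ids bytes but still large enough in bits is no longer rejected. A small user-space illustration of the contract (uses the glibc wrapper; assumes a host where sizeof(cpu_set_t) covers nr_cpu_ids, which is the common case):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t mask;

	/* The kernel requires len * 8 >= nr_cpu_ids and len to be a multiple of
	 * sizeof(unsigned long); sizeof(cpu_set_t) is 1024 bits, which is ample
	 * for typical nr_cpu_ids values. */
	if (sched_getaffinity(0, sizeof(cpu_set_t), &mask) != 0) {
		perror("sched_getaffinity");
		return 1;
	}
	printf("runnable on %d CPUs\n", CPU_COUNT(&mask));
	return 0;
}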