&guest_hv_clock, sizeof(guest_hv_clock))))
return 0;
- /*
- * The interface expects us to write an even number signaling that the
- * update is finished. Since the guest won't see the intermediate
- * state, we just increase by 2 at the end.
+ /* This VCPU is paused, but it's legal for a guest to read another
+ * VCPU's kvmclock, so we really have to follow the specification where
+ * it says that version is odd if data is being modified, and even after
+ * it is consistent.
+ *
+ * Version field updates must be kept separate. This is because
+ * kvm_write_guest_cached might use a "rep movs" instruction, and
+ * writes within a string instruction are weakly ordered. So there
+ * are three writes overall.
+ *
+ * As a small optimization, only write the version field in the first
+ * and third write. The vcpu->pv_time cache is still valid, because the
+ * version field is the first in the struct.
*/
- vcpu->hv_clock.version = guest_hv_clock.version + 2;
+ BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
+
+ vcpu->hv_clock.version = guest_hv_clock.version + 1;
+ kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+ &vcpu->hv_clock,
+ sizeof(vcpu->hv_clock.version));
+
+ smp_wmb();
/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
&vcpu->hv_clock,
sizeof(vcpu->hv_clock));
+
+ smp_wmb();
+
+ vcpu->hv_clock.version++;
+ kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+ &vcpu->hv_clock,
+ sizeof(vcpu->hv_clock.version));
return 0;
}
kvm_set_mmio_spte_mask();
kvm_x86_ops = ops;
- kvm_init_msr_list();
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0);
int kvm_arch_hardware_setup(void)
{
- return kvm_x86_ops->hardware_setup();
+ int r;
+
+ r = kvm_x86_ops->hardware_setup();
+ if (r != 0)
+ return r;
+
+ kvm_init_msr_list();
+ return 0;
}
void kvm_arch_hardware_unsetup(void)