KVM: VMX: Enable XSAVE/XRSTOR for guest
Dexuan Cui [Thu, 10 Jun 2010 03:27:12 +0000 (11:27 +0800)]
This patch enable guest to use XSAVE/XRSTOR instructions.

We assume that host_xcr0 would use all possible bits that OS supported.

And we loaded xcr0 in the same way we handled fpu - do it as late as we can.

Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/vmx.h
arch/x86/kvm/kvm_cache_regs.h
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
include/linux/kvm_host.h

index 0cd0f29..91631b8 100644 (file)
@@ -302,6 +302,7 @@ struct kvm_vcpu_arch {
        } update_pte;
 
        struct fpu guest_fpu;
+       u64 xcr0;
 
        gva_t mmio_fault_cr2;
        struct kvm_pio_request pio;
@@ -605,6 +606,7 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
 unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
 void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
+int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);
 
 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
index 96a5886..9f0cbd9 100644 (file)
@@ -267,6 +267,7 @@ enum vmcs_field {
 #define EXIT_REASON_EPT_VIOLATION       48
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_WBINVD             54
+#define EXIT_REASON_XSETBV             55
 
 /*
  * Interruption-information format
index d2a98f8..6491ac8 100644 (file)
@@ -71,4 +71,10 @@ static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu)
        return kvm_read_cr4_bits(vcpu, ~0UL);
 }
 
+static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
+{
+       return (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u)
+               | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
+}
+
 #endif
index 26ba61d..864a1b6 100644 (file)
@@ -37,6 +37,8 @@
 #include <asm/vmx.h>
 #include <asm/virtext.h>
 #include <asm/mce.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
 
 #include "trace.h"
 
@@ -3390,6 +3392,16 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu)
        return 1;
 }
 
+static int handle_xsetbv(struct kvm_vcpu *vcpu)
+{
+       u64 new_bv = kvm_read_edx_eax(vcpu);
+       u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
+
+       if (kvm_set_xcr(vcpu, index, new_bv) == 0)
+               skip_emulated_instruction(vcpu);
+       return 1;
+}
+
 static int handle_apic_access(struct kvm_vcpu *vcpu)
 {
        return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;
@@ -3668,6 +3680,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
        [EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
        [EXIT_REASON_WBINVD]                  = handle_wbinvd,
+       [EXIT_REASON_XSETBV]                  = handle_xsetbv,
        [EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
        [EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
        [EXIT_REASON_EPT_VIOLATION]           = handle_ept_violation,
index b08c005..b5e6447 100644 (file)
@@ -65,6 +65,7 @@
        (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
                          | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
                          | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR  \
+                         | X86_CR4_OSXSAVE \
                          | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -150,6 +151,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { NULL }
 };
 
+u64 __read_mostly host_xcr0;
+
+static inline u32 bit(int bitno)
+{
+       return 1 << (bitno & 31);
+}
+
 static void kvm_on_user_return(struct user_return_notifier *urn)
 {
        unsigned slot;
@@ -474,6 +482,61 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 }
 EXPORT_SYMBOL_GPL(kvm_lmsw);
 
+int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
+{
+       u64 xcr0;
+
+       /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now  */
+       if (index != XCR_XFEATURE_ENABLED_MASK)
+               return 1;
+       xcr0 = xcr;
+       if (kvm_x86_ops->get_cpl(vcpu) != 0)
+               return 1;
+       if (!(xcr0 & XSTATE_FP))
+               return 1;
+       if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
+               return 1;
+       if (xcr0 & ~host_xcr0)
+               return 1;
+       vcpu->arch.xcr0 = xcr0;
+       vcpu->guest_xcr0_loaded = 0;
+       return 0;
+}
+
+int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
+{
+       if (__kvm_set_xcr(vcpu, index, xcr)) {
+               kvm_inject_gp(vcpu, 0);
+               return 1;
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_set_xcr);
+
+static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
+{
+       struct kvm_cpuid_entry2 *best;
+
+       best = kvm_find_cpuid_entry(vcpu, 1, 0);
+       return best && (best->ecx & bit(X86_FEATURE_XSAVE));
+}
+
+static void update_cpuid(struct kvm_vcpu *vcpu)
+{
+       struct kvm_cpuid_entry2 *best;
+
+       best = kvm_find_cpuid_entry(vcpu, 1, 0);
+       if (!best)
+               return;
+
+       /* Update OSXSAVE bit */
+       if (cpu_has_xsave && best->function == 0x1) {
+               best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
+               if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
+                       best->ecx |= bit(X86_FEATURE_OSXSAVE);
+       }
+}
+
 int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
        unsigned long old_cr4 = kvm_read_cr4(vcpu);
@@ -482,6 +545,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
        if (cr4 & CR4_RESERVED_BITS)
                return 1;
 
+       if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
+               return 1;
+
        if (is_long_mode(vcpu)) {
                if (!(cr4 & X86_CR4_PAE))
                        return 1;
@@ -498,6 +564,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
        if ((cr4 ^ old_cr4) & pdptr_bits)
                kvm_mmu_reset_context(vcpu);
 
+       if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
+               update_cpuid(vcpu);
+
        return 0;
 }
 
@@ -666,11 +735,6 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
 }
 EXPORT_SYMBOL_GPL(kvm_get_dr);
 
-static inline u32 bit(int bitno)
-{
-       return 1 << (bitno & 31);
-}
-
 /*
  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
@@ -1814,6 +1878,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
        r = 0;
        kvm_apic_set_version(vcpu);
        kvm_x86_ops->cpuid_update(vcpu);
+       update_cpuid(vcpu);
 
 out_free:
        vfree(cpuid_entries);
@@ -1837,6 +1902,7 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
        vcpu->arch.cpuid_nent = cpuid->nent;
        kvm_apic_set_version(vcpu);
        kvm_x86_ops->cpuid_update(vcpu);
+       update_cpuid(vcpu);
        return 0;
 
 out:
@@ -1917,7 +1983,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
                0 /* Reserved, DCA */ | F(XMM4_1) |
                F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
-               0 /* Reserved, XSAVE, OSXSAVE */;
+               0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */;
        /* cpuid 0x80000001.ecx */
        const u32 kvm_supported_word6_x86_features =
                F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
@@ -1932,7 +1998,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 
        switch (function) {
        case 0:
-               entry->eax = min(entry->eax, (u32)0xb);
+               entry->eax = min(entry->eax, (u32)0xd);
                break;
        case 1:
                entry->edx &= kvm_supported_word0_x86_features;
@@ -1990,6 +2056,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                }
                break;
        }
+       case 0xd: {
+               int i;
+
+               entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+               for (i = 1; *nent < maxnent; ++i) {
+                       if (entry[i - 1].eax == 0 && i != 2)
+                               break;
+                       do_cpuid_1_ent(&entry[i], function, i);
+                       entry[i].flags |=
+                              KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+                       ++*nent;
+               }
+               break;
+       }
        case KVM_CPUID_SIGNATURE: {
                char signature[12] = "KVMKVMKVM\0\0";
                u32 *sigptr = (u32 *)signature;
@@ -4125,6 +4205,9 @@ int kvm_arch_init(void *opaque)
 
        perf_register_guest_info_callbacks(&kvm_guest_cbs);
 
+       if (cpu_has_xsave)
+               host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+
        return 0;
 
 out:
@@ -4523,6 +4606,25 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
        }
 }
 
+static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
+{
+       if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
+                       !vcpu->guest_xcr0_loaded) {
+               /* kvm_set_xcr() also depends on this */
+               xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
+               vcpu->guest_xcr0_loaded = 1;
+       }
+}
+
+static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
+{
+       if (vcpu->guest_xcr0_loaded) {
+               if (vcpu->arch.xcr0 != host_xcr0)
+                       xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
+               vcpu->guest_xcr0_loaded = 0;
+       }
+}
+
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
        int r;
@@ -4568,6 +4670,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        kvm_x86_ops->prepare_guest_switch(vcpu);
        if (vcpu->fpu_active)
                kvm_load_guest_fpu(vcpu);
+       kvm_load_guest_xcr0(vcpu);
 
        atomic_set(&vcpu->guest_mode, 1);
        smp_wmb();
@@ -5124,6 +5227,11 @@ int fx_init(struct kvm_vcpu *vcpu)
 
        fpu_finit(&vcpu->arch.guest_fpu);
 
+       /*
+        * Ensure guest xcr0 is valid for loading
+        */
+       vcpu->arch.xcr0 = XSTATE_FP;
+
        vcpu->arch.cr0 |= X86_CR0_ET;
 
        return 0;
@@ -5140,6 +5248,12 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
        if (vcpu->guest_fpu_loaded)
                return;
 
+       /*
+        * Restore all possible states in the guest,
+        * and assume host would use all available bits.
+        * Guest xcr0 would be loaded later.
+        */
+       kvm_put_guest_xcr0(vcpu);
        vcpu->guest_fpu_loaded = 1;
        unlazy_fpu(current);
        fpu_restore_checking(&vcpu->arch.guest_fpu);
@@ -5148,6 +5262,8 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
+       kvm_put_guest_xcr0(vcpu);
+
        if (!vcpu->guest_fpu_loaded)
                return;
 
index 2c62319..2d96555 100644 (file)
@@ -88,7 +88,7 @@ struct kvm_vcpu {
        int srcu_idx;
 
        int fpu_active;
-       int guest_fpu_loaded;
+       int guest_fpu_loaded, guest_xcr0_loaded;
        wait_queue_head_t wq;
        int sigset_active;
        sigset_t sigset;