nv-tegra.nvidia Code Review - linux-3.10.git/commitdiff
Merge branch 'for-upstream' of https://github.com/agraf/linux-2.6 into queue
author    Marcelo Tosatti <mtosatti@redhat.com>
          Sun, 9 Dec 2012 20:44:10 +0000 (18:44 -0200)
committer Marcelo Tosatti <mtosatti@redhat.com>
          Sun, 9 Dec 2012 20:44:10 +0000 (18:44 -0200)
* 'for-upstream' of https://github.com/agraf/linux-2.6: (28 commits)
  KVM: PPC: booke: Get/set guest EPCR register using ONE_REG interface
  KVM: PPC: bookehv: Add EPCR support in mtspr/mfspr emulation
  KVM: PPC: bookehv: Add guest computation mode for irq delivery
  KVM: PPC: Make EPCR a valid field for booke64 and bookehv
  KVM: PPC: booke: Extend MAS2 EPN mask for 64-bit
  KVM: PPC: e500: Mask MAS2 EPN high 32-bits in 32/64 tlbwe emulation
  KVM: PPC: Mask ea's high 32-bits in 32/64 instr emulation
  KVM: PPC: e500: Add emulation helper for getting instruction ea
  KVM: PPC: bookehv64: Add support for interrupt handling
  KVM: PPC: bookehv: Remove GET_VCPU macro from exception handler
  KVM: PPC: booke: Fix get_tb() compile error on 64-bit
  KVM: PPC: e500: Silence bogus GCC warning in tlb code
  KVM: PPC: Book3S HV: Handle guest-caused machine checks on POWER7 without panicking
  KVM: PPC: Book3S HV: Improve handling of local vs. global TLB invalidations
  MAINTAINERS: Add git tree link for PPC KVM
  KVM: PPC: Book3S PR: MSR_DE doesn't exist on Book 3S
  KVM: PPC: Book3S PR: Fix VSX handling
  KVM: PPC: Book3S PR: Emulate PURR, SPURR and DSCR registers
  KVM: PPC: Book3S HV: Don't give the guest RW access to RO pages
  KVM: PPC: Book3S HV: Report correct HPT entry index when reading HPT
  ...

arch/x86/include/asm/kexec.h
arch/x86/include/asm/vmx.h
arch/x86/kernel/crash.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/mmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
include/linux/kvm_host.h
virt/kvm/assigned-dev.c
virt/kvm/irq_comm.c

arch/x86/include/asm/kexec.h
index 317ff1703d0b0a5e7eaf17b94fd93640b8e62997..28feeba2fdd62e2347e0e7e6fa8ce7f2d80d33c3 100644
@@ -163,6 +163,8 @@ struct kimage_arch {
 };
 #endif
 
+extern void (*crash_vmclear_loaded_vmcss)(void);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_X86_KEXEC_H */
arch/x86/include/asm/vmx.h
index 36ec21c36d68dad618f31e1be5bf423950262f26..c2d56b34830dc851b4d14781b231fa7380f38181 100644
@@ -445,8 +445,7 @@ enum vmcs_field {
 #define VMX_EPTP_WB_BIT                                (1ull << 14)
 #define VMX_EPT_2MB_PAGE_BIT                   (1ull << 16)
 #define VMX_EPT_1GB_PAGE_BIT                   (1ull << 17)
-#define VMX_EPT_AD_BIT                                 (1ull << 21)
-#define VMX_EPT_EXTENT_INDIVIDUAL_BIT          (1ull << 24)
+#define VMX_EPT_AD_BIT                             (1ull << 21)
 #define VMX_EPT_EXTENT_CONTEXT_BIT             (1ull << 25)
 #define VMX_EPT_EXTENT_GLOBAL_BIT              (1ull << 26)
 
arch/x86/kernel/crash.c
index 13ad89971d474557c4825d22dcb6d7da6e49ffc1..2f6b8e838d18f98122c148655d657d5c4f37d07c 100644
@@ -16,6 +16,7 @@
 #include <linux/delay.h>
 #include <linux/elf.h>
 #include <linux/elfcore.h>
+#include <linux/module.h>
 
 #include <asm/processor.h>
 #include <asm/hardirq.h>
 
 int in_crash_kexec;
 
+/*
+ * This callback is used to VMCLEAR all VMCSs loaded on the
+ * processor. The function pointer is assigned when the
+ * kvm_intel module is loaded.
+ *
+ * Protected by RCU.
+ */
+void (*crash_vmclear_loaded_vmcss)(void) = NULL;
+EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
+
+static inline void cpu_crash_vmclear_loaded_vmcss(void)
+{
+       void (*do_vmclear_operation)(void) = NULL;
+
+       rcu_read_lock();
+       do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
+       if (do_vmclear_operation)
+               do_vmclear_operation();
+       rcu_read_unlock();
+}
+
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 
 static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
@@ -46,6 +68,11 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
 #endif
        crash_save_cpu(regs, cpu);
 
+       /*
+        * VMCLEAR VMCSs loaded on all cpus if needed.
+        */
+       cpu_crash_vmclear_loaded_vmcss();
+
        /* Disable VMX or SVM if needed.
         *
         * We need to disable virtualization on all CPUs.
@@ -88,6 +115,11 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 
        kdump_nmi_shootdown_cpus();
 
+       /*
+        * VMCLEAR VMCSs loaded on this cpu if needed.
+        */
+       cpu_crash_vmclear_loaded_vmcss();
+
        /* Booting kdump kernel with VMX or SVM enabled won't work,
         * because (among other limitations) we can't disable paging
         * with the virt flags.
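
For context: the comment above describes an RCU-protected callback that kvm_intel installs when it is loaded, and the arch/x86/kvm/vmx.c hunks later in this commit do exactly that. Below is a minimal, hypothetical module-side sketch of the same registration pattern; all names except crash_vmclear_loaded_vmcss, rcu_assign_pointer(), synchronize_rcu() and the standard module macros are illustrative, and the real vmx.c code additionally wraps the assignments in #ifdef CONFIG_KEXEC.

/* Hypothetical sketch; not part of this commit. */
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <asm/kexec.h>

/* Illustrative callback: VMCLEAR whatever VMCSs this cpu has loaded. */
static void example_crash_vmclear(void)
{
	/* walk a per-cpu list of loaded VMCSs and clear each one */
}

static int __init example_init(void)
{
	/* Publish the callback; crash.c invokes it under rcu_read_lock(). */
	rcu_assign_pointer(crash_vmclear_loaded_vmcss, example_crash_vmclear);
	return 0;
}

static void __exit example_exit(void)
{
	/* Unpublish, then wait for in-flight readers before the code goes away. */
	rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL);
	synchronize_rcu();
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
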
arch/x86/kvm/cpuid.c
index 52f6166ef92c23b9c6f555f6b22784ce3f837a45..a20ecb5b6cbf3543490ab6a74a969aa45a1862c5 100644
@@ -661,6 +661,7 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
        } else
                *eax = *ebx = *ecx = *edx = 0;
 }
+EXPORT_SYMBOL_GPL(kvm_cpuid);
 
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 {
arch/x86/kvm/mmu.c
index b875a9ed9b8e3d75dc92e2889b58ed12926c3906..01d7c2ad05f5760bb656fef17f512c66e5e87dce 100644
@@ -2382,12 +2382,20 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
            || (!vcpu->arch.mmu.direct_map && write_fault
                && !is_write_protection(vcpu) && !user_fault)) {
 
+               /*
+                * There are two cases:
+                * - another vcpu creates a new sp in the window between
+                *   mapping_level() and acquiring mmu-lock.
+                * - the new sp is created by the vcpu itself (page-fault
+                *   path) when the guest uses the target gfn as its own
+                *   page table.
+                * Both cases can be fixed by letting the guest retry the
+                * access: it will refault, and we can then establish the
+                * mapping with a small page.
+                */
                if (level > PT_PAGE_TABLE_LEVEL &&
-                   has_wrprotected_page(vcpu->kvm, gfn, level)) {
-                       ret = 1;
-                       drop_spte(vcpu->kvm, sptep);
+                   has_wrprotected_page(vcpu->kvm, gfn, level))
                        goto done;
-               }
 
                spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
 
arch/x86/kvm/svm.c
index dcb79527e7aaadd35ca6301122ea58f98e310fc7..d29d3cd1c15657619086e5e4877766aaefc02214 100644
@@ -20,6 +20,7 @@
 #include "mmu.h"
 #include "kvm_cache_regs.h"
 #include "x86.h"
+#include "cpuid.h"
 
 #include <linux/module.h>
 #include <linux/mod_devicetable.h>
@@ -1193,6 +1194,8 @@ static void init_vmcb(struct vcpu_svm *svm)
 static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
+       u32 dummy;
+       u32 eax = 1;
 
        init_vmcb(svm);
 
@@ -1201,8 +1204,9 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
                svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
                svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
        }
-       vcpu->arch.regs_avail = ~0;
-       vcpu->arch.regs_dirty = ~0;
+
+       kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
+       kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
 
        return 0;
 }
@@ -1259,10 +1263,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
        svm->asid_generation = 0;
        init_vmcb(svm);
 
-       err = fx_init(&svm->vcpu);
-       if (err)
-               goto free_page4;
-
        svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
        if (kvm_vcpu_is_bsp(&svm->vcpu))
                svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
@@ -1271,8 +1271,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 
        return &svm->vcpu;
 
-free_page4:
-       __free_page(hsave_page);
 free_page3:
        __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
 free_page2:
arch/x86/kvm/vmx.c
index 2fd2046dc94c15c0bf17c82aa35bbbf5252fc459..1a30fd5c3fb2b63ac52f2b8ddb81e7f8e6404271 100644
@@ -42,6 +42,7 @@
 #include <asm/i387.h>
 #include <asm/xcr.h>
 #include <asm/perf_event.h>
+#include <asm/kexec.h>
 
 #include "trace.h"
 
@@ -802,11 +803,6 @@ static inline bool cpu_has_vmx_ept_ad_bits(void)
        return vmx_capability.ept & VMX_EPT_AD_BIT;
 }
 
-static inline bool cpu_has_vmx_invept_individual_addr(void)
-{
-       return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT;
-}
-
 static inline bool cpu_has_vmx_invept_context(void)
 {
        return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT;
@@ -992,6 +988,46 @@ static void vmcs_load(struct vmcs *vmcs)
                       vmcs, phys_addr);
 }
 
+#ifdef CONFIG_KEXEC
+/*
+ * This bitmap indicates, per cpu, whether the vmclear
+ * operation is enabled. It is disabled on all cpus by
+ * default.
+ */
+static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE;
+
+static inline void crash_enable_local_vmclear(int cpu)
+{
+       cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap);
+}
+
+static inline void crash_disable_local_vmclear(int cpu)
+{
+       cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap);
+}
+
+static inline int crash_local_vmclear_enabled(int cpu)
+{
+       return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap);
+}
+
+static void crash_vmclear_local_loaded_vmcss(void)
+{
+       int cpu = raw_smp_processor_id();
+       struct loaded_vmcs *v;
+
+       if (!crash_local_vmclear_enabled(cpu))
+               return;
+
+       list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
+                           loaded_vmcss_on_cpu_link)
+               vmcs_clear(v->vmcs);
+}
+#else
+static inline void crash_enable_local_vmclear(int cpu) { }
+static inline void crash_disable_local_vmclear(int cpu) { }
+#endif /* CONFIG_KEXEC */
+
 static void __loaded_vmcs_clear(void *arg)
 {
        struct loaded_vmcs *loaded_vmcs = arg;
@@ -1001,6 +1037,7 @@ static void __loaded_vmcs_clear(void *arg)
                return; /* vcpu migration can race with cpu offline */
        if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
                per_cpu(current_vmcs, cpu) = NULL;
+       crash_disable_local_vmclear(cpu);
        list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
 
        /*
@@ -1012,6 +1049,7 @@ static void __loaded_vmcs_clear(void *arg)
        smp_wmb();
 
        loaded_vmcs_init(loaded_vmcs);
+       crash_enable_local_vmclear(cpu);
 }
 
 static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
@@ -1062,17 +1100,6 @@ static inline void ept_sync_context(u64 eptp)
        }
 }
 
-static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
-{
-       if (enable_ept) {
-               if (cpu_has_vmx_invept_individual_addr())
-                       __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
-                                       eptp, gpa);
-               else
-                       ept_sync_context(eptp);
-       }
-}
-
 static __always_inline unsigned long vmcs_readl(unsigned long field)
 {
        unsigned long value;
@@ -1546,6 +1573,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
                local_irq_disable();
+               crash_disable_local_vmclear(cpu);
 
                /*
                 * Read loaded_vmcs->cpu should be before fetching
@@ -1556,6 +1584,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
                list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
                         &per_cpu(loaded_vmcss_on_cpu, cpu));
+               crash_enable_local_vmclear(cpu);
                local_irq_enable();
 
                /*
@@ -2369,6 +2398,18 @@ static int hardware_enable(void *garbage)
                return -EBUSY;
 
        INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+
+       /*
+        * Now we can enable the vmclear operation in kdump
+        * since the loaded_vmcss_on_cpu list on this cpu
+        * has been initialized.
+        *
+        * Even though the cpu is not in VMX operation yet,
+        * enabling the vmclear operation is safe because the
+        * loaded_vmcss_on_cpu list is still empty.
+        */
+       crash_enable_local_vmclear(cpu);
+
        rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
 
        test_bits = FEATURE_CONTROL_LOCKED;
@@ -3934,8 +3975,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
        u64 msr;
        int ret;
 
-       vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
-
        vmx->rmode.vm86_active = 0;
 
        vmx->soft_vnmi_blocked = 0;
@@ -3947,10 +3986,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
                msr |= MSR_IA32_APICBASE_BSP;
        kvm_set_apic_base(&vmx->vcpu, msr);
 
-       ret = fx_init(&vmx->vcpu);
-       if (ret != 0)
-               goto out;
-
        vmx_segment_cache_clear(vmx);
 
        seg_setup(VCPU_SREG_CS);
@@ -3991,7 +4026,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
                kvm_rip_write(vcpu, 0xfff0);
        else
                kvm_rip_write(vcpu, 0);
-       kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
 
        vmcs_writel(GUEST_GDTR_BASE, 0);
        vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
@@ -4041,7 +4075,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
        /* HACK: Don't enable emulation on guest boot/reset */
        vmx->emulation_required = 0;
 
-out:
        return ret;
 }
 
@@ -4863,11 +4896,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 
        exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
-       if (exit_qualification & (1 << 6)) {
-               printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
-               return -EINVAL;
-       }
-
        gla_validity = (exit_qualification >> 7) & 0x3;
        if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
                printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
@@ -7412,6 +7440,11 @@ static int __init vmx_init(void)
        if (r)
                goto out3;
 
+#ifdef CONFIG_KEXEC
+       rcu_assign_pointer(crash_vmclear_loaded_vmcss,
+                          crash_vmclear_local_loaded_vmcss);
+#endif
+
        vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
        vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
        vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
@@ -7449,6 +7482,11 @@ static void __exit vmx_exit(void)
        free_page((unsigned long)vmx_io_bitmap_b);
        free_page((unsigned long)vmx_io_bitmap_a);
 
+#ifdef CONFIG_KEXEC
+       rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL);
+       synchronize_rcu();
+#endif
+
        kvm_exit();
 }
 
arch/x86/kvm/x86.c
index 3bdaf298b8c796dc7c5e6ecdc42ec56d453ed905..57c76e86e9bdf7fdfea56eae9f13040eed2ae089 100644
@@ -6461,6 +6461,10 @@ static int kvm_vcpu_reset(struct kvm_vcpu *vcpu)
 
        kvm_pmu_reset(vcpu);
 
+       memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
+       vcpu->arch.regs_avail = ~0;
+       vcpu->arch.regs_dirty = ~0;
+
        return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
@@ -6629,11 +6633,17 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
                goto fail_free_mce_banks;
 
+       r = fx_init(vcpu);
+       if (r)
+               goto fail_free_wbinvd_dirty_mask;
+
        vcpu->arch.ia32_tsc_adjust_msr = 0x0;
        kvm_async_pf_hash_reset(vcpu);
        kvm_pmu_init(vcpu);
 
        return 0;
+fail_free_wbinvd_dirty_mask:
+       free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
 fail_free_mce_banks:
        kfree(vcpu->arch.mce_banks);
 fail_free_lapic:
include/linux/kvm_host.h
index c823e47c364152bfb4e67af98222833ca0f244f6..91ae127f4ac57ba5dd08da48095b906f526bad4b 100644
@@ -693,6 +693,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
                                   unsigned long *deliver_bitmask);
 #endif
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
+int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
                int irq_source_id, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
virt/kvm/assigned-dev.c
index 23a41a9f8db999f4b70fd4c0cd4b6ae8b26ee3c2..3642239252b0015593fe690ec0fa83ae5b841145 100644
@@ -105,6 +105,15 @@ static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
 }
 
 #ifdef __KVM_HAVE_MSI
+static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
+{
+       struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+       int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
+                                      assigned_dev->irq_source_id,
+                                      assigned_dev->guest_irq, 1);
+       return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
+}
+
 static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
 {
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
@@ -117,6 +126,23 @@ static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
 #endif
 
 #ifdef __KVM_HAVE_MSIX
+static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
+{
+       struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+       int index = find_index_from_host_irq(assigned_dev, irq);
+       u32 vector;
+       int ret = 0;
+
+       if (index >= 0) {
+               vector = assigned_dev->guest_msix_entries[index].vector;
+               ret = kvm_set_irq_inatomic(assigned_dev->kvm,
+                                          assigned_dev->irq_source_id,
+                                          vector, 1);
+       }
+
+       return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
+}
+
 static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
 {
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
@@ -334,11 +360,6 @@ static int assigned_device_enable_host_intx(struct kvm *kvm,
 }
 
 #ifdef __KVM_HAVE_MSI
-static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
-{
-       return IRQ_WAKE_THREAD;
-}
-
 static int assigned_device_enable_host_msi(struct kvm *kvm,
                                           struct kvm_assigned_dev_kernel *dev)
 {
@@ -363,11 +384,6 @@ static int assigned_device_enable_host_msi(struct kvm *kvm,
 #endif
 
 #ifdef __KVM_HAVE_MSIX
-static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
-{
-       return IRQ_WAKE_THREAD;
-}
-
 static int assigned_device_enable_host_msix(struct kvm *kvm,
                                            struct kvm_assigned_dev_kernel *dev)
 {
virt/kvm/irq_comm.c
index 2eb58af7ee99268b81a01680db591ea4991c47eb..656fa455e154abedceba3883cf865adfa73f873b 100644
@@ -102,6 +102,23 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
        return r;
 }
 
+static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+                                  struct kvm_lapic_irq *irq)
+{
+       trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
+
+       irq->dest_id = (e->msi.address_lo &
+                       MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+       irq->vector = (e->msi.data &
+                       MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+       irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
+       irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
+       irq->delivery_mode = e->msi.data & 0x700;
+       irq->level = 1;
+       irq->shorthand = 0;
+       /* TODO Deal with RH bit of MSI message address */
+}
+
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
                struct kvm *kvm, int irq_source_id, int level)
 {
@@ -110,22 +127,26 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
        if (!level)
                return -1;
 
-       trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
+       kvm_set_msi_irq(e, &irq);
 
-       irq.dest_id = (e->msi.address_lo &
-                       MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
-       irq.vector = (e->msi.data &
-                       MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
-       irq.dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
-       irq.trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
-       irq.delivery_mode = e->msi.data & 0x700;
-       irq.level = 1;
-       irq.shorthand = 0;
-
-       /* TODO Deal with RH bit of MSI message address */
        return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 }
 
+
+static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
+                        struct kvm *kvm)
+{
+       struct kvm_lapic_irq irq;
+       int r;
+
+       kvm_set_msi_irq(e, &irq);
+
+       if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r))
+               return r;
+       else
+               return -EWOULDBLOCK;
+}
+
 int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
 {
        struct kvm_kernel_irq_routing_entry route;
@@ -178,6 +199,44 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
        return ret;
 }
 
+/*
+ * Deliver an IRQ in an atomic context if we can, or return a failure so
+ * that the caller can retry in a process context.
+ * Return value:
+ *  -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
+ *  Other values - No need to retry.
+ */
+int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
+{
+       struct kvm_kernel_irq_routing_entry *e;
+       int ret = -EINVAL;
+       struct kvm_irq_routing_table *irq_rt;
+       struct hlist_node *n;
+
+       trace_kvm_set_irq(irq, level, irq_source_id);
+
+       /*
+        * Injection into either PIC or IOAPIC might need to scan all CPUs,
+        * which would need to be retried from thread context; when the same
+        * GSI is connected to both PIC and IOAPIC, we'd have to report a
+        * partial failure here.
+        * Since there's no easy way to do this, we only support injecting MSI,
+        * which is limited to a 1:1 GSI mapping.
+        */
+       rcu_read_lock();
+       irq_rt = rcu_dereference(kvm->irq_routing);
+       if (irq < irq_rt->nr_rt_entries)
+               hlist_for_each_entry(e, n, &irq_rt->map[irq], link) {
+                       if (likely(e->type == KVM_IRQ_ROUTING_MSI))
+                               ret = kvm_set_msi_inatomic(e, kvm);
+                       else
+                               ret = -EWOULDBLOCK;
+                       break;
+               }
+       rcu_read_unlock();
+       return ret;
+}
+
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
        struct kvm_irq_ack_notifier *kian;
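
For reference: the kvm_set_irq_inatomic() contract spelled out in the comment above (return -EWOULDBLOCK when the injection cannot be performed atomically) is what the assigned-device MSI/MSI-X handlers earlier in this commit rely on. A minimal, hypothetical caller sketch follows, assuming a hard-irq handler paired with a threaded handler that is allowed to sleep and call kvm_set_irq(); struct example_dev and both handler names are illustrative only.

/* Hypothetical sketch; mirrors the assigned-dev.c pattern in this commit. */
#include <linux/interrupt.h>
#include <linux/kvm_host.h>

struct example_dev {			/* illustrative container */
	struct kvm *kvm;
	int irq_source_id;
	u32 guest_irq;
};

static irqreturn_t example_hardirq(int irq, void *dev_id)
{
	struct example_dev *dev = dev_id;
	int ret = kvm_set_irq_inatomic(dev->kvm, dev->irq_source_id,
				       dev->guest_irq, 1);

	/*
	 * -EWOULDBLOCK: the MSI could not be injected from atomic
	 * context, so wake the threaded handler to retry; any other
	 * return value means no retry is needed.
	 */
	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t example_thread(int irq, void *dev_id)
{
	struct example_dev *dev = dev_id;

	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 1);
	return IRQ_HANDLED;
}

Both handlers would be wired up together with request_threaded_irq(), so the slow path only runs when the fast path reports -EWOULDBLOCK.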