nv-tegra.nvidia Code Review - linux-3.10.git/commitdiff
Merge branch 'for-upstream' of https://github.com/agraf/linux-2.6 into queue
author    Marcelo Tosatti <mtosatti@redhat.com>
          Sun, 9 Dec 2012 20:44:10 +0000 (18:44 -0200)
committer Marcelo Tosatti <mtosatti@redhat.com>
          Sun, 9 Dec 2012 20:44:10 +0000 (18:44 -0200)
* 'for-upstream' of https://github.com/agraf/linux-2.6: (28 commits)
  KVM: PPC: booke: Get/set guest EPCR register using ONE_REG interface
  KVM: PPC: bookehv: Add EPCR support in mtspr/mfspr emulation
  KVM: PPC: bookehv: Add guest computation mode for irq delivery
  KVM: PPC: Make EPCR a valid field for booke64 and bookehv
  KVM: PPC: booke: Extend MAS2 EPN mask for 64-bit
  KVM: PPC: e500: Mask MAS2 EPN high 32-bits in 32/64 tlbwe emulation
  KVM: PPC: Mask ea's high 32-bits in 32/64 instr emulation
  KVM: PPC: e500: Add emulation helper for getting instruction ea
  KVM: PPC: bookehv64: Add support for interrupt handling
  KVM: PPC: bookehv: Remove GET_VCPU macro from exception handler
  KVM: PPC: booke: Fix get_tb() compile error on 64-bit
  KVM: PPC: e500: Silence bogus GCC warning in tlb code
  KVM: PPC: Book3S HV: Handle guest-caused machine checks on POWER7 without panicking
  KVM: PPC: Book3S HV: Improve handling of local vs. global TLB invalidations
  MAINTAINERS: Add git tree link for PPC KVM
  KVM: PPC: Book3S PR: MSR_DE doesn't exist on Book 3S
  KVM: PPC: Book3S PR: Fix VSX handling
  KVM: PPC: Book3S PR: Emulate PURR, SPURR and DSCR registers
  KVM: PPC: Book3S HV: Don't give the guest RW access to RO pages
  KVM: PPC: Book3S HV: Report correct HPT entry index when reading HPT
  ...

arch/x86/include/asm/kexec.h
arch/x86/include/asm/vmx.h
arch/x86/kernel/crash.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/mmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
include/linux/kvm_host.h
virt/kvm/assigned-dev.c
virt/kvm/irq_comm.c

arch/x86/include/asm/kexec.h
index 317ff1703d0b0a5e7eaf17b94fd93640b8e62997..28feeba2fdd62e2347e0e7e6fa8ce7f2d80d33c3 100644
@@ -163,6 +163,8 @@ struct kimage_arch {
 };
 #endif
 
+extern void (*crash_vmclear_loaded_vmcss)(void);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_X86_KEXEC_H */
arch/x86/include/asm/vmx.h
index 36ec21c36d68dad618f31e1be5bf423950262f26..c2d56b34830dc851b4d14781b231fa7380f38181 100644
@@ -445,8 +445,7 @@ enum vmcs_field {
 #define VMX_EPTP_WB_BIT                                (1ull << 14)
 #define VMX_EPT_2MB_PAGE_BIT                   (1ull << 16)
 #define VMX_EPT_1GB_PAGE_BIT                   (1ull << 17)
-#define VMX_EPT_AD_BIT                                 (1ull << 21)
-#define VMX_EPT_EXTENT_INDIVIDUAL_BIT          (1ull << 24)
+#define VMX_EPT_AD_BIT                             (1ull << 21)
 #define VMX_EPT_EXTENT_CONTEXT_BIT             (1ull << 25)
 #define VMX_EPT_EXTENT_GLOBAL_BIT              (1ull << 26)
 
arch/x86/kernel/crash.c
index 13ad89971d474557c4825d22dcb6d7da6e49ffc1..2f6b8e838d18f98122c148655d657d5c4f37d07c 100644
@@ -16,6 +16,7 @@
 #include <linux/delay.h>
 #include <linux/elf.h>
 #include <linux/elfcore.h>
+#include <linux/module.h>
 
 #include <asm/processor.h>
 #include <asm/hardirq.h>
 
 int in_crash_kexec;
 
+/*
+ * This callback is used to VMCLEAR all VMCSs loaded on the
+ * processor. The function pointer is assigned when the
+ * kvm_intel module is loaded.
+ *
+ * Protected by RCU.
+ */
+void (*crash_vmclear_loaded_vmcss)(void) = NULL;
+EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
+
+static inline void cpu_crash_vmclear_loaded_vmcss(void)
+{
+       void (*do_vmclear_operation)(void) = NULL;
+
+       rcu_read_lock();
+       do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
+       if (do_vmclear_operation)
+               do_vmclear_operation();
+       rcu_read_unlock();
+}
+
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 
 static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
@@ -46,6 +68,11 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
 #endif
        crash_save_cpu(regs, cpu);
 
+       /*
+        * VMCLEAR VMCSs loaded on all cpus if needed.
+        */
+       cpu_crash_vmclear_loaded_vmcss();
+
        /* Disable VMX or SVM if needed.
         *
         * We need to disable virtualization on all CPUs.
@@ -88,6 +115,11 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 
        kdump_nmi_shootdown_cpus();
 
+       /*
+        * VMCLEAR VMCSs loaded on this cpu if needed.
+        */
+       cpu_crash_vmclear_loaded_vmcss();
+
        /* Booting kdump kernel with VMX or SVM enabled won't work,
         * because (among other limitations) we can't disable paging
         * with the virt flags.
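
For context: the comment above describes an RCU-protected callback that kvm_intel installs when it is loaded, and the arch/x86/kvm/vmx.c hunks later in this commit do exactly that. Below is a minimal, hypothetical module-side sketch of the same registration pattern; all names except crash_vmclear_loaded_vmcss, rcu_assign_pointer(), synchronize_rcu() and the standard module macros are illustrative, and the real vmx.c code additionally wraps the assignments in #ifdef CONFIG_KEXEC.

/* Hypothetical sketch; not part of this commit. */
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <asm/kexec.h>

/* Illustrative callback: VMCLEAR whatever VMCSs this cpu has loaded. */
static void example_crash_vmclear(void)
{
	/* walk a per-cpu list of loaded VMCSs and clear each one */
}

static int __init example_init(void)
{
	/* Publish the callback; crash.c invokes it under rcu_read_lock(). */
	rcu_assign_pointer(crash_vmclear_loaded_vmcss, example_crash_vmclear);
	return 0;
}

static void __exit example_exit(void)
{
	/* Unpublish, then wait for in-flight readers before the code goes away. */
	rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL);
	synchronize_rcu();
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
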
arch/x86/kvm/cpuid.c
index 52f6166ef92c23b9c6f555f6b22784ce3f837a45..a20ecb5b6cbf3543490ab6a74a969aa45a1862c5 100644
@@ -661,6 +661,7 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
        } else
                *eax = *ebx = *ecx = *edx = 0;
 }
+EXPORT_SYMBOL_GPL(kvm_cpuid);
 
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 {
arch/x86/kvm/mmu.c
index b875a9ed9b8e3d75dc92e2889b58ed12926c3906..01d7c2ad05f5760bb656fef17f512c66e5e87dce 100644
@@ -2382,12 +2382,20 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
            || (!vcpu->arch.mmu.direct_map && write_fault
                && !is_write_protection(vcpu) && !user_fault)) {
 
+               /*
+                * There are two cases:
+                * - another vcpu creates a new sp in the window between
+                *   mapping_level() and acquiring mmu-lock.
+                * - the new sp is created by the vcpu itself (page-fault
+                *   path) when the guest uses the target gfn as its own
+                *   page table.
+                * Both cases can be fixed by letting the guest retry the
+                * access: it will refault, and we can then establish the
+                * mapping with a small page.
+                */
                if (level > PT_PAGE_TABLE_LEVEL &&
-                   has_wrprotected_page(vcpu->kvm, gfn, level)) {
-                       ret = 1;
-                       drop_spte(vcpu->kvm, sptep);
+                   has_wrprotected_page(vcpu->kvm, gfn, level))
                        goto done;
-               }
 
                spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
 
arch/x86/kvm/svm.c
index dcb79527e7aaadd35ca6301122ea58f98e310fc7..d29d3cd1c15657619086e5e4877766aaefc02214 100644
@@ -20,6 +20,7 @@
 #include "mmu.h"
 #include "kvm_cache_regs.h"
 #include "x86.h"
+#include "cpuid.h"
 
 #include <linux/module.h>
 #include <linux/mod_devicetable.h>
@@ -1193,6 +1194,8 @@ static void init_vmcb(struct vcpu_svm *svm)
 static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
+       u32 dummy;
+       u32 eax = 1;
 
        init_vmcb(svm);
 
@@ -1201,8 +1204,9 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
                svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
                svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
        }
-       vcpu->arch.regs_avail = ~0;
-       vcpu->arch.regs_dirty = ~0;
+
+       kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
+       kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
 
        return 0;
 }
@@ -1259,10 +1263,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
        svm->asid_generation = 0;
        init_vmcb(svm);
 
-       err = fx_init(&svm->vcpu);
-       if (err)
-               goto free_page4;
-
        svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
        if (kvm_vcpu_is_bsp(&svm->vcpu))
                svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
@@ -1271,8 +1271,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 
        return &svm->vcpu;
 
-free_page4:
-       __free_page(hsave_page);
 free_page3:
        __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
 free_page2:
arch/x86/kvm/vmx.c
index 2fd2046dc94c15c0bf17c82aa35bbbf5252fc459..1a30fd5c3fb2b63ac52f2b8ddb81e7f8e6404271 100644
@@ -42,6 +42,7 @@
 #include <asm/i387.h>
 #include <asm/xcr.h>
 #include <asm/perf_event.h>
+#include <asm/kexec.h>
 
 #include "trace.h"
 
@@ -802,11 +803,6 @@ static inline bool cpu_has_vmx_ept_ad_bits(void)
        return vmx_capability.ept & VMX_EPT_AD_BIT;
 }
 
-static inline bool cpu_has_vmx_invept_individual_addr(void)
-{
-       return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT;
-}
-
 static inline bool cpu_has_vmx_invept_context(void)
 {
        return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT;
@@ -992,6 +988,46 @@ static void vmcs_load(struct vmcs *vmcs)
                       vmcs, phys_addr);
 }
 
+#ifdef CONFIG_KEXEC
+/*
+ * This bitmap indicates, per cpu, whether the vmclear
+ * operation is enabled. It is disabled on all cpus by
+ * default.
+ */
+static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE;
+
+static inline void crash_enable_local_vmclear(int cpu)
+{
+       cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap);
+}
+
+static inline void crash_disable_local_vmclear(int cpu)
+{
+       cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap);
+}
+
+static inline int crash_local_vmclear_enabled(int cpu)
+{
+       return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap);
+}
+
+static void crash_vmclear_local_loaded_vmcss(void)
+{
+       int cpu = raw_smp_processor_id();
+       struct loaded_vmcs *v;
+
+       if (!crash_local_vmclear_enabled(cpu))
+               return;
+
+       list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
+                           loaded_vmcss_on_cpu_link)
+               vmcs_clear(v->vmcs);
+}
+#else
+static inline void crash_enable_local_vmclear(int cpu) { }
+static inline void crash_disable_local_vmclear(int cpu) { }
+#endif /* CONFIG_KEXEC */
+
 static void __loaded_vmcs_clear(void *arg)
 {
        struct loaded_vmcs *loaded_vmcs = arg;
@@ -1001,6 +1037,7 @@ static void __loaded_vmcs_clear(void *arg)
                return; /* vcpu migration can race with cpu offline */
        if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
                per_cpu(current_vmcs, cpu) = NULL;
+       crash_disable_local_vmclear(cpu);
        list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
 
        /*
@@ -1012,6 +1049,7 @@ static void __loaded_vmcs_clear(void *arg)
        smp_wmb();
 
        loaded_vmcs_init(loaded_vmcs);
+       crash_enable_local_vmclear(cpu);
 }
 
 static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
@@ -1062,17 +1100,6 @@ static inline void ept_sync_context(u64 eptp)
        }
 }
 
-static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
-{
-       if (enable_ept) {
-               if (cpu_has_vmx_invept_individual_addr())
-                       __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
-                                       eptp, gpa);
-               else
-                       ept_sync_context(eptp);
-       }
-}
-
 static __always_inline unsigned long vmcs_readl(unsigned long field)
 {
        unsigned long value;
@@ -1546,6 +1573,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
                local_irq_disable();
+               crash_disable_local_vmclear(cpu);
 
                /*
                 * Read loaded_vmcs->cpu should be before fetching
@@ -1556,6 +1584,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
                list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
                         &per_cpu(loaded_vmcss_on_cpu, cpu));
+               crash_enable_local_vmclear(cpu);
                local_irq_enable();
 
                /*
@@ -2369,6 +2398,18 @@ static int hardware_enable(void *garbage)
                return -EBUSY;
 
        INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+
+       /*
+        * Now we can enable the vmclear operation in kdump
+        * since the loaded_vmcss_on_cpu list on this cpu
+        * has been initialized.
+        *
+        * Even though the cpu is not in VMX operation yet,
+        * enabling the vmclear operation is safe because the
+        * loaded_vmcss_on_cpu list is still empty.
+        */
+       crash_enable_local_vmclear(cpu);
+
        rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
 
        test_bits = FEATURE_CONTROL_LOCKED;
@@ -3934,8 +3975,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
        u64 msr;
        int ret;
 
-       vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
-
        vmx->rmode.vm86_active = 0;
 
        vmx->soft_vnmi_blocked = 0;
@@ -3947,10 +3986,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
                msr |= MSR_IA32_APICBASE_BSP;
        kvm_set_apic_base(&vmx->vcpu, msr);
 
-       ret = fx_init(&vmx->vcpu);
-       if (ret != 0)
-               goto out;
-
        vmx_segment_cache_clear(vmx);
 
        seg_setup(VCPU_SREG_CS);
@@ -3991,7 +4026,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
                kvm_rip_write(vcpu, 0xfff0);
        else
                kvm_rip_write(vcpu, 0);
-       kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
 
        vmcs_writel(GUEST_GDTR_BASE, 0);
        vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
@@ -4041,7 +4075,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
        /* HACK: Don't enable emulation on guest boot/reset */
        vmx->emulation_required = 0;
 
-out:
        return ret;
 }
 
@@ -4863,11 +4896,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 
        exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
-       if (exit_qualification & (1 << 6)) {
-               printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
-               return -EINVAL;
-       }
-
        gla_validity = (exit_qualification >> 7) & 0x3;
        if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
                printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
@@ -7412,6 +7440,11 @@ static int __init vmx_init(void)
        if (r)
                goto out3;
 
+#ifdef CONFIG_KEXEC
+       rcu_assign_pointer(crash_vmclear_loaded_vmcss,
+                          crash_vmclear_local_loaded_vmcss);
+#endif
+
        vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
        vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
        vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
@@ -7449,6 +7482,11 @@ static void __exit vmx_exit(void)
        free_page((unsigned long)vmx_io_bitmap_b);
        free_page((unsigned long)vmx_io_bitmap_a);
 
+#ifdef CONFIG_KEXEC
+       rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL);
+       synchronize_rcu();
+#endif
+
        kvm_exit();
 }
 
arch/x86/kvm/x86.c
index 3bdaf298b8c796dc7c5e6ecdc42ec56d453ed905..57c76e86e9bdf7fdfea56eae9f13040eed2ae089 100644
@@ -6461,6 +6461,10 @@ static int kvm_vcpu_reset(struct kvm_vcpu *vcpu)
 
        kvm_pmu_reset(vcpu);
 
+       memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
+       vcpu->arch.regs_avail = ~0;
+       vcpu->arch.regs_dirty = ~0;
+
        return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
@@ -6629,11 +6633,17 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
                goto fail_free_mce_banks;
 
+       r = fx_init(vcpu);
+       if (r)
+               goto fail_free_wbinvd_dirty_mask;
+
        vcpu->arch.ia32_tsc_adjust_msr = 0x0;
        kvm_async_pf_hash_reset(vcpu);
        kvm_pmu_init(vcpu);
 
        return 0;
+fail_free_wbinvd_dirty_mask:
+       free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
 fail_free_mce_banks:
        kfree(vcpu->arch.mce_banks);
 fail_free_lapic:
include/linux/kvm_host.h
index c823e47c364152bfb4e67af98222833ca0f244f6..91ae127f4ac57ba5dd08da48095b906f526bad4b 100644
@@ -693,6 +693,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
                                   unsigned long *deliver_bitmask);
 #endif
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
+int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
                int irq_source_id, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
virt/kvm/assigned-dev.c
index 23a41a9f8db999f4b70fd4c0cd4b6ae8b26ee3c2..3642239252b0015593fe690ec0fa83ae5b841145 100644
@@ -105,6 +105,15 @@ static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
 }
 
 #ifdef __KVM_HAVE_MSI
+static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
+{
+       struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+       int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
+                                      assigned_dev->irq_source_id,
+                                      assigned_dev->guest_irq, 1);
+       return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
+}
+
 static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
 {
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
@@ -117,6 +126,23 @@ static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
 #endif
 
 #ifdef __KVM_HAVE_MSIX
+static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
+{
+       struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+       int index = find_index_from_host_irq(assigned_dev, irq);
+       u32 vector;
+       int ret = 0;
+
+       if (index >= 0) {
+               vector = assigned_dev->guest_msix_entries[index].vector;
+               ret = kvm_set_irq_inatomic(assigned_dev->kvm,
+                                          assigned_dev->irq_source_id,
+                                          vector, 1);
+       }
+
+       return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
+}
+
 static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
 {
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
@@ -334,11 +360,6 @@ static int assigned_device_enable_host_intx(struct kvm *kvm,
 }
 
 #ifdef __KVM_HAVE_MSI
-static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
-{
-       return IRQ_WAKE_THREAD;
-}
-
 static int assigned_device_enable_host_msi(struct kvm *kvm,
                                           struct kvm_assigned_dev_kernel *dev)
 {
@@ -363,11 +384,6 @@ static int assigned_device_enable_host_msi(struct kvm *kvm,
 #endif
 
 #ifdef __KVM_HAVE_MSIX
-static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
-{
-       return IRQ_WAKE_THREAD;
-}
-
 static int assigned_device_enable_host_msix(struct kvm *kvm,
                                            struct kvm_assigned_dev_kernel *dev)
 {
virt/kvm/irq_comm.c
index 2eb58af7ee99268b81a01680db591ea4991c47eb..656fa455e154abedceba3883cf865adfa73f873b 100644
@@ -102,6 +102,23 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
        return r;
 }
 
+static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+                                  struct kvm_lapic_irq *irq)
+{
+       trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
+
+       irq->dest_id = (e->msi.address_lo &
+                       MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+       irq->vector = (e->msi.data &
+                       MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+       irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
+       irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
+       irq->delivery_mode = e->msi.data & 0x700;
+       irq->level = 1;
+       irq->shorthand = 0;
+       /* TODO Deal with RH bit of MSI message address */
+}
+
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
                struct kvm *kvm, int irq_source_id, int level)
 {
@@ -110,22 +127,26 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
        if (!level)
                return -1;
 
-       trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
+       kvm_set_msi_irq(e, &irq);
 
-       irq.dest_id = (e->msi.address_lo &
-                       MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
-       irq.vector = (e->msi.data &
-                       MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
-       irq.dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
-       irq.trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
-       irq.delivery_mode = e->msi.data & 0x700;
-       irq.level = 1;
-       irq.shorthand = 0;
-
-       /* TODO Deal with RH bit of MSI message address */
        return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 }
 
+
+static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
+                        struct kvm *kvm)
+{
+       struct kvm_lapic_irq irq;
+       int r;
+
+       kvm_set_msi_irq(e, &irq);
+
+       if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r))
+               return r;
+       else
+               return -EWOULDBLOCK;
+}
+
 int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
 {
        struct kvm_kernel_irq_routing_entry route;
@@ -178,6 +199,44 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
        return ret;
 }
 
+/*
+ * Deliver an IRQ in an atomic context if we can, or return a failure so
+ * that the caller can retry in a process context.
+ * Return value:
+ *  -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
+ *  Other values - No need to retry.
+ */
+int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
+{
+       struct kvm_kernel_irq_routing_entry *e;
+       int ret = -EINVAL;
+       struct kvm_irq_routing_table *irq_rt;
+       struct hlist_node *n;
+
+       trace_kvm_set_irq(irq, level, irq_source_id);
+
+       /*
+        * Injection into either PIC or IOAPIC might need to scan all CPUs,
+        * which would need to be retried from thread context; when the same
+        * GSI is connected to both PIC and IOAPIC, we'd have to report a
+        * partial failure here.
+        * Since there's no easy way to do this, we only support injecting MSI,
+        * which is limited to a 1:1 GSI mapping.
+        */
+       rcu_read_lock();
+       irq_rt = rcu_dereference(kvm->irq_routing);
+       if (irq < irq_rt->nr_rt_entries)
+               hlist_for_each_entry(e, n, &irq_rt->map[irq], link) {
+                       if (likely(e->type == KVM_IRQ_ROUTING_MSI))
+                               ret = kvm_set_msi_inatomic(e, kvm);
+                       else
+                               ret = -EWOULDBLOCK;
+                       break;
+               }
+       rcu_read_unlock();
+       return ret;
+}
+
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
        struct kvm_irq_ack_notifier *kian;
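
For reference: the kvm_set_irq_inatomic() contract spelled out in the comment above (return -EWOULDBLOCK when the injection cannot be performed atomically) is what the assigned-device MSI/MSI-X handlers earlier in this commit rely on. A minimal, hypothetical caller sketch follows, assuming a hard-irq handler paired with a threaded handler that is allowed to sleep and call kvm_set_irq(); struct example_dev and both handler names are illustrative only.

/* Hypothetical sketch; mirrors the assigned-dev.c pattern in this commit. */
#include <linux/interrupt.h>
#include <linux/kvm_host.h>

struct example_dev {			/* illustrative container */
	struct kvm *kvm;
	int irq_source_id;
	u32 guest_irq;
};

static irqreturn_t example_hardirq(int irq, void *dev_id)
{
	struct example_dev *dev = dev_id;
	int ret = kvm_set_irq_inatomic(dev->kvm, dev->irq_source_id,
				       dev->guest_irq, 1);

	/*
	 * -EWOULDBLOCK: the MSI could not be injected from atomic
	 * context, so wake the threaded handler to retry; any other
	 * return value means no retry is needed.
	 */
	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
}

static irqreturn_t example_thread(int irq, void *dev_id)
{
	struct example_dev *dev = dev_id;

	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 1);
	return IRQ_HANDLED;
}

Both handlers would be wired up together with request_threaded_irq(), so the slow path only runs when the fast path reports -EWOULDBLOCK.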