KVM: Fix guest shared interrupt with in-kernel irqchip
Sheng Yang [Wed, 15 Oct 2008 12:15:06 +0000 (20:15 +0800)]
Every call of kvm_set_irq() should offer an irq_source_id, which is
allocated by kvm_request_irq_source_id(). Based on irq_source_id, we
identify the irq source and implement logical OR for shared level
interrupts.

The allocated irq_source_id can be freed by kvm_free_irq_source_id().

Currently, we support at most sizeof(unsigned long) different irq sources.

[Amit: - rebase to kvm.git HEAD
       - move definition of KVM_USERSPACE_IRQ_SOURCE_ID to common file
       - move kvm_request_irq_source_id to the update_irq ioctl]

[Xiantao: - Add kvm/ia64 stuff and make it work for kvm/ia64 guests]

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

arch/ia64/include/asm/kvm_host.h
arch/ia64/kvm/kvm-ia64.c
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/i8254.c
arch/x86/kvm/i8254.h
arch/x86/kvm/x86.c
include/linux/kvm_host.h
virt/kvm/irq_comm.c
virt/kvm/kvm_main.c

index 85db124..04c0b88 100644 (file)
@@ -417,6 +417,9 @@ struct kvm_arch {
        struct list_head assigned_dev_head;
        struct dmar_domain *intel_iommu_domain;
        struct hlist_head irq_ack_notifier_list;
+
+       unsigned long irq_sources_bitmap;
+       unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
 };
 
 union cpuid3_t {
index a312c9e..8a2b13f 100644 (file)
@@ -778,6 +778,9 @@ static void kvm_init_vm(struct kvm *kvm)
        kvm_build_io_pmt(kvm);
 
        INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
+
+       /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
+       set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
 }
 
 struct  kvm *kvm_arch_create_vm(void)
@@ -941,9 +944,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
                        goto out;
                if (irqchip_in_kernel(kvm)) {
                        mutex_lock(&kvm->lock);
-                       kvm_ioapic_set_irq(kvm->arch.vioapic,
-                                               irq_event.irq,
-                                               irq_event.level);
+                       kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+                                   irq_event.irq, irq_event.level);
                        mutex_unlock(&kvm->lock);
                        r = 0;
                }
index 65679d0..8346be8 100644 (file)
@@ -364,6 +364,9 @@ struct kvm_arch{
 
        struct page *ept_identity_pagetable;
        bool ept_identity_pagetable_done;
+
+       unsigned long irq_sources_bitmap;
+       unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
 };
 
 struct kvm_vm_stat {
index 11c6725..8772dc9 100644 (file)
@@ -545,6 +545,12 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
        if (!pit)
                return NULL;
 
+       mutex_lock(&kvm->lock);
+       pit->irq_source_id = kvm_request_irq_source_id(kvm);
+       mutex_unlock(&kvm->lock);
+       if (pit->irq_source_id < 0)
+               return NULL;
+
        mutex_init(&pit->pit_state.lock);
        mutex_lock(&pit->pit_state.lock);
        spin_lock_init(&pit->pit_state.inject_lock);
@@ -587,6 +593,7 @@ void kvm_free_pit(struct kvm *kvm)
                mutex_lock(&kvm->arch.vpit->pit_state.lock);
                timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
                hrtimer_cancel(timer);
+               kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id);
                mutex_unlock(&kvm->arch.vpit->pit_state.lock);
                kfree(kvm->arch.vpit);
        }
@@ -595,8 +602,8 @@ void kvm_free_pit(struct kvm *kvm)
 static void __inject_pit_timer_intr(struct kvm *kvm)
 {
        mutex_lock(&kvm->lock);
-       kvm_set_irq(kvm, 0, 1);
-       kvm_set_irq(kvm, 0, 0);
+       kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
+       kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
        mutex_unlock(&kvm->lock);
 }
 
index e436d49..4178022 100644 (file)
@@ -44,6 +44,7 @@ struct kvm_pit {
        struct kvm_io_device speaker_dev;
        struct kvm *kvm;
        struct kvm_kpit_state pit_state;
+       int irq_source_id;
 };
 
 #define KVM_PIT_BASE_ADDRESS       0x40
index 4f0677d..f1f8ff2 100644 (file)
@@ -1742,7 +1742,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
                        goto out;
                if (irqchip_in_kernel(kvm)) {
                        mutex_lock(&kvm->lock);
-                       kvm_set_irq(kvm, irq_event.irq, irq_event.level);
+                       kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+                                   irq_event.irq, irq_event.level);
                        mutex_unlock(&kvm->lock);
                        r = 0;
                }
@@ -4013,6 +4014,9 @@ struct  kvm *kvm_arch_create_vm(void)
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
        INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 
+       /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
+       set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
+
        return kvm;
 }
 
index 3833c48..bb92be2 100644 (file)
@@ -37,6 +37,8 @@
 #define KVM_REQ_UNHALT             6
 #define KVM_REQ_MMU_SYNC           7
 
+#define KVM_USERSPACE_IRQ_SOURCE_ID    0
+
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
 
@@ -306,15 +308,18 @@ struct kvm_assigned_dev_kernel {
        int host_irq;
        int guest_irq;
        int irq_requested;
+       int irq_source_id;
        struct pci_dev *dev;
        struct kvm *kvm;
 };
-void kvm_set_irq(struct kvm *kvm, int irq, int level);
+void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
                                   struct kvm_irq_ack_notifier *kian);
 void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
                                     struct kvm_irq_ack_notifier *kian);
+int kvm_request_irq_source_id(struct kvm *kvm);
+void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
 #ifdef CONFIG_DMAR
 int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
index d0169f5..55ad76e 100644 (file)
 #include "ioapic.h"
 
 /* This should be called with the kvm->lock mutex held */
-void kvm_set_irq(struct kvm *kvm, int irq, int level)
+void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 {
+       unsigned long *irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
+
+       /* Logical OR for level trig interrupt */
+       if (level)
+               set_bit(irq_source_id, irq_state);
+       else
+               clear_bit(irq_source_id, irq_state);
+
        /* Not possible to detect if the guest uses the PIC or the
         * IOAPIC.  So set the bit in both. The guest will ignore
         * writes to the unused one.
         */
-       kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level);
+       kvm_ioapic_set_irq(kvm->arch.vioapic, irq, !!(*irq_state));
 #ifdef CONFIG_X86
-       kvm_pic_set_irq(pic_irqchip(kvm), irq, level);
+       kvm_pic_set_irq(pic_irqchip(kvm), irq, !!(*irq_state));
 #endif
 }
 
@@ -58,3 +66,31 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 {
        hlist_del(&kian->link);
 }
+
+/* The caller must hold kvm->lock mutex */
+int kvm_request_irq_source_id(struct kvm *kvm)
+{
+       unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
+       int irq_source_id = find_first_zero_bit(bitmap,
+                               sizeof(kvm->arch.irq_sources_bitmap));
+       if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
+               printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n");
+               irq_source_id = -EFAULT;
+       } else
+               set_bit(irq_source_id, bitmap);
+       return irq_source_id;
+}
+
+void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
+{
+       int i;
+
+       if (irq_source_id <= 0 ||
+           irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
+               printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
+               return;
+       }
+       for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
+               clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
+       clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
+}
index cf0ab8e..a87f45e 100644 (file)
@@ -105,14 +105,12 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
         */
        mutex_lock(&assigned_dev->kvm->lock);
        kvm_set_irq(assigned_dev->kvm,
+                   assigned_dev->irq_source_id,
                    assigned_dev->guest_irq, 1);
        mutex_unlock(&assigned_dev->kvm->lock);
        kvm_put_kvm(assigned_dev->kvm);
 }
 
-/* FIXME: Implement the OR logic needed to make shared interrupts on
- * this line behave properly
- */
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 {
        struct kvm_assigned_dev_kernel *assigned_dev =
@@ -134,7 +132,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 
        dev = container_of(kian, struct kvm_assigned_dev_kernel,
                           ack_notifier);
-       kvm_set_irq(dev->kvm, dev->guest_irq, 0);
+       kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
        enable_irq(dev->host_irq);
 }
 
@@ -146,6 +144,7 @@ static void kvm_free_assigned_device(struct kvm *kvm,
                free_irq(assigned_dev->host_irq, (void *)assigned_dev);
 
        kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
+       kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
 
        if (cancel_work_sync(&assigned_dev->interrupt_work))
                /* We had pending work. That means we will have to take
@@ -215,6 +214,11 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
                match->ack_notifier.gsi = assigned_irq->guest_irq;
                match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
                kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
+               r = kvm_request_irq_source_id(kvm);
+               if (r < 0)
+                       goto out_release;
+               else
+                       match->irq_source_id = r;
 
                /* Even though this is PCI, we don't want to use shared
                 * interrupts. Sharing host devices with guest-assigned devices