KVM: remove unnecessary return value check
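The cleanup pattern this commit applies throughout the ioctl handlers: a
return value is tested only to jump to an "out" label that does nothing but
return it, often followed by a dead store of r = 0. A minimal before/after
sketch, taken from the KVM_INTERRUPT case in the hunks below:

	/* before: the check is redundant, and r = 0 is a dead store */
	r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
	if (r)
		goto out;
	r = 0;
	break;

	/* after: r already carries the result, and "out" just returns it */
	r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
	break;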
[linux-3.10.git] arch/x86/kvm/x86.c
index ebf2109..c31f75d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -158,7 +158,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 u64 __read_mostly host_xcr0;
 
-int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
+static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
+
+static int kvm_vcpu_reset(struct kvm_vcpu *vcpu);
 
 static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
 {
@@ -246,20 +248,14 @@ static void drop_user_return_notifiers(void *ignore)
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
 {
-       if (irqchip_in_kernel(vcpu->kvm))
-               return vcpu->arch.apic_base;
-       else
-               return vcpu->arch.apic_base;
+       return vcpu->arch.apic_base;
 }
 EXPORT_SYMBOL_GPL(kvm_get_apic_base);
 
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
 {
        /* TODO: reserve bits check */
-       if (irqchip_in_kernel(vcpu->kvm))
-               kvm_lapic_set_base(vcpu, data);
-       else
-               vcpu->arch.apic_base = data;
+       kvm_lapic_set_base(vcpu, data);
 }
 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 
@@ -639,7 +635,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
        }
 
        if (is_long_mode(vcpu)) {
-               if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) {
+               if (kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) {
                        if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
                                return 1;
                } else
@@ -698,6 +694,18 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_cr8);
 
+static void kvm_update_dr7(struct kvm_vcpu *vcpu)
+{
+       unsigned long dr7;
+
+       if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+               dr7 = vcpu->arch.guest_debug_dr7;
+       else
+               dr7 = vcpu->arch.dr7;
+       kvm_x86_ops->set_dr7(vcpu, dr7);
+       vcpu->arch.switch_db_regs = (dr7 & DR7_BP_EN_MASK);
+}
+
 static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 {
        switch (dr) {
@@ -723,10 +731,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
                if (val & 0xffffffff00000000ULL)
                        return -1; /* #GP */
                vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
-               if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
-                       kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7);
-                       vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK);
-               }
+               kvm_update_dr7(vcpu);
                break;
        }
 
@@ -823,7 +828,7 @@ static u32 msrs_to_save[] = {
 
 static unsigned num_msrs_to_save;
 
-static u32 emulated_msrs[] = {
+static const u32 emulated_msrs[] = {
        MSR_IA32_TSCDEADLINE,
        MSR_IA32_MISC_ENABLE,
        MSR_IA32_MCG_STATUS,
@@ -1140,6 +1145,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
        unsigned long this_tsc_khz;
        s64 kernel_ns, max_kernel_ns;
        u64 tsc_timestamp;
+       u8 pvclock_flags;
 
        /* Keep irq disabled to prevent changes to the clock */
        local_irq_save(flags);
@@ -1221,7 +1227,14 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
        vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
        vcpu->last_kernel_ns = kernel_ns;
        vcpu->last_guest_tsc = tsc_timestamp;
-       vcpu->hv_clock.flags = 0;
+
+       pvclock_flags = 0;
+       if (vcpu->pvclock_set_guest_stopped_request) {
+               pvclock_flags |= PVCLOCK_GUEST_STOPPED;
+               vcpu->pvclock_set_guest_stopped_request = false;
+       }
+
+       vcpu->hv_clock.flags = pvclock_flags;
 
        /*
         * The interface expects us to write an even number signaling that the
@@ -1639,10 +1652,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                vcpu->arch.time_page =
                                gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
 
-               if (is_error_page(vcpu->arch.time_page)) {
-                       kvm_release_page_clean(vcpu->arch.time_page);
+               if (is_error_page(vcpu->arch.time_page))
                        vcpu->arch.time_page = NULL;
-               }
+
                break;
        }
        case MSR_KVM_ASYNC_PF_EN:
@@ -2000,6 +2012,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        case MSR_KVM_STEAL_TIME:
                data = vcpu->arch.st.msr_val;
                break;
+       case MSR_KVM_PV_EOI_EN:
+               data = vcpu->arch.pv_eoi.msr_val;
+               break;
        case MSR_IA32_P5_MC_ADDR:
        case MSR_IA32_P5_MC_TYPE:
        case MSR_IA32_MCG_CAP:
@@ -2174,6 +2189,8 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_GET_TSC_KHZ:
        case KVM_CAP_PCI_2_3:
        case KVM_CAP_KVMCLOCK_CTRL:
+       case KVM_CAP_READONLY_MEM:
+       case KVM_CAP_IRQFD_RESAMPLE:
                r = 1;
                break;
        case KVM_CAP_COALESCED_MMIO:
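Userspace can probe the two capabilities advertised above before depending on
read-only memslots or resampling irqfds. A minimal sketch, assuming kvm_fd is
an open /dev/kvm handle:

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* both return 1 on kernels with this code, 0 on older ones */
	int has_ro_mem   = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_READONLY_MEM);
	int has_resample = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQFD_RESAMPLE);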
@@ -2355,8 +2372,7 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
 static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
                                    struct kvm_lapic_state *s)
 {
-       memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
-       kvm_apic_post_state_restore(vcpu);
+       kvm_apic_post_state_restore(vcpu, s);
        update_cr8_intercept(vcpu);
 
        return 0;
@@ -2365,7 +2381,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
 static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
                                    struct kvm_interrupt *irq)
 {
-       if (irq->irq < 0 || irq->irq >= 256)
+       if (irq->irq < 0 || irq->irq >= KVM_NR_INTERRUPTS)
                return -EINVAL;
        if (irqchip_in_kernel(vcpu->kvm))
                return -ENXIO;
@@ -2632,10 +2648,9 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
  */
 static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
 {
-       struct pvclock_vcpu_time_info *src = &vcpu->arch.hv_clock;
        if (!vcpu->arch.time_page)
                return -EINVAL;
-       src->flags |= PVCLOCK_GUEST_STOPPED;
+       vcpu->arch.pvclock_set_guest_stopped_request = true;
        kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
        return 0;
 }
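kvm_set_guest_paused() is reached through the KVM_KVMCLOCK_CTRL vcpu ioctl.
Setting PVCLOCK_GUEST_STOPPED directly in hv_clock was unreliable because
kvm_guest_time_update() unconditionally zeroed hv_clock.flags; the flag is now
latched as a request and folded into pvclock_flags at the next update (see the
kvm_guest_time_update hunk above). A sketch of the userspace side:

	/* vcpu_fd is the vcpu file descriptor; the ioctl takes no argument
	 * and fails with -EINVAL if the guest never set up kvmclock
	 * (no time_page), per the check above */
	if (ioctl(vcpu_fd, KVM_KVMCLOCK_CTRL, 0))
		perror("KVM_KVMCLOCK_CTRL");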
@@ -2674,19 +2689,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                break;
        }
        case KVM_SET_LAPIC: {
-               r = -EINVAL;
                if (!vcpu->arch.apic)
                        goto out;
                u.lapic = memdup_user(argp, sizeof(*u.lapic));
-               if (IS_ERR(u.lapic)) {
-                       r = PTR_ERR(u.lapic);
-                       goto out;
-               }
+               if (IS_ERR(u.lapic))
+                       return PTR_ERR(u.lapic);
 
                r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
-               if (r)
-                       goto out;
-               r = 0;
                break;
        }
        case KVM_INTERRUPT: {
@@ -2696,16 +2705,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                if (copy_from_user(&irq, argp, sizeof irq))
                        goto out;
                r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
-               if (r)
-                       goto out;
-               r = 0;
                break;
        }
        case KVM_NMI: {
                r = kvm_vcpu_ioctl_nmi(vcpu);
-               if (r)
-                       goto out;
-               r = 0;
                break;
        }
        case KVM_SET_CPUID: {
@@ -2716,8 +2719,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
                        goto out;
                r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
-               if (r)
-                       goto out;
                break;
        }
        case KVM_SET_CPUID2: {
@@ -2729,8 +2730,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                        goto out;
                r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
                                              cpuid_arg->entries);
-               if (r)
-                       goto out;
                break;
        }
        case KVM_GET_CPUID2: {
@@ -2862,10 +2861,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        }
        case KVM_SET_XSAVE: {
                u.xsave = memdup_user(argp, sizeof(*u.xsave));
-               if (IS_ERR(u.xsave)) {
-                       r = PTR_ERR(u.xsave);
-                       goto out;
-               }
+               if (IS_ERR(u.xsave))
+                       return PTR_ERR(u.xsave);
 
                r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
                break;
@@ -2887,10 +2884,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        }
        case KVM_SET_XCRS: {
                u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
-               if (IS_ERR(u.xcrs)) {
-                       r = PTR_ERR(u.xcrs);
-                       goto out;
-               }
+               if (IS_ERR(u.xcrs))
+                       return PTR_ERR(u.xcrs);
 
                r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
                break;
@@ -2938,7 +2933,7 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
        int ret;
 
        if (addr > (unsigned int)(-3 * PAGE_SIZE))
-               return -1;
+               return -EINVAL;
        ret = kvm_x86_ops->set_tss_addr(kvm, addr);
        return ret;
 }
@@ -3199,8 +3194,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
        switch (ioctl) {
        case KVM_SET_TSS_ADDR:
                r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
-               if (r < 0)
-                       goto out;
                break;
        case KVM_SET_IDENTITY_MAP_ADDR: {
                u64 ident_addr;
@@ -3209,14 +3202,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
                if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
                        goto out;
                r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
-               if (r < 0)
-                       goto out;
                break;
        }
        case KVM_SET_NR_MMU_PAGES:
                r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
-               if (r)
-                       goto out;
                break;
        case KVM_GET_NR_MMU_PAGES:
                r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
@@ -3307,8 +3296,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
                r = 0;
        get_irqchip_out:
                kfree(chip);
-               if (r)
-                       goto out;
                break;
        }
        case KVM_SET_IRQCHIP: {
@@ -3330,8 +3317,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
                r = 0;
        set_irqchip_out:
                kfree(chip);
-               if (r)
-                       goto out;
                break;
        }
        case KVM_GET_PIT: {
@@ -3358,9 +3343,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
                if (!kvm->arch.vpit)
                        goto out;
                r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
-               if (r)
-                       goto out;
-               r = 0;
                break;
        }
        case KVM_GET_PIT2: {
@@ -3384,9 +3366,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
                if (!kvm->arch.vpit)
                        goto out;
                r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
-               if (r)
-                       goto out;
-               r = 0;
                break;
        }
        case KVM_REINJECT_CONTROL: {
@@ -3395,9 +3374,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
                if (copy_from_user(&control, argp, sizeof(control)))
                        goto out;
                r = kvm_vm_ioctl_reinject(kvm, &control);
-               if (r)
-                       goto out;
-               r = 0;
                break;
        }
        case KVM_XEN_HVM_CONFIG: {
@@ -3672,20 +3648,17 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
                                gpa_t *gpa, struct x86_exception *exception,
                                bool write)
 {
-       u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+       u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
+               | (write ? PFERR_WRITE_MASK : 0);
 
-       if (vcpu_match_mmio_gva(vcpu, gva) &&
-                 check_write_user_access(vcpu, write, access,
-                 vcpu->arch.access)) {
+       if (vcpu_match_mmio_gva(vcpu, gva)
+           && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
                *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
                                        (gva & (PAGE_SIZE - 1));
                trace_vcpu_match_mmio(gva, *gpa, write, false);
                return 1;
        }
 
-       if (write)
-               access |= PFERR_WRITE_MASK;
-
        *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
 
        if (*gpa == UNMAPPED_GVA)
@@ -3773,14 +3746,14 @@ static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
        return X86EMUL_CONTINUE;
 }
 
-static struct read_write_emulator_ops read_emultor = {
+static const struct read_write_emulator_ops read_emultor = {
        .read_write_prepare = read_prepare,
        .read_write_emulate = read_emulate,
        .read_write_mmio = vcpu_mmio_read,
        .read_write_exit_mmio = read_exit_mmio,
 };
 
-static struct read_write_emulator_ops write_emultor = {
+static const struct read_write_emulator_ops write_emultor = {
        .read_write_emulate = write_emulate,
        .read_write_mmio = write_mmio,
        .read_write_exit_mmio = write_exit_mmio,
@@ -3791,7 +3764,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
                                       unsigned int bytes,
                                       struct x86_exception *exception,
                                       struct kvm_vcpu *vcpu,
-                                      struct read_write_emulator_ops *ops)
+                                      const struct read_write_emulator_ops *ops)
 {
        gpa_t gpa;
        int handled, ret;
@@ -3840,7 +3813,7 @@ mmio:
 int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
                        void *val, unsigned int bytes,
                        struct x86_exception *exception,
-                       struct read_write_emulator_ops *ops)
+                       const struct read_write_emulator_ops *ops)
 {
        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
        gpa_t gpa;
@@ -3945,10 +3918,8 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
                goto emul_write;
 
        page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-       if (is_error_page(page)) {
-               kvm_release_page_clean(page);
+       if (is_error_page(page))
                goto emul_write;
-       }
 
        kaddr = kmap_atomic(page);
        kaddr += offset_in_page(gpa);
@@ -4315,7 +4286,19 @@ static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
        kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
 }
 
-static struct x86_emulate_ops emulate_ops = {
+static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
+{
+       return kvm_register_read(emul_to_vcpu(ctxt), reg);
+}
+
+static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
+{
+       kvm_register_write(emul_to_vcpu(ctxt), reg, val);
+}
+
+static const struct x86_emulate_ops emulate_ops = {
+       .read_gpr            = emulator_read_gpr,
+       .write_gpr           = emulator_write_gpr,
        .read_std            = kvm_read_guest_virt_system,
        .write_std           = kvm_write_guest_virt_system,
        .fetch               = kvm_fetch_guest_virt,
@@ -4350,14 +4333,6 @@ static struct x86_emulate_ops emulate_ops = {
        .get_cpuid           = emulator_get_cpuid,
 };
 
-static void cache_all_regs(struct kvm_vcpu *vcpu)
-{
-       kvm_register_read(vcpu, VCPU_REGS_RAX);
-       kvm_register_read(vcpu, VCPU_REGS_RSP);
-       kvm_register_read(vcpu, VCPU_REGS_RIP);
-       vcpu->arch.regs_dirty = ~0;
-}
-
 static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
 {
        u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
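Together with the ->read_gpr/->write_gpr callbacks added above, removing
cache_all_regs() moves the emulator to an on-demand register cache: a GPR is
read from the vcpu only when an instruction actually references it, rather
than pre-reading RAX/RSP/RIP and marking every register dirty. A sketch of the
emulate.c side, assuming the _regs[] array visible in this diff plus a
regs_valid bitmask:

	static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
	{
		/* fetch from the vcpu only on first use */
		if (!(ctxt->regs_valid & (1 << nr))) {
			ctxt->regs_valid |= 1 << nr;
			ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
		}
		return ctxt->_regs[nr];
	}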
@@ -4384,12 +4359,10 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
                kvm_queue_exception(vcpu, ctxt->exception.vector);
 }
 
-static void init_decode_cache(struct x86_emulate_ctxt *ctxt,
-                             const unsigned long *regs)
+static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
 {
        memset(&ctxt->twobyte, 0,
-              (void *)&ctxt->regs - (void *)&ctxt->twobyte);
-       memcpy(ctxt->regs, regs, sizeof(ctxt->regs));
+              (void *)&ctxt->_regs - (void *)&ctxt->twobyte);
 
        ctxt->fetch.start = 0;
        ctxt->fetch.end = 0;
@@ -4404,14 +4377,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
        struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
        int cs_db, cs_l;
 
-       /*
-        * TODO: fix emulate.c to use guest_read/write_register
-        * instead of direct ->regs accesses, can save hundred cycles
-        * on Intel for instructions that don't read/change RSP, for
-        * for example.
-        */
-       cache_all_regs(vcpu);
-
        kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
        ctxt->eflags = kvm_get_rflags(vcpu);
@@ -4423,7 +4388,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
                                                          X86EMUL_MODE_PROT16;
        ctxt->guest_mode = is_guest_mode(vcpu);
 
-       init_decode_cache(ctxt, vcpu->arch.regs);
+       init_decode_cache(ctxt);
        vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
 }
 
@@ -4443,7 +4408,6 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
                return EMULATE_FAIL;
 
        ctxt->eip = ctxt->_eip;
-       memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
        kvm_rip_write(vcpu, ctxt->eip);
        kvm_set_rflags(vcpu, ctxt->eflags);
 
@@ -4476,6 +4440,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
 static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 {
        gpa_t gpa;
+       pfn_t pfn;
 
        if (tdp_enabled)
                return false;
@@ -4493,8 +4458,17 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
        if (gpa == UNMAPPED_GVA)
                return true; /* let cpu generate fault */
 
-       if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT)))
+       /*
+        * Do not retry the unhandleable instruction if it faults on
+        * read-only host memory, otherwise it will go into an infinite loop:
+        * retry instruction -> write #PF -> emulation fail -> retry
+        * instruction -> ...
+        */
+       pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
+       if (!is_error_noslot_pfn(pfn)) {
+               kvm_release_pfn_clean(pfn);
                return true;
+       }
 
        return false;
 }
@@ -4543,6 +4517,9 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
        return true;
 }
 
+static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
+static int complete_emulated_pio(struct kvm_vcpu *vcpu);
+
 int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                            unsigned long cr2,
                            int emulation_type,
@@ -4591,7 +4568,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
           changes register values during IO operations */
        if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
                vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
-               memcpy(ctxt->regs, vcpu->arch.regs, sizeof ctxt->regs);
+               emulator_invalidate_register_cache(ctxt);
        }
 
 restart:
@@ -4613,13 +4590,16 @@ restart:
        } else if (vcpu->arch.pio.count) {
                if (!vcpu->arch.pio.in)
                        vcpu->arch.pio.count = 0;
-               else
+               else {
                        writeback = false;
+                       vcpu->arch.complete_userspace_io = complete_emulated_pio;
+               }
                r = EMULATE_DO_MMIO;
        } else if (vcpu->mmio_needed) {
                if (!vcpu->mmio_is_write)
                        writeback = false;
                r = EMULATE_DO_MMIO;
+               vcpu->arch.complete_userspace_io = complete_emulated_mmio;
        } else if (r == EMULATION_RESTART)
                goto restart;
        else
@@ -4629,7 +4609,6 @@ restart:
                toggle_interruptibility(vcpu, ctxt->interruptibility);
                kvm_set_rflags(vcpu, ctxt->eflags);
                kvm_make_request(KVM_REQ_EVENT, vcpu);
-               memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
                vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
                kvm_rip_write(vcpu, ctxt->eip);
        } else
@@ -4912,6 +4891,7 @@ int kvm_arch_init(void *opaque)
        if (cpu_has_xsave)
                host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 
+       kvm_lapic_init();
        return 0;
 
 out:
@@ -5049,7 +5029,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
 
-int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
+static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
 {
        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
        char instruction[3];
@@ -5096,17 +5076,20 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
                        !kvm_event_needs_reinjection(vcpu);
 }
 
-static void vapic_enter(struct kvm_vcpu *vcpu)
+static int vapic_enter(struct kvm_vcpu *vcpu)
 {
        struct kvm_lapic *apic = vcpu->arch.apic;
        struct page *page;
 
        if (!apic || !apic->vapic_addr)
-               return;
+               return 0;
 
        page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
+       if (is_error_page(page))
+               return -EFAULT;
 
        vcpu->arch.apic->vapic_page = page;
+       return 0;
 }
 
 static void vapic_exit(struct kvm_vcpu *vcpu)
@@ -5406,14 +5389,18 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                pr_debug("vcpu %d received sipi with vector # %x\n",
                         vcpu->vcpu_id, vcpu->arch.sipi_vector);
                kvm_lapic_reset(vcpu);
-               r = kvm_arch_vcpu_reset(vcpu);
+               r = kvm_vcpu_reset(vcpu);
                if (r)
                        return r;
                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
        }
 
        vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
-       vapic_enter(vcpu);
+       r = vapic_enter(vcpu);
+       if (r) {
+               srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+               return r;
+       }
 
        r = 1;
        while (r > 0) {
@@ -5475,6 +5462,24 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
        return r;
 }
 
+static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
+{
+       int r;
+       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+       if (r != EMULATE_DONE)
+               return 0;
+       return 1;
+}
+
+static int complete_emulated_pio(struct kvm_vcpu *vcpu)
+{
+       BUG_ON(!vcpu->arch.pio.count);
+
+       return complete_emulated_io(vcpu);
+}
+
 /*
  * Implements the following, as a state machine:
  *
@@ -5491,47 +5496,37 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
  *      copy data
  *      exit
  */
-static int complete_mmio(struct kvm_vcpu *vcpu)
+static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *run = vcpu->run;
        struct kvm_mmio_fragment *frag;
-       int r;
 
-       if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
-               return 1;
+       BUG_ON(!vcpu->mmio_needed);
 
-       if (vcpu->mmio_needed) {
-               /* Complete previous fragment */
-               frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
-               if (!vcpu->mmio_is_write)
-                       memcpy(frag->data, run->mmio.data, frag->len);
-               if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
-                       vcpu->mmio_needed = 0;
-                       if (vcpu->mmio_is_write)
-                               return 1;
-                       vcpu->mmio_read_completed = 1;
-                       goto done;
-               }
-               /* Initiate next fragment */
-               ++frag;
-               run->exit_reason = KVM_EXIT_MMIO;
-               run->mmio.phys_addr = frag->gpa;
+       /* Complete previous fragment */
+       frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
+       if (!vcpu->mmio_is_write)
+               memcpy(frag->data, run->mmio.data, frag->len);
+       if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
+               vcpu->mmio_needed = 0;
                if (vcpu->mmio_is_write)
-                       memcpy(run->mmio.data, frag->data, frag->len);
-               run->mmio.len = frag->len;
-               run->mmio.is_write = vcpu->mmio_is_write;
-               return 0;
-
-       }
-done:
-       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-       r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-       if (r != EMULATE_DONE)
-               return 0;
-       return 1;
+                       return 1;
+               vcpu->mmio_read_completed = 1;
+               return complete_emulated_io(vcpu);
+       }
+       /* Initiate next fragment */
+       ++frag;
+       run->exit_reason = KVM_EXIT_MMIO;
+       run->mmio.phys_addr = frag->gpa;
+       if (vcpu->mmio_is_write)
+               memcpy(run->mmio.data, frag->data, frag->len);
+       run->mmio.len = frag->len;
+       run->mmio.is_write = vcpu->mmio_is_write;
+       vcpu->arch.complete_userspace_io = complete_emulated_mmio;
+       return 0;
 }
 
+
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        int r;
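Each MMIO fragment is one round trip to userspace: KVM fills kvm_run->mmio and
exits, userspace services the access and calls KVM_RUN again, and
complete_emulated_mmio(), installed above as the complete_userspace_io
callback, either queues the next fragment or finishes the instruction through
complete_emulated_io(). A sketch of the matching userspace loop
(mmio_read/mmio_write are hypothetical device-model helpers):

	int sz = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
	struct kvm_run *run = mmap(NULL, sz, PROT_READ | PROT_WRITE,
				   MAP_SHARED, vcpu_fd, 0);

	while (ioctl(vcpu_fd, KVM_RUN, 0) == 0) {
		if (run->exit_reason != KVM_EXIT_MMIO)
			break;		/* other exit reasons handled elsewhere */
		if (run->mmio.is_write)
			mmio_write(run->mmio.phys_addr, run->mmio.data,
				   run->mmio.len);
		else
			mmio_read(run->mmio.phys_addr, run->mmio.data,
				  run->mmio.len);
		/* the next KVM_RUN resumes with the next fragment */
	}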
@@ -5558,9 +5553,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                }
        }
 
-       r = complete_mmio(vcpu);
-       if (r <= 0)
-               goto out;
+       if (unlikely(vcpu->arch.complete_userspace_io)) {
+               int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
+               vcpu->arch.complete_userspace_io = NULL;
+               r = cui(vcpu);
+               if (r <= 0)
+                       goto out;
+       } else
+               WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
 
        r = __vcpu_run(vcpu);
 
@@ -5582,8 +5582,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
                 * that usually, but some badly designed PV devices (vmware
                 * backdoor interface) need this to work
                 */
-               struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
-               memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
+               emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
                vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
        }
        regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
@@ -5723,7 +5722,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
        if (ret)
                return EMULATE_FAIL;
 
-       memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
        kvm_rip_write(vcpu, ctxt->eip);
        kvm_set_rflags(vcpu, ctxt->eflags);
        kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -5775,7 +5773,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        if (mmu_reset_needed)
                kvm_mmu_reset_context(vcpu);
 
-       max_bits = (sizeof sregs->interrupt_bitmap) << 3;
+       max_bits = KVM_NR_INTERRUPTS;
        pending_vec = find_first_bit(
                (const unsigned long *)sregs->interrupt_bitmap, max_bits);
        if (pending_vec < max_bits) {
@@ -5835,13 +5833,12 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
        if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
                for (i = 0; i < KVM_NR_DB_REGS; ++i)
                        vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
-               vcpu->arch.switch_db_regs =
-                       (dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
+               vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
        } else {
                for (i = 0; i < KVM_NR_DB_REGS; i++)
                        vcpu->arch.eff_db[i] = vcpu->arch.db[i];
-               vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
        }
+       kvm_update_dr7(vcpu);
 
        if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
                vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
@@ -5853,7 +5850,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
         */
        kvm_set_rflags(vcpu, rflags);
 
-       kvm_x86_ops->set_guest_debug(vcpu, dbg);
+       kvm_x86_ops->update_db_bp_intercept(vcpu);
 
        r = 0;
 
@@ -5955,7 +5952,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
         */
        kvm_put_guest_xcr0(vcpu);
        vcpu->guest_fpu_loaded = 1;
-       unlazy_fpu(current);
+       __kernel_fpu_begin();
        fpu_restore_checking(&vcpu->arch.guest_fpu);
        trace_kvm_fpu(1);
 }
@@ -5969,6 +5966,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 
        vcpu->guest_fpu_loaded = 0;
        fpu_save_init(&vcpu->arch.guest_fpu);
+       __kernel_fpu_end();
        ++vcpu->stat.fpu_reload;
        kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
        trace_kvm_fpu(0);
@@ -5998,8 +5996,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
        int r;
 
        vcpu->arch.mtrr_state.have_fixed = 1;
-       vcpu_load(vcpu);
-       r = kvm_arch_vcpu_reset(vcpu);
+       r = vcpu_load(vcpu);
+       if (r)
+               return r;
+       r = kvm_vcpu_reset(vcpu);
        if (r == 0)
                r = kvm_mmu_setup(vcpu);
        vcpu_put(vcpu);
@@ -6009,9 +6009,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
+       int r;
        vcpu->arch.apf.msr_val = 0;
 
-       vcpu_load(vcpu);
+       r = vcpu_load(vcpu);
+       BUG_ON(r);
        kvm_mmu_unload(vcpu);
        vcpu_put(vcpu);
 
@@ -6019,16 +6021,16 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
        kvm_x86_ops->vcpu_free(vcpu);
 }
 
-int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
+static int kvm_vcpu_reset(struct kvm_vcpu *vcpu)
 {
        atomic_set(&vcpu->arch.nmi_queued, 0);
        vcpu->arch.nmi_pending = 0;
        vcpu->arch.nmi_injected = false;
 
-       vcpu->arch.switch_db_regs = 0;
        memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
        vcpu->arch.dr6 = DR6_FIXED_1;
        vcpu->arch.dr7 = DR7_FIXED_1;
+       kvm_update_dr7(vcpu);
 
        kvm_make_request(KVM_REQ_EVENT, vcpu);
        vcpu->arch.apf.msr_val = 0;
@@ -6160,6 +6162,8 @@ bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
        return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
 }
 
+struct static_key kvm_no_apic_vcpu __read_mostly;
+
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
        struct page *page;
@@ -6192,7 +6196,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
                r = kvm_create_lapic(vcpu);
                if (r < 0)
                        goto fail_mmu_destroy;
-       }
+       } else
+               static_key_slow_inc(&kvm_no_apic_vcpu);
 
        vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
                                       GFP_KERNEL);
@@ -6232,6 +6237,8 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
        kvm_mmu_destroy(vcpu);
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
        free_page((unsigned long)vcpu->arch.pio_data);
+       if (!irqchip_in_kernel(vcpu->kvm))
+               static_key_slow_dec(&kvm_no_apic_vcpu);
 }
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
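The kvm_no_apic_vcpu static key counts vcpus created without an in-kernel
APIC, so the common case (every vcpu has one) can test for a local APIC with a
patched-out branch instead of a pointer load and compare. Roughly the shape of
the lapic.h consumer (a sketch, not the verbatim header):

	extern struct static_key kvm_no_apic_vcpu;

	static inline bool kvm_vcpu_has_lapic(struct kvm_vcpu *vcpu)
	{
		if (static_key_false(&kvm_no_apic_vcpu))
			return vcpu->arch.apic != NULL;
		return true;
	}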
@@ -6244,15 +6251,21 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
        /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
        set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
+       /* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */
+       set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+               &kvm->arch.irq_sources_bitmap);
 
        raw_spin_lock_init(&kvm->arch.tsc_write_lock);
+       mutex_init(&kvm->arch.apic_map_lock);
 
        return 0;
 }
 
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
 {
-       vcpu_load(vcpu);
+       int r;
+       r = vcpu_load(vcpu);
+       BUG_ON(r);
        kvm_mmu_unload(vcpu);
        vcpu_put(vcpu);
 }
@@ -6296,6 +6309,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
                put_page(kvm->arch.apic_access_page);
        if (kvm->arch.ept_identity_pagetable)
                put_page(kvm->arch.ept_identity_pagetable);
+       kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
 }
 
 void kvm_arch_free_memslot(struct kvm_memory_slot *free,
@@ -6441,14 +6455,28 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
        kvm_mmu_slot_remove_write_access(kvm, mem->slot);
        spin_unlock(&kvm->mmu_lock);
+       /*
+        * If a memory slot is created or moved, we need to clear all
+        * MMIO sptes.
+        */
+       if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) {
+               kvm_mmu_zap_all(kvm);
+               kvm_reload_remote_mmus(kvm);
+       }
 }
 
-void kvm_arch_flush_shadow(struct kvm *kvm)
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
 {
        kvm_mmu_zap_all(kvm);
        kvm_reload_remote_mmus(kvm);
 }
 
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+                                  struct kvm_memory_slot *slot)
+{
+       kvm_arch_flush_shadow_all(kvm);
+}
+
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
        return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&