#include <linux/srcu.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
+#include <linux/sort.h>
+#include <linux/bsearch.h>
#include <asm/processor.h>
#include <asm/io.h>
static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
unsigned long arg);
+#ifdef CONFIG_COMPAT
+static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
+ unsigned long arg);
+#endif
static int hardware_enable_all(void);
static void hardware_disable_all(void);
static bool largepages_enabled = true;
-static struct page *hwpoison_page;
-static pfn_t hwpoison_pfn;
-
-static struct page *fault_page;
-static pfn_t fault_pfn;
-
-inline int kvm_is_mmio_pfn(pfn_t pfn)
+bool kvm_is_mmio_pfn(pfn_t pfn)
{
if (pfn_valid(pfn)) {
int reserved;
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
- int dirty_count = kvm->tlbs_dirty;
+ long dirty_count = kvm->tlbs_dirty;
smp_mb();
if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
}
vcpu->run = page_address(page);
+ kvm_vcpu_set_in_spin_loop(vcpu, false);
+ kvm_vcpu_set_dy_eligible(vcpu, false);
+
r = kvm_arch_vcpu_init(vcpu);
if (r < 0)
goto fail_free_run;
*/
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
+
kvm->mmu_notifier_seq++;
need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty;
- spin_unlock(&kvm->mmu_lock);
- srcu_read_unlock(&kvm->srcu, idx);
-
/* we've to flush the tlb before the pages can be freed */
if (need_tlb_flush)
kvm_flush_remote_tlbs(kvm);
+ spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
}
static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
* count is also read inside the mmu_lock critical section.
*/
kvm->mmu_notifier_count++;
- for (; start < end; start += PAGE_SIZE)
- need_tlb_flush |= kvm_unmap_hva(kvm, start);
+ need_tlb_flush = kvm_unmap_hva_range(kvm, start, end);
need_tlb_flush |= kvm->tlbs_dirty;
- spin_unlock(&kvm->mmu_lock);
- srcu_read_unlock(&kvm->srcu, idx);
-
/* we've to flush the tlb before the pages can be freed */
if (need_tlb_flush)
kvm_flush_remote_tlbs(kvm);
+
+ spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
}
static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
* been freed.
*/
kvm->mmu_notifier_seq++;
+ smp_wmb();
/*
* The above sequence increase must be visible before the
- * below count decrease but both values are read by the kvm
- * page fault under mmu_lock spinlock so we don't need to add
- * a smb_wmb() here in between the two.
+ * below count decrease, which is ensured by the smp_wmb above
+ * in conjunction with the smp_rmb in mmu_notifier_retry().
*/
kvm->mmu_notifier_count--;
spin_unlock(&kvm->mmu_lock);
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
- young = kvm_age_hva(kvm, address);
- spin_unlock(&kvm->mmu_lock);
- srcu_read_unlock(&kvm->srcu, idx);
+ young = kvm_age_hva(kvm, address);
if (young)
kvm_flush_remote_tlbs(kvm);
+ spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
+
return young;
}
#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
-static struct kvm *kvm_create_vm(void)
+static void kvm_init_memslots_id(struct kvm *kvm)
+{
+ int i;
+ struct kvm_memslots *slots = kvm->memslots;
+
+ for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
+ slots->id_to_index[i] = slots->memslots[i].id = i;
+}
+
+static struct kvm *kvm_create_vm(unsigned long type)
{
int r, i;
struct kvm *kvm = kvm_arch_alloc_vm();
if (!kvm)
return ERR_PTR(-ENOMEM);
- r = kvm_arch_init_vm(kvm);
+ r = kvm_arch_init_vm(kvm, type);
if (r)
goto out_err_nodisable;
kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
if (!kvm->memslots)
goto out_err_nosrcu;
+ kvm_init_memslots_id(kvm);
if (init_srcu_struct(&kvm->srcu))
goto out_err_nosrcu;
for (i = 0; i < KVM_NR_BUSES; i++) {
goto out_err;
}
- r = kvm_init_mmu_notifier(kvm);
- if (r)
- goto out_err;
-
+ spin_lock_init(&kvm->mmu_lock);
kvm->mm = current->mm;
atomic_inc(&kvm->mm->mm_count);
- spin_lock_init(&kvm->mmu_lock);
kvm_eventfd_init(kvm);
mutex_init(&kvm->lock);
mutex_init(&kvm->irq_lock);
mutex_init(&kvm->slots_lock);
atomic_set(&kvm->users_count, 1);
+
+ r = kvm_init_mmu_notifier(kvm);
+ if (r)
+ goto out_err;
+
raw_spin_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
raw_spin_unlock(&kvm_lock);
return ERR_PTR(r);
}
+/*
+ * Avoid using vmalloc for a small buffer.
+ * Should not be used when the size is statically known.
+ */
+void *kvm_kvzalloc(unsigned long size)
+{
+ if (size > PAGE_SIZE)
+ return vzalloc(size);
+ else
+ return kzalloc(size, GFP_KERNEL);
+}
+
+void kvm_kvfree(const void *addr)
+{
+ if (is_vmalloc_addr(addr))
+ vfree(addr);
+ else
+ kfree(addr);
+}
+
static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
{
if (!memslot->dirty_bitmap)
return;
- if (2 * kvm_dirty_bitmap_bytes(memslot) > PAGE_SIZE)
- vfree(memslot->dirty_bitmap_head);
- else
- kfree(memslot->dirty_bitmap_head);
-
+ kvm_kvfree(memslot->dirty_bitmap);
memslot->dirty_bitmap = NULL;
- memslot->dirty_bitmap_head = NULL;
}
/*
static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
- int i;
-
- if (!dont || free->rmap != dont->rmap)
- vfree(free->rmap);
-
if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
kvm_destroy_dirty_bitmap(free);
-
- for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
- if (!dont || free->lpage_info[i] != dont->lpage_info[i]) {
- vfree(free->lpage_info[i]);
- free->lpage_info[i] = NULL;
- }
- }
+ kvm_arch_free_memslot(free, dont);
free->npages = 0;
- free->rmap = NULL;
}
void kvm_free_physmem(struct kvm *kvm)
{
- int i;
struct kvm_memslots *slots = kvm->memslots;
+ struct kvm_memory_slot *memslot;
- for (i = 0; i < slots->nmemslots; ++i)
- kvm_free_physmem_slot(&slots->memslots[i], NULL);
+ kvm_for_each_memslot(memslot, slots)
+ kvm_free_physmem_slot(memslot, NULL);
kfree(kvm->memslots);
}
return 0;
}
-#ifndef CONFIG_S390
/*
* Allocation size is twice as large as the actual dirty bitmap size.
- * This makes it possible to do double buffering: see x86's
- * kvm_vm_ioctl_get_dirty_log().
+ * See x86's kvm_vm_ioctl_get_dirty_log() why this is needed.
*/
static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
{
+#ifndef CONFIG_S390
unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
- if (dirty_bytes > PAGE_SIZE)
- memslot->dirty_bitmap = vzalloc(dirty_bytes);
- else
- memslot->dirty_bitmap = kzalloc(dirty_bytes, GFP_KERNEL);
-
+ memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes);
if (!memslot->dirty_bitmap)
return -ENOMEM;
- memslot->dirty_bitmap_head = memslot->dirty_bitmap;
+#endif /* !CONFIG_S390 */
+ return 0;
+}
+
+static int cmp_memslot(const void *slot1, const void *slot2)
+{
+ struct kvm_memory_slot *s1, *s2;
+
+ s1 = (struct kvm_memory_slot *)slot1;
+ s2 = (struct kvm_memory_slot *)slot2;
+
+ if (s1->npages < s2->npages)
+ return 1;
+ if (s1->npages > s2->npages)
+ return -1;
+
+ return 0;
+}
+
+/*
+ * Sort the memslots base on its size, so the larger slots
+ * will get better fit.
+ */
+static void sort_memslots(struct kvm_memslots *slots)
+{
+ int i;
+
+ sort(slots->memslots, KVM_MEM_SLOTS_NUM,
+ sizeof(struct kvm_memory_slot), cmp_memslot, NULL);
+
+ for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
+ slots->id_to_index[slots->memslots[i].id] = i;
+}
+
+void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new)
+{
+ if (new) {
+ int id = new->id;
+ struct kvm_memory_slot *old = id_to_memslot(slots, id);
+ unsigned long npages = old->npages;
+
+ *old = *new;
+ if (new->npages != npages)
+ sort_memslots(slots);
+ }
+
+ slots->generation++;
+}
+
+static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
+{
+ if (mem->flags & ~KVM_MEM_LOG_DIRTY_PAGES)
+ return -EINVAL;
+
return 0;
}
-#endif /* !CONFIG_S390 */
/*
* Allocate some memory and give it an address in the guest physical address
struct kvm_memory_slot old, new;
struct kvm_memslots *slots, *old_memslots;
+ r = check_memory_region_flags(mem);
+ if (r)
+ goto out;
+
r = -EINVAL;
/* General sanity checks */
if (mem->memory_size & (PAGE_SIZE - 1))
goto out;
if (mem->guest_phys_addr & (PAGE_SIZE - 1))
goto out;
- if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1)))
+ /* We can read the guest memory with __xxx_user() later on. */
+ if (user_alloc &&
+ ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
+ !access_ok(VERIFY_WRITE,
+ (void __user *)(unsigned long)mem->userspace_addr,
+ mem->memory_size)))
goto out;
- if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
+ if (mem->slot >= KVM_MEM_SLOTS_NUM)
goto out;
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
goto out;
- memslot = &kvm->memslots->memslots[mem->slot];
+ memslot = id_to_memslot(kvm->memslots, mem->slot);
base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
npages = mem->memory_size >> PAGE_SHIFT;
r = -ENOMEM;
/* Allocate if a slot is being created */
-#ifndef CONFIG_S390
- if (npages && !new.rmap) {
- new.rmap = vzalloc(npages * sizeof(*new.rmap));
-
- if (!new.rmap)
- goto out_free;
-
+ if (npages && !old.npages) {
new.user_alloc = user_alloc;
new.userspace_addr = mem->userspace_addr;
- }
- if (!npages)
- goto skip_lpage;
-
- for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
- unsigned long ugfn;
- unsigned long j;
- int lpages;
- int level = i + 2;
- /* Avoid unused variable warning if no large pages */
- (void)level;
-
- if (new.lpage_info[i])
- continue;
-
- lpages = 1 + ((base_gfn + npages - 1)
- >> KVM_HPAGE_GFN_SHIFT(level));
- lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level);
-
- new.lpage_info[i] = vzalloc(lpages * sizeof(*new.lpage_info[i]));
-
- if (!new.lpage_info[i])
+ if (kvm_arch_create_memslot(&new, npages))
goto out_free;
-
- if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
- new.lpage_info[i][0].write_count = 1;
- if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
- new.lpage_info[i][lpages - 1].write_count = 1;
- ugfn = new.userspace_addr >> PAGE_SHIFT;
- /*
- * If the gfn and userspace address are not aligned wrt each
- * other, or if explicitly asked to, disable large page
- * support for this slot
- */
- if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
- !largepages_enabled)
- for (j = 0; j < lpages; ++j)
- new.lpage_info[i][j].write_count = 1;
}
-skip_lpage:
-
/* Allocate page dirty bitmap if needed */
if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
if (kvm_create_dirty_bitmap(&new) < 0)
goto out_free;
/* destroy any largepage mappings for dirty tracking */
}
-#else /* not defined CONFIG_S390 */
- new.user_alloc = user_alloc;
- if (user_alloc)
- new.userspace_addr = mem->userspace_addr;
-#endif /* not defined CONFIG_S390 */
if (!npages) {
+ struct kvm_memory_slot *slot;
+
r = -ENOMEM;
- slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+ slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
+ GFP_KERNEL);
if (!slots)
goto out_free;
- memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
- if (mem->slot >= slots->nmemslots)
- slots->nmemslots = mem->slot + 1;
- slots->generation++;
- slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
+ slot = id_to_memslot(slots, mem->slot);
+ slot->flags |= KVM_MEMSLOT_INVALID;
+
+ update_memslots(slots, NULL);
old_memslots = kvm->memslots;
rcu_assign_pointer(kvm->memslots, slots);
if (r)
goto out_free;
- /* map the pages in iommu page table */
+ /* map/unmap the pages in iommu page table */
if (npages) {
r = kvm_iommu_map_pages(kvm, &new);
if (r)
goto out_free;
- }
+ } else
+ kvm_iommu_unmap_pages(kvm, &old);
r = -ENOMEM;
- slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+ slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
+ GFP_KERNEL);
if (!slots)
goto out_free;
- memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
- if (mem->slot >= slots->nmemslots)
- slots->nmemslots = mem->slot + 1;
- slots->generation++;
/* actual memory is freed via old in kvm_free_physmem_slot below */
if (!npages) {
- new.rmap = NULL;
new.dirty_bitmap = NULL;
- for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
- new.lpage_info[i] = NULL;
+ memset(&new.arch, 0, sizeof(new.arch));
}
- slots->memslots[mem->slot] = new;
+ update_memslots(slots, &new);
old_memslots = kvm->memslots;
rcu_assign_pointer(kvm->memslots, slots);
synchronize_srcu_expedited(&kvm->srcu);
kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
+ /*
+ * If the new memory slot is created, we need to clear all
+ * mmio sptes.
+ */
+ if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT)
+ kvm_arch_flush_shadow(kvm);
+
kvm_free_physmem_slot(&old, &new);
kfree(old_memslots);
if (log->slot >= KVM_MEMORY_SLOTS)
goto out;
- memslot = &kvm->memslots->memslots[log->slot];
+ memslot = id_to_memslot(kvm->memslots, log->slot);
r = -ENOENT;
if (!memslot->dirty_bitmap)
goto out;
return r;
}
-void kvm_disable_largepages(void)
+bool kvm_largepages_enabled(void)
{
- largepages_enabled = false;
+ return largepages_enabled;
}
-EXPORT_SYMBOL_GPL(kvm_disable_largepages);
-int is_error_page(struct page *page)
-{
- return page == bad_page || page == hwpoison_page || page == fault_page;
-}
-EXPORT_SYMBOL_GPL(is_error_page);
-
-int is_error_pfn(pfn_t pfn)
-{
- return pfn == bad_pfn || pfn == hwpoison_pfn || pfn == fault_pfn;
-}
-EXPORT_SYMBOL_GPL(is_error_pfn);
-
-int is_hwpoison_pfn(pfn_t pfn)
-{
- return pfn == hwpoison_pfn;
-}
-EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
-
-int is_fault_pfn(pfn_t pfn)
+void kvm_disable_largepages(void)
{
- return pfn == fault_pfn;
+ largepages_enabled = false;
}
-EXPORT_SYMBOL_GPL(is_fault_pfn);
+EXPORT_SYMBOL_GPL(kvm_disable_largepages);
static inline unsigned long bad_hva(void)
{
}
EXPORT_SYMBOL_GPL(kvm_is_error_hva);
-static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots,
- gfn_t gfn)
-{
- int i;
-
- for (i = 0; i < slots->nmemslots; ++i) {
- struct kvm_memory_slot *memslot = &slots->memslots[i];
-
- if (gfn >= memslot->base_gfn
- && gfn < memslot->base_gfn + memslot->npages)
- return memslot;
- }
- return NULL;
-}
-
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
return __gfn_to_memslot(kvm_memslots(kvm), gfn);
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
- int i;
- struct kvm_memslots *slots = kvm_memslots(kvm);
-
- for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
- struct kvm_memory_slot *memslot = &slots->memslots[i];
+ struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn);
- if (memslot->flags & KVM_MEMSLOT_INVALID)
- continue;
+ if (!memslot || memslot->id >= KVM_MEMORY_SLOTS ||
+ memslot->flags & KVM_MEMSLOT_INVALID)
+ return 0;
- if (gfn >= memslot->base_gfn
- && gfn < memslot->base_gfn + memslot->npages)
- return 1;
- }
- return 0;
+ return 1;
}
EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
return size;
}
-int memslot_id(struct kvm *kvm, gfn_t gfn)
-{
- int i;
- struct kvm_memslots *slots = kvm_memslots(kvm);
- struct kvm_memory_slot *memslot = NULL;
-
- for (i = 0; i < slots->nmemslots; ++i) {
- memslot = &slots->memslots[i];
-
- if (gfn >= memslot->base_gfn
- && gfn < memslot->base_gfn + memslot->npages)
- break;
- }
-
- return memslot - slots->memslots;
-}
-
static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
gfn_t *nr_pages)
{
}
EXPORT_SYMBOL_GPL(gfn_to_hva);
-static pfn_t get_fault_pfn(void)
+int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, int write, struct page **page)
{
- get_page(fault_page);
- return fault_pfn;
+ int flags = FOLL_TOUCH | FOLL_NOWAIT | FOLL_HWPOISON | FOLL_GET;
+
+ if (write)
+ flags |= FOLL_WRITE;
+
+ return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL);
}
static inline int check_user_page_hwpoison(unsigned long addr)
return rc == -EHWPOISON;
}
-static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
- bool *async, bool write_fault, bool *writable)
+static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
+ bool write_fault, bool *writable)
{
struct page *page[1];
int npages = 0;
if (writable)
*writable = write_fault;
- npages = get_user_pages_fast(addr, 1, write_fault, page);
+ if (async) {
+ down_read(¤t->mm->mmap_sem);
+ npages = get_user_page_nowait(current, current->mm,
+ addr, write_fault, page);
+ up_read(¤t->mm->mmap_sem);
+ } else
+ npages = get_user_pages_fast(addr, 1, write_fault,
+ page);
/* map read fault as writable if possible */
if (unlikely(!write_fault) && npages == 1) {
struct vm_area_struct *vma;
if (atomic)
- return get_fault_pfn();
+ return KVM_PFN_ERR_FAULT;
down_read(¤t->mm->mmap_sem);
- if (check_user_page_hwpoison(addr)) {
+ if (npages == -EHWPOISON ||
+ (!async && check_user_page_hwpoison(addr))) {
up_read(¤t->mm->mmap_sem);
- get_page(hwpoison_page);
- return page_to_pfn(hwpoison_page);
+ return KVM_PFN_ERR_HWPOISON;
}
vma = find_vma_intersection(current->mm, addr, addr+1);
if (vma == NULL)
- pfn = get_fault_pfn();
+ pfn = KVM_PFN_ERR_FAULT;
else if ((vma->vm_flags & VM_PFNMAP)) {
pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) +
vma->vm_pgoff;
} else {
if (async && (vma->vm_flags & VM_WRITE))
*async = true;
- pfn = get_fault_pfn();
+ pfn = KVM_PFN_ERR_FAULT;
}
up_read(¤t->mm->mmap_sem);
} else
return pfn;
}
-pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
+pfn_t hva_to_pfn_atomic(unsigned long addr)
{
- return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
+ return hva_to_pfn(addr, true, NULL, true, NULL);
}
EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
*async = false;
addr = gfn_to_hva(kvm, gfn);
- if (kvm_is_error_hva(addr)) {
- get_page(bad_page);
- return page_to_pfn(bad_page);
- }
+ if (kvm_is_error_hva(addr))
+ return KVM_PFN_ERR_BAD;
- return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
+ return hva_to_pfn(addr, atomic, async, write_fault, writable);
}
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
-pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
- struct kvm_memory_slot *slot, gfn_t gfn)
+pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
{
unsigned long addr = gfn_to_hva_memslot(slot, gfn);
- return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
+ return hva_to_pfn(addr, false, NULL, true, NULL);
}
int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
}
EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic);
+static struct page *kvm_pfn_to_page(pfn_t pfn)
+{
+ if (is_error_pfn(pfn))
+ return KVM_ERR_PTR_BAD_PAGE;
+
+ if (kvm_is_mmio_pfn(pfn)) {
+ WARN_ON(1);
+ return KVM_ERR_PTR_BAD_PAGE;
+ }
+
+ return pfn_to_page(pfn);
+}
+
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
pfn_t pfn;
pfn = gfn_to_pfn(kvm, gfn);
- if (!kvm_is_mmio_pfn(pfn))
- return pfn_to_page(pfn);
- WARN_ON(kvm_is_mmio_pfn(pfn));
-
- get_page(bad_page);
- return bad_page;
+ return kvm_pfn_to_page(pfn);
}
EXPORT_SYMBOL_GPL(gfn_to_page);
void kvm_release_page_clean(struct page *page)
{
+ WARN_ON(is_error_page(page));
+
kvm_release_pfn_clean(page_to_pfn(page));
}
EXPORT_SYMBOL_GPL(kvm_release_page_clean);
void kvm_release_pfn_clean(pfn_t pfn)
{
+ WARN_ON(is_error_pfn(pfn));
+
if (!kvm_is_mmio_pfn(pfn))
put_page(pfn_to_page(pfn));
}
void kvm_release_page_dirty(struct page *page)
{
+ WARN_ON(is_error_page(page));
+
kvm_release_pfn_dirty(page_to_pfn(page));
}
EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
addr = gfn_to_hva(kvm, gfn);
if (kvm_is_error_hva(addr))
return -EFAULT;
- r = copy_from_user(data, (void __user *)addr + offset, len);
+ r = __copy_from_user(data, (void __user *)addr + offset, len);
if (r)
return -EFAULT;
return 0;
addr = gfn_to_hva(kvm, gfn);
if (kvm_is_error_hva(addr))
return -EFAULT;
- r = copy_to_user((void __user *)addr + offset, data, len);
+ r = __copy_to_user((void __user *)addr + offset, data, len);
if (r)
return -EFAULT;
mark_page_dirty(kvm, gfn);
ghc->gpa = gpa;
ghc->generation = slots->generation;
- ghc->memslot = __gfn_to_memslot(slots, gfn);
+ ghc->memslot = gfn_to_memslot(kvm, gfn);
ghc->hva = gfn_to_hva_many(ghc->memslot, gfn, NULL);
if (!kvm_is_error_hva(ghc->hva))
ghc->hva += offset;
if (kvm_is_error_hva(ghc->hva))
return -EFAULT;
- r = copy_to_user((void __user *)ghc->hva, data, len);
+ r = __copy_to_user((void __user *)ghc->hva, data, len);
if (r)
return -EFAULT;
mark_page_dirty_in_slot(kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
}
EXPORT_SYMBOL_GPL(kvm_write_guest_cached);
+int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+ void *data, unsigned long len)
+{
+ struct kvm_memslots *slots = kvm_memslots(kvm);
+ int r;
+
+ if (slots->generation != ghc->generation)
+ kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa);
+
+ if (kvm_is_error_hva(ghc->hva))
+ return -EFAULT;
+
+ r = __copy_from_user(data, (void __user *)ghc->hva, len);
+ if (r)
+ return -EFAULT;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
+
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
{
return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page,
if (memslot && memslot->dirty_bitmap) {
unsigned long rel_gfn = gfn - memslot->base_gfn;
- __set_bit_le(rel_gfn, memslot->dirty_bitmap);
+ /* TODO: introduce set_bit_le() and use it */
+ test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap);
}
}
finish_wait(&vcpu->wq, &wait);
}
+#ifndef CONFIG_S390
+/*
+ * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode.
+ */
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+ int me;
+ int cpu = vcpu->cpu;
+ wait_queue_head_t *wqp;
+
+ wqp = kvm_arch_vcpu_wq(vcpu);
+ if (waitqueue_active(wqp)) {
+ wake_up_interruptible(wqp);
+ ++vcpu->stat.halt_wakeup;
+ }
+
+ me = get_cpu();
+ if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
+ if (kvm_arch_vcpu_should_kick(vcpu))
+ smp_send_reschedule(cpu);
+ put_cpu();
+}
+#endif /* !CONFIG_S390 */
+
void kvm_resched(struct kvm_vcpu *vcpu)
{
if (!need_resched())
}
EXPORT_SYMBOL_GPL(kvm_resched);
+bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
+{
+ struct pid *pid;
+ struct task_struct *task = NULL;
+
+ rcu_read_lock();
+ pid = rcu_dereference(target->pid);
+ if (pid)
+ task = get_pid_task(target->pid, PIDTYPE_PID);
+ rcu_read_unlock();
+ if (!task)
+ return false;
+ if (task->flags & PF_VCPU) {
+ put_task_struct(task);
+ return false;
+ }
+ if (yield_to(task, 1)) {
+ put_task_struct(task);
+ return true;
+ }
+ put_task_struct(task);
+ return false;
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
+
+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
+/*
+ * Helper that checks whether a VCPU is eligible for directed yield.
+ * Most eligible candidate to yield is decided by following heuristics:
+ *
+ * (a) VCPU which has not done pl-exit or cpu relax intercepted recently
+ * (preempted lock holder), indicated by @in_spin_loop.
+ * Set at the beiginning and cleared at the end of interception/PLE handler.
+ *
+ * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get
+ * chance last time (mostly it has become eligible now since we have probably
+ * yielded to lockholder in last iteration. This is done by toggling
+ * @dy_eligible each time a VCPU checked for eligibility.)
+ *
+ * Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding
+ * to preempted lock-holder could result in wrong VCPU selection and CPU
+ * burning. Giving priority for a potential lock-holder increases lock
+ * progress.
+ *
+ * Since algorithm is based on heuristics, accessing another VCPU data without
+ * locking does not harm. It may result in trying to yield to same VCPU, fail
+ * and continue with next VCPU and so on.
+ */
+bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
+{
+ bool eligible;
+
+ eligible = !vcpu->spin_loop.in_spin_loop ||
+ (vcpu->spin_loop.in_spin_loop &&
+ vcpu->spin_loop.dy_eligible);
+
+ if (vcpu->spin_loop.in_spin_loop)
+ kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
+
+ return eligible;
+}
+#endif
void kvm_vcpu_on_spin(struct kvm_vcpu *me)
{
struct kvm *kvm = me->kvm;
int pass;
int i;
+ kvm_vcpu_set_in_spin_loop(me, true);
/*
* We boost the priority of a VCPU that is runnable but not
* currently running, because it got preempted by something
*/
for (pass = 0; pass < 2 && !yielded; pass++) {
kvm_for_each_vcpu(i, vcpu, kvm) {
- struct task_struct *task = NULL;
- struct pid *pid;
- if (!pass && i < last_boosted_vcpu) {
+ if (!pass && i <= last_boosted_vcpu) {
i = last_boosted_vcpu;
continue;
} else if (pass && i > last_boosted_vcpu)
continue;
if (waitqueue_active(&vcpu->wq))
continue;
- rcu_read_lock();
- pid = rcu_dereference(vcpu->pid);
- if (pid)
- task = get_pid_task(vcpu->pid, PIDTYPE_PID);
- rcu_read_unlock();
- if (!task)
+ if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
continue;
- if (task->flags & PF_VCPU) {
- put_task_struct(task);
- continue;
- }
- if (yield_to(task, 1)) {
- put_task_struct(task);
+ if (kvm_vcpu_yield_to(vcpu)) {
kvm->last_boosted_vcpu = i;
yielded = 1;
break;
}
- put_task_struct(task);
}
}
+ kvm_vcpu_set_in_spin_loop(me, false);
+
+ /* Ensure vcpu is not eligible during next spinloop */
+ kvm_vcpu_set_dy_eligible(me, false);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
#endif
else
- return VM_FAULT_SIGBUS;
+ return kvm_arch_vcpu_fault(vcpu, vmf);
get_page(page);
vmf->page = page;
return 0;
static struct file_operations kvm_vcpu_fops = {
.release = kvm_vcpu_release,
.unlocked_ioctl = kvm_vcpu_ioctl,
- .compat_ioctl = kvm_vcpu_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = kvm_vcpu_compat_ioctl,
+#endif
.mmap = kvm_vcpu_mmap,
.llseek = noop_llseek,
};
r = kvm_arch_vcpu_setup(vcpu);
if (r)
- return r;
+ goto vcpu_destroy;
mutex_lock(&kvm->lock);
+ if (!kvm_vcpu_compatible(vcpu)) {
+ r = -EINVAL;
+ goto unlock_vcpu_destroy;
+ }
if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) {
r = -EINVAL;
- goto vcpu_destroy;
+ goto unlock_vcpu_destroy;
}
kvm_for_each_vcpu(r, v, kvm)
if (v->vcpu_id == id) {
r = -EEXIST;
- goto vcpu_destroy;
+ goto unlock_vcpu_destroy;
}
BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]);
r = create_vcpu_fd(vcpu);
if (r < 0) {
kvm_put_kvm(kvm);
- goto vcpu_destroy;
+ goto unlock_vcpu_destroy;
}
kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
smp_wmb();
atomic_inc(&kvm->online_vcpus);
-#ifdef CONFIG_KVM_APIC_ARCHITECTURE
- if (kvm->bsp_vcpu_id == id)
- kvm->bsp_vcpu = vcpu;
-#endif
mutex_unlock(&kvm->lock);
return r;
-vcpu_destroy:
+unlock_vcpu_destroy:
mutex_unlock(&kvm->lock);
+vcpu_destroy:
kvm_arch_vcpu_destroy(vcpu);
return r;
}
struct kvm_regs *kvm_regs;
r = -ENOMEM;
- kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
- if (!kvm_regs)
+ kvm_regs = memdup_user(argp, sizeof(*kvm_regs));
+ if (IS_ERR(kvm_regs)) {
+ r = PTR_ERR(kvm_regs);
goto out;
- r = -EFAULT;
- if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs)))
- goto out_free2;
+ }
r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs);
if (r)
goto out_free2;
break;
}
case KVM_SET_SREGS: {
- kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
- r = -ENOMEM;
- if (!kvm_sregs)
- goto out;
- r = -EFAULT;
- if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs)))
+ kvm_sregs = memdup_user(argp, sizeof(*kvm_sregs));
+ if (IS_ERR(kvm_sregs)) {
+ r = PTR_ERR(kvm_sregs);
goto out;
+ }
r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs);
if (r)
goto out;
break;
}
case KVM_SET_FPU: {
- fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
- r = -ENOMEM;
- if (!fpu)
- goto out;
- r = -EFAULT;
- if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu)))
+ fpu = memdup_user(argp, sizeof(*fpu));
+ if (IS_ERR(fpu)) {
+ r = PTR_ERR(fpu);
goto out;
+ }
r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
if (r)
goto out;
return r;
}
+#ifdef CONFIG_COMPAT
+static long kvm_vcpu_compat_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = compat_ptr(arg);
+ int r;
+
+ if (vcpu->kvm->mm != current->mm)
+ return -EIO;
+
+ switch (ioctl) {
+ case KVM_SET_SIGNAL_MASK: {
+ struct kvm_signal_mask __user *sigmask_arg = argp;
+ struct kvm_signal_mask kvm_sigmask;
+ compat_sigset_t csigset;
+ sigset_t sigset;
+
+ if (argp) {
+ r = -EFAULT;
+ if (copy_from_user(&kvm_sigmask, argp,
+ sizeof kvm_sigmask))
+ goto out;
+ r = -EINVAL;
+ if (kvm_sigmask.len != sizeof csigset)
+ goto out;
+ r = -EFAULT;
+ if (copy_from_user(&csigset, sigmask_arg->sigset,
+ sizeof csigset))
+ goto out;
+ }
+ sigset_from_compat(&sigset, &csigset);
+ r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
+ break;
+ }
+ default:
+ r = kvm_vcpu_ioctl(filp, ioctl, arg);
+ }
+
+out:
+ return r;
+}
+#endif
+
static long kvm_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
r = -EFAULT;
if (copy_from_user(&data, argp, sizeof data))
goto out;
- r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags);
+ r = kvm_irqfd(kvm, &data);
break;
}
case KVM_IOEVENTFD: {
kvm->bsp_vcpu_id = arg;
mutex_unlock(&kvm->lock);
break;
+#endif
+#ifdef CONFIG_HAVE_KVM_MSI
+ case KVM_SIGNAL_MSI: {
+ struct kvm_msi msi;
+
+ r = -EFAULT;
+ if (copy_from_user(&msi, argp, sizeof msi))
+ goto out;
+ r = kvm_send_userspace_msi(kvm, &msi);
+ break;
+ }
+#endif
+#ifdef __KVM_HAVE_IRQ_LINE
+ case KVM_IRQ_LINE_STATUS:
+ case KVM_IRQ_LINE: {
+ struct kvm_irq_level irq_event;
+
+ r = -EFAULT;
+ if (copy_from_user(&irq_event, argp, sizeof irq_event))
+ goto out;
+
+ r = kvm_vm_ioctl_irq_line(kvm, &irq_event);
+ if (r)
+ goto out;
+
+ r = -EFAULT;
+ if (ioctl == KVM_IRQ_LINE_STATUS) {
+ if (copy_to_user(argp, &irq_event, sizeof irq_event))
+ goto out;
+ }
+
+ r = 0;
+ break;
+ }
#endif
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
.llseek = noop_llseek,
};
-static int kvm_dev_ioctl_create_vm(void)
+static int kvm_dev_ioctl_create_vm(unsigned long type)
{
int r;
struct kvm *kvm;
- kvm = kvm_create_vm();
+ kvm = kvm_create_vm(type);
if (IS_ERR(kvm))
return PTR_ERR(kvm);
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
case KVM_CAP_SET_BOOT_CPU_ID:
#endif
case KVM_CAP_INTERNAL_ERROR_DATA:
+#ifdef CONFIG_HAVE_KVM_MSI
+ case KVM_CAP_SIGNAL_MSI:
+#endif
return 1;
-#ifdef CONFIG_HAVE_KVM_IRQCHIP
+#ifdef KVM_CAP_IRQ_ROUTING
case KVM_CAP_IRQ_ROUTING:
return KVM_MAX_IRQ_ROUTES;
#endif
r = KVM_API_VERSION;
break;
case KVM_CREATE_VM:
- r = -EINVAL;
- if (arg)
- goto out;
- r = kvm_dev_ioctl_create_vm();
+ r = kvm_dev_ioctl_create_vm(arg);
break;
case KVM_CHECK_EXTENSION:
r = kvm_dev_ioctl_check_extension_generic(arg);
int i;
for (i = 0; i < bus->dev_count; i++) {
- struct kvm_io_device *pos = bus->devs[i];
+ struct kvm_io_device *pos = bus->range[i].dev;
kvm_iodevice_destructor(pos);
}
kfree(bus);
}
+int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
+{
+ const struct kvm_io_range *r1 = p1;
+ const struct kvm_io_range *r2 = p2;
+
+ if (r1->addr < r2->addr)
+ return -1;
+ if (r1->addr + r1->len > r2->addr + r2->len)
+ return 1;
+ return 0;
+}
+
+int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
+ gpa_t addr, int len)
+{
+ bus->range[bus->dev_count++] = (struct kvm_io_range) {
+ .addr = addr,
+ .len = len,
+ .dev = dev,
+ };
+
+ sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range),
+ kvm_io_bus_sort_cmp, NULL);
+
+ return 0;
+}
+
+int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
+ gpa_t addr, int len)
+{
+ struct kvm_io_range *range, key;
+ int off;
+
+ key = (struct kvm_io_range) {
+ .addr = addr,
+ .len = len,
+ };
+
+ range = bsearch(&key, bus->range, bus->dev_count,
+ sizeof(struct kvm_io_range), kvm_io_bus_sort_cmp);
+ if (range == NULL)
+ return -ENOENT;
+
+ off = range - bus->range;
+
+ while (off > 0 && kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0)
+ off--;
+
+ return off;
+}
+
/* kvm_io_bus_write - called under kvm->slots_lock */
int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, const void *val)
{
- int i;
+ int idx;
struct kvm_io_bus *bus;
+ struct kvm_io_range range;
+
+ range = (struct kvm_io_range) {
+ .addr = addr,
+ .len = len,
+ };
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
- for (i = 0; i < bus->dev_count; i++)
- if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
+ idx = kvm_io_bus_get_first_dev(bus, addr, len);
+ if (idx < 0)
+ return -EOPNOTSUPP;
+
+ while (idx < bus->dev_count &&
+ kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) {
+ if (!kvm_iodevice_write(bus->range[idx].dev, addr, len, val))
return 0;
+ idx++;
+ }
+
return -EOPNOTSUPP;
}
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, void *val)
{
- int i;
+ int idx;
struct kvm_io_bus *bus;
+ struct kvm_io_range range;
+
+ range = (struct kvm_io_range) {
+ .addr = addr,
+ .len = len,
+ };
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
- for (i = 0; i < bus->dev_count; i++)
- if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
+ idx = kvm_io_bus_get_first_dev(bus, addr, len);
+ if (idx < 0)
+ return -EOPNOTSUPP;
+
+ while (idx < bus->dev_count &&
+ kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) {
+ if (!kvm_iodevice_read(bus->range[idx].dev, addr, len, val))
return 0;
+ idx++;
+ }
+
return -EOPNOTSUPP;
}
/* Caller must hold slots_lock. */
-int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
- struct kvm_io_device *dev)
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+ int len, struct kvm_io_device *dev)
{
struct kvm_io_bus *new_bus, *bus;
bus = kvm->buses[bus_idx];
- if (bus->dev_count > NR_IOBUS_DEVS-1)
+ if (bus->dev_count > NR_IOBUS_DEVS - 1)
return -ENOSPC;
- new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+ new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) *
+ sizeof(struct kvm_io_range)), GFP_KERNEL);
if (!new_bus)
return -ENOMEM;
- memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
- new_bus->devs[new_bus->dev_count++] = dev;
+ memcpy(new_bus, bus, sizeof(*bus) + (bus->dev_count *
+ sizeof(struct kvm_io_range)));
+ kvm_io_bus_insert_dev(new_bus, dev, addr, len);
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
synchronize_srcu_expedited(&kvm->srcu);
kfree(bus);
int i, r;
struct kvm_io_bus *new_bus, *bus;
- new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
- if (!new_bus)
- return -ENOMEM;
-
bus = kvm->buses[bus_idx];
- memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
-
r = -ENOENT;
- for (i = 0; i < new_bus->dev_count; i++)
- if (new_bus->devs[i] == dev) {
+ for (i = 0; i < bus->dev_count; i++)
+ if (bus->range[i].dev == dev) {
r = 0;
- new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
break;
}
- if (r) {
- kfree(new_bus);
+ if (r)
return r;
- }
+
+ new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count - 1) *
+ sizeof(struct kvm_io_range)), GFP_KERNEL);
+ if (!new_bus)
+ return -ENOMEM;
+
+ memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
+ new_bus->dev_count--;
+ memcpy(new_bus->range + i, bus->range + i + 1,
+ (new_bus->dev_count - i) * sizeof(struct kvm_io_range));
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
synchronize_srcu_expedited(&kvm->srcu);
[KVM_STAT_VM] = &vm_stat_fops,
};
-static void kvm_init_debug(void)
+static int kvm_init_debug(void)
{
+ int r = -EFAULT;
struct kvm_stats_debugfs_item *p;
kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
- for (p = debugfs_entries; p->name; ++p)
+ if (kvm_debugfs_dir == NULL)
+ goto out;
+
+ for (p = debugfs_entries; p->name; ++p) {
p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
(void *)(long)p->offset,
stat_fops[p->kind]);
+ if (p->dentry == NULL)
+ goto out_dir;
+ }
+
+ return 0;
+
+out_dir:
+ debugfs_remove_recursive(kvm_debugfs_dir);
+out:
+ return r;
}
static void kvm_exit_debug(void)
.resume = kvm_resume,
};
-struct page *bad_page;
-pfn_t bad_pfn;
-
static inline
struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
{
if (r)
goto out_fail;
- bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-
- if (bad_page == NULL) {
- r = -ENOMEM;
- goto out;
- }
-
- bad_pfn = page_to_pfn(bad_page);
-
- hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-
- if (hwpoison_page == NULL) {
- r = -ENOMEM;
- goto out_free_0;
- }
-
- hwpoison_pfn = page_to_pfn(hwpoison_page);
-
- fault_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-
- if (fault_page == NULL) {
- r = -ENOMEM;
- goto out_free_0;
- }
-
- fault_pfn = page_to_pfn(fault_page);
-
if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
r = -ENOMEM;
goto out_free_0;
kvm_preempt_ops.sched_in = kvm_sched_in;
kvm_preempt_ops.sched_out = kvm_sched_out;
- kvm_init_debug();
+ r = kvm_init_debug();
+ if (r) {
+ printk(KERN_ERR "kvm: create debugfs files failed\n");
+ goto out_undebugfs;
+ }
return 0;
+out_undebugfs:
+ unregister_syscore_ops(&kvm_syscore_ops);
out_unreg:
kvm_async_pf_deinit();
out_free:
out_free_0a:
free_cpumask_var(cpus_hardware_enabled);
out_free_0:
- if (fault_page)
- __free_page(fault_page);
- if (hwpoison_page)
- __free_page(hwpoison_page);
- __free_page(bad_page);
-out:
kvm_arch_exit();
out_fail:
return r;
kvm_arch_hardware_unsetup();
kvm_arch_exit();
free_cpumask_var(cpus_hardware_enabled);
- __free_page(hwpoison_page);
- __free_page(bad_page);
}
EXPORT_SYMBOL_GPL(kvm_exit);