bool kvm_is_mmio_pfn(pfn_t pfn)
{
- if (is_error_pfn(pfn))
- return false;
-
if (pfn_valid(pfn)) {
int reserved;
struct page *tail = pfn_to_page(pfn);
/*
* Switches to specified vcpu, until a matching vcpu_put()
*/
-void vcpu_load(struct kvm_vcpu *vcpu)
+int vcpu_load(struct kvm_vcpu *vcpu)
{
int cpu;
- mutex_lock(&vcpu->mutex);
+ if (mutex_lock_killable(&vcpu->mutex))
+ return -EINTR;
if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
/* The thread running this VCPU changed. */
struct pid *oldpid = vcpu->pid;
preempt_notifier_register(&vcpu->preempt_notifier);
kvm_arch_vcpu_load(vcpu, cpu);
put_cpu();
+ return 0;
}
void vcpu_put(struct kvm_vcpu *vcpu)
make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
}
+void kvm_make_mclock_inprogress_request(struct kvm *kvm)
+{
+ make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
+}
+
+void kvm_make_update_eoibitmap_request(struct kvm *kvm)
+{
+ make_all_cpus_request(kvm, KVM_REQ_EOIBITMAP);
+}
+
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
struct page *page;
int idx;
idx = srcu_read_lock(&kvm->srcu);
- kvm_arch_flush_shadow(kvm);
+ kvm_arch_flush_shadow_all(kvm);
srcu_read_unlock(&kvm->srcu, idx);
}
INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
#endif
+ BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
+
r = -ENOMEM;
kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
if (!kvm->memslots)
static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
- if (!dont || free->rmap != dont->rmap)
- vfree(free->rmap);
-
if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
kvm_destroy_dirty_bitmap(free);
kvm_arch_free_memslot(free, dont);
free->npages = 0;
- free->rmap = NULL;
}
void kvm_free_physmem(struct kvm *kvm)
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
#else
- kvm_arch_flush_shadow(kvm);
+ kvm_arch_flush_shadow_all(kvm);
#endif
kvm_arch_destroy_vm(kvm);
kvm_free_physmem(kvm);
slots->id_to_index[slots->memslots[i].id] = i;
}
-void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new)
+void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new,
+ u64 last_generation)
{
if (new) {
int id = new->id;
sort_memslots(slots);
}
- slots->generation++;
+ slots->generation = last_generation + 1;
+}
+
+static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
+{
+ u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
+
+#ifdef KVM_CAP_READONLY_MEM
+ valid_flags |= KVM_MEM_READONLY;
+#endif
+
+ if (mem->flags & ~valid_flags)
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
+ struct kvm_memslots *slots, struct kvm_memory_slot *new)
+{
+ struct kvm_memslots *old_memslots = kvm->memslots;
+
+ update_memslots(slots, new, kvm->memslots->generation);
+ rcu_assign_pointer(kvm->memslots, slots);
+ synchronize_srcu_expedited(&kvm->srcu);
+ return old_memslots;
}
+/*
+ * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
+ * - create a new memory slot
+ * - delete an existing memory slot
+ * - modify an existing memory slot
+ * -- move it in the guest physical memory space
+ * -- just change its flags
+ *
+ * Since flags can be changed by some of these operations, the following
+ * differentiation is the best we can do for __kvm_set_memory_region():
+ */
+enum kvm_mr_change {
+ KVM_MR_CREATE,
+ KVM_MR_DELETE,
+ KVM_MR_MOVE,
+ KVM_MR_FLAGS_ONLY,
+};
+
/*
* Allocate some memory and give it an address in the guest physical address
* space.
*/
int __kvm_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
- int user_alloc)
+ bool user_alloc)
{
int r;
gfn_t base_gfn;
unsigned long npages;
- unsigned long i;
- struct kvm_memory_slot *memslot;
+ struct kvm_memory_slot *slot;
struct kvm_memory_slot old, new;
- struct kvm_memslots *slots, *old_memslots;
+ struct kvm_memslots *slots = NULL, *old_memslots;
+ enum kvm_mr_change change;
+
+ r = check_memory_region_flags(mem);
+ if (r)
+ goto out;
r = -EINVAL;
/* General sanity checks */
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
goto out;
- memslot = id_to_memslot(kvm->memslots, mem->slot);
+ slot = id_to_memslot(kvm->memslots, mem->slot);
base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
npages = mem->memory_size >> PAGE_SHIFT;
if (!npages)
mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
- new = old = *memslot;
+ new = old = *slot;
new.id = mem->slot;
new.base_gfn = base_gfn;
new.npages = npages;
new.flags = mem->flags;
- /* Disallow changing a memory slot's size. */
r = -EINVAL;
- if (npages && old.npages && npages != old.npages)
- goto out_free;
+ if (npages) {
+ if (!old.npages)
+ change = KVM_MR_CREATE;
+ else { /* Modify an existing slot. */
+ if ((mem->userspace_addr != old.userspace_addr) ||
+ (npages != old.npages) ||
+ ((new.flags ^ old.flags) & KVM_MEM_READONLY))
+ goto out;
- /* Check for overlaps */
- r = -EEXIST;
- for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
- struct kvm_memory_slot *s = &kvm->memslots->memslots[i];
+ if (base_gfn != old.base_gfn)
+ change = KVM_MR_MOVE;
+ else if (new.flags != old.flags)
+ change = KVM_MR_FLAGS_ONLY;
+ else { /* Nothing to change. */
+ r = 0;
+ goto out;
+ }
+ }
+ } else if (old.npages) {
+ change = KVM_MR_DELETE;
+ } else /* Modify a non-existent slot: disallowed. */
+ goto out;
- if (s == memslot || !s->npages)
- continue;
- if (!((base_gfn + npages <= s->base_gfn) ||
- (base_gfn >= s->base_gfn + s->npages)))
- goto out_free;
+ if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
+ /* Check for overlaps */
+ r = -EEXIST;
+ kvm_for_each_memslot(slot, kvm->memslots) {
+ if ((slot->id >= KVM_USER_MEM_SLOTS) ||
+ (slot->id == mem->slot))
+ continue;
+ if (!((base_gfn + npages <= slot->base_gfn) ||
+ (base_gfn >= slot->base_gfn + slot->npages)))
+ goto out;
+ }
}
/* Free page dirty bitmap if unneeded */
new.dirty_bitmap = NULL;
r = -ENOMEM;
-
- /* Allocate if a slot is being created */
- if (npages && !old.npages) {
- new.user_alloc = user_alloc;
+ if (change == KVM_MR_CREATE) {
new.userspace_addr = mem->userspace_addr;
-#ifndef CONFIG_S390
- new.rmap = vzalloc(npages * sizeof(*new.rmap));
- if (!new.rmap)
- goto out_free;
-#endif /* not defined CONFIG_S390 */
+
if (kvm_arch_create_memslot(&new, npages))
goto out_free;
}
if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
if (kvm_create_dirty_bitmap(&new) < 0)
goto out_free;
- /* destroy any largepage mappings for dirty tracking */
}
- if (!npages) {
- struct kvm_memory_slot *slot;
-
+ if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
r = -ENOMEM;
slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
GFP_KERNEL);
slot = id_to_memslot(slots, mem->slot);
slot->flags |= KVM_MEMSLOT_INVALID;
- update_memslots(slots, NULL);
+ old_memslots = install_new_memslots(kvm, slots, NULL);
- old_memslots = kvm->memslots;
- rcu_assign_pointer(kvm->memslots, slots);
- synchronize_srcu_expedited(&kvm->srcu);
- /* From this point no new shadow pages pointing to a deleted
- * memslot will be created.
+ /* slot was deleted or moved, clear iommu mapping */
+ kvm_iommu_unmap_pages(kvm, &old);
+ /* From this point no new shadow pages pointing to a deleted,
+ * or moved, memslot will be created.
*
* validation of sp->gfn happens in:
* - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
* - kvm_is_visible_gfn (mmu_check_roots)
*/
- kvm_arch_flush_shadow(kvm);
- kfree(old_memslots);
+ kvm_arch_flush_shadow_memslot(kvm, slot);
+ slots = old_memslots;
}
r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
if (r)
- goto out_free;
+ goto out_slots;
- /* map/unmap the pages in iommu page table */
- if (npages) {
- r = kvm_iommu_map_pages(kvm, &new);
- if (r)
+ r = -ENOMEM;
+ /*
+ * We can re-use the old_memslots from above, the only difference
+ * from the currently installed memslots is the invalid flag. This
+ * will get overwritten by update_memslots anyway.
+ */
+ if (!slots) {
+ slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
+ GFP_KERNEL);
+ if (!slots)
goto out_free;
- } else
- kvm_iommu_unmap_pages(kvm, &old);
+ }
- r = -ENOMEM;
- slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
- GFP_KERNEL);
- if (!slots)
- goto out_free;
+ /*
+ * IOMMU mapping: New slots need to be mapped. Old slots need to be
+ * un-mapped and re-mapped if their base changes. Since base change
+ * unmapping is handled above with slot deletion, mapping alone is
+ * needed here. Anything else the iommu might care about for existing
+ * slots (size changes, userspace addr changes and read-only flag
+ * changes) is disallowed above, so any other attribute changes getting
+ * here can be skipped.
+ */
+ if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
+ r = kvm_iommu_map_pages(kvm, &new);
+ if (r)
+ goto out_slots;
+ }
/* actual memory is freed via old in kvm_free_physmem_slot below */
- if (!npages) {
- new.rmap = NULL;
+ if (change == KVM_MR_DELETE) {
new.dirty_bitmap = NULL;
memset(&new.arch, 0, sizeof(new.arch));
}
- update_memslots(slots, &new);
- old_memslots = kvm->memslots;
- rcu_assign_pointer(kvm->memslots, slots);
- synchronize_srcu_expedited(&kvm->srcu);
+ old_memslots = install_new_memslots(kvm, slots, &new);
kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
- /*
- * If the new memory slot is created, we need to clear all
- * mmio sptes.
- */
- if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT)
- kvm_arch_flush_shadow(kvm);
-
kvm_free_physmem_slot(&old, &new);
kfree(old_memslots);
return 0;
+out_slots:
+ kfree(slots);
out_free:
kvm_free_physmem_slot(&new, &old);
out:
return r;
-
}
EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
int kvm_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
- int user_alloc)
+ bool user_alloc)
{
int r;
int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
struct
kvm_userspace_memory_region *mem,
- int user_alloc)
+ bool user_alloc)
{
- if (mem->slot >= KVM_MEMORY_SLOTS)
+ if (mem->slot >= KVM_USER_MEM_SLOTS)
return -EINVAL;
return kvm_set_memory_region(kvm, mem, user_alloc);
}
unsigned long any = 0;
r = -EINVAL;
- if (log->slot >= KVM_MEMORY_SLOTS)
+ if (log->slot >= KVM_USER_MEM_SLOTS)
goto out;
memslot = id_to_memslot(kvm->memslots, log->slot);
}
EXPORT_SYMBOL_GPL(kvm_disable_largepages);
-int is_error_page(struct page *page)
-{
- return IS_ERR(page);
-}
-EXPORT_SYMBOL_GPL(is_error_page);
-
-int is_error_pfn(pfn_t pfn)
-{
- return IS_ERR_VALUE(pfn);
-}
-EXPORT_SYMBOL_GPL(is_error_pfn);
-
-static pfn_t get_bad_pfn(void)
-{
- return -ENOENT;
-}
-
-pfn_t get_fault_pfn(void)
-{
- return -EFAULT;
-}
-EXPORT_SYMBOL_GPL(get_fault_pfn);
-
-static pfn_t get_hwpoison_pfn(void)
-{
- return -EHWPOISON;
-}
-
-int is_hwpoison_pfn(pfn_t pfn)
-{
- return pfn == -EHWPOISON;
-}
-EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
-
-int is_noslot_pfn(pfn_t pfn)
-{
- return pfn == -ENOENT;
-}
-EXPORT_SYMBOL_GPL(is_noslot_pfn);
-
-int is_invalid_pfn(pfn_t pfn)
-{
- return !is_noslot_pfn(pfn) && is_error_pfn(pfn);
-}
-EXPORT_SYMBOL_GPL(is_invalid_pfn);
-
-struct page *get_bad_page(void)
-{
- return ERR_PTR(-ENOENT);
-}
-
-static inline unsigned long bad_hva(void)
-{
- return PAGE_OFFSET;
-}
-
-int kvm_is_error_hva(unsigned long addr)
-{
- return addr == bad_hva();
-}
-EXPORT_SYMBOL_GPL(kvm_is_error_hva);
-
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
return __gfn_to_memslot(kvm_memslots(kvm), gfn);
{
struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn);
- if (!memslot || memslot->id >= KVM_MEMORY_SLOTS ||
+ if (!memslot || memslot->id >= KVM_USER_MEM_SLOTS ||
memslot->flags & KVM_MEMSLOT_INVALID)
return 0;
return size;
}
-static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
- gfn_t *nr_pages)
+static bool memslot_is_readonly(struct kvm_memory_slot *slot)
+{
+ return slot->flags & KVM_MEM_READONLY;
+}
+
+static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
+ gfn_t *nr_pages, bool write)
{
if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
- return bad_hva();
+ return KVM_HVA_ERR_BAD;
+
+ if (memslot_is_readonly(slot) && write)
+ return KVM_HVA_ERR_RO_BAD;
if (nr_pages)
*nr_pages = slot->npages - (gfn - slot->base_gfn);
- return gfn_to_hva_memslot(slot, gfn);
+ return __gfn_to_hva_memslot(slot, gfn);
}
+static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
+ gfn_t *nr_pages)
+{
+ return __gfn_to_hva_many(slot, gfn, nr_pages, true);
+}
+
+unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
+ gfn_t gfn)
+{
+ return gfn_to_hva_many(slot, gfn, NULL);
+}
+EXPORT_SYMBOL_GPL(gfn_to_hva_memslot);
+
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
{
return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_hva);
+/*
+ * The hva returned by this function is only allowed to be read.
+ * It should pair with kvm_read_hva() or kvm_read_hva_atomic().
+ */
+static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn)
+{
+ return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
+}
+
+static int kvm_read_hva(void *data, void __user *hva, int len)
+{
+ return __copy_from_user(data, hva, len);
+}
+
+static int kvm_read_hva_atomic(void *data, void __user *hva, int len)
+{
+ return __copy_from_user_inatomic(data, hva, len);
+}
+
int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, int write, struct page **page)
{
return rc == -EHWPOISON;
}
-static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
- bool write_fault, bool *writable)
+/*
+ * The atomic path to get the writable pfn which will be stored in @pfn,
+ * true indicates success, otherwise false is returned.
+ */
+static bool hva_to_pfn_fast(unsigned long addr, bool atomic, bool *async,
+ bool write_fault, bool *writable, pfn_t *pfn)
{
struct page *page[1];
- int npages = 0;
- pfn_t pfn;
+ int npages;
- /* we can do it either atomically or asynchronously, not both */
- BUG_ON(atomic && async);
+ if (!(async || atomic))
+ return false;
- BUG_ON(!write_fault && !writable);
+ /*
+ * Fast pin a writable pfn only if it is a write fault request
+ * or the caller allows to map a writable pfn for a read fault
+ * request.
+ */
+ if (!(write_fault || writable))
+ return false;
- if (writable)
- *writable = true;
+ npages = __get_user_pages_fast(addr, 1, 1, page);
+ if (npages == 1) {
+ *pfn = page_to_pfn(page[0]);
- if (atomic || async)
- npages = __get_user_pages_fast(addr, 1, 1, page);
+ if (writable)
+ *writable = true;
+ return true;
+ }
- if (unlikely(npages != 1) && !atomic) {
- might_sleep();
+ return false;
+}
- if (writable)
- *writable = write_fault;
+/*
+ * The slow path to get the pfn of the specified host virtual address,
+ * 1 indicates success, -errno is returned if error is detected.
+ */
+static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
+ bool *writable, pfn_t *pfn)
+{
+ struct page *page[1];
+ int npages = 0;
- if (async) {
- down_read(¤t->mm->mmap_sem);
- npages = get_user_page_nowait(current, current->mm,
- addr, write_fault, page);
- up_read(¤t->mm->mmap_sem);
- } else
- npages = get_user_pages_fast(addr, 1, write_fault,
- page);
-
- /* map read fault as writable if possible */
- if (unlikely(!write_fault) && npages == 1) {
- struct page *wpage[1];
-
- npages = __get_user_pages_fast(addr, 1, 1, wpage);
- if (npages == 1) {
- *writable = true;
- put_page(page[0]);
- page[0] = wpage[0];
- }
- npages = 1;
+ might_sleep();
+
+ if (writable)
+ *writable = write_fault;
+
+ if (async) {
+ down_read(¤t->mm->mmap_sem);
+ npages = get_user_page_nowait(current, current->mm,
+ addr, write_fault, page);
+ up_read(¤t->mm->mmap_sem);
+ } else
+ npages = get_user_pages_fast(addr, 1, write_fault,
+ page);
+ if (npages != 1)
+ return npages;
+
+ /* map read fault as writable if possible */
+ if (unlikely(!write_fault) && writable) {
+ struct page *wpage[1];
+
+ npages = __get_user_pages_fast(addr, 1, 1, wpage);
+ if (npages == 1) {
+ *writable = true;
+ put_page(page[0]);
+ page[0] = wpage[0];
}
+
+ npages = 1;
}
+ *pfn = page_to_pfn(page[0]);
+ return npages;
+}
- if (unlikely(npages != 1)) {
- struct vm_area_struct *vma;
+static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
+{
+ if (unlikely(!(vma->vm_flags & VM_READ)))
+ return false;
- if (atomic)
- return get_fault_pfn();
+ if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE))))
+ return false;
- down_read(¤t->mm->mmap_sem);
- if (npages == -EHWPOISON ||
- (!async && check_user_page_hwpoison(addr))) {
- up_read(¤t->mm->mmap_sem);
- return get_hwpoison_pfn();
- }
+ return true;
+}
- vma = find_vma_intersection(current->mm, addr, addr+1);
-
- if (vma == NULL)
- pfn = get_fault_pfn();
- else if ((vma->vm_flags & VM_PFNMAP)) {
- pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) +
- vma->vm_pgoff;
- BUG_ON(!kvm_is_mmio_pfn(pfn));
- } else {
- if (async && (vma->vm_flags & VM_WRITE))
- *async = true;
- pfn = get_fault_pfn();
- }
- up_read(¤t->mm->mmap_sem);
- } else
- pfn = page_to_pfn(page[0]);
+/*
+ * Pin guest page in memory and return its pfn.
+ * @addr: host virtual address which maps memory to the guest
+ * @atomic: whether this function can sleep
+ * @async: whether this function need to wait IO complete if the
+ * host page is not in the memory
+ * @write_fault: whether we should get a writable host page
+ * @writable: whether it allows to map a writable host page for !@write_fault
+ *
+ * The function will map a writable host page for these two cases:
+ * 1): @write_fault = true
+ * 2): @write_fault = false && @writable, @writable will tell the caller
+ * whether the mapping is writable.
+ */
+static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
+ bool write_fault, bool *writable)
+{
+ struct vm_area_struct *vma;
+ pfn_t pfn = 0;
+ int npages;
+
+ /* we can do it either atomically or asynchronously, not both */
+ BUG_ON(atomic && async);
+ if (hva_to_pfn_fast(addr, atomic, async, write_fault, writable, &pfn))
+ return pfn;
+
+ if (atomic)
+ return KVM_PFN_ERR_FAULT;
+
+ npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn);
+ if (npages == 1)
+ return pfn;
+
+ down_read(¤t->mm->mmap_sem);
+ if (npages == -EHWPOISON ||
+ (!async && check_user_page_hwpoison(addr))) {
+ pfn = KVM_PFN_ERR_HWPOISON;
+ goto exit;
+ }
+
+ vma = find_vma_intersection(current->mm, addr, addr + 1);
+
+ if (vma == NULL)
+ pfn = KVM_PFN_ERR_FAULT;
+ else if ((vma->vm_flags & VM_PFNMAP)) {
+ pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) +
+ vma->vm_pgoff;
+ BUG_ON(!kvm_is_mmio_pfn(pfn));
+ } else {
+ if (async && vma_is_valid(vma, write_fault))
+ *async = true;
+ pfn = KVM_PFN_ERR_FAULT;
+ }
+exit:
+ up_read(¤t->mm->mmap_sem);
return pfn;
}
-pfn_t hva_to_pfn_atomic(unsigned long addr)
+static pfn_t
+__gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic,
+ bool *async, bool write_fault, bool *writable)
{
- return hva_to_pfn(addr, true, NULL, true, NULL);
+ unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);
+
+ if (addr == KVM_HVA_ERR_RO_BAD)
+ return KVM_PFN_ERR_RO_FAULT;
+
+ if (kvm_is_error_hva(addr))
+ return KVM_PFN_NOSLOT;
+
+ /* Do not map writable pfn in the readonly memslot. */
+ if (writable && memslot_is_readonly(slot)) {
+ *writable = false;
+ writable = NULL;
+ }
+
+ return hva_to_pfn(addr, atomic, async, write_fault,
+ writable);
}
-EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
bool write_fault, bool *writable)
{
- unsigned long addr;
+ struct kvm_memory_slot *slot;
if (async)
*async = false;
- addr = gfn_to_hva(kvm, gfn);
- if (kvm_is_error_hva(addr))
- return get_bad_pfn();
+ slot = gfn_to_memslot(kvm, gfn);
- return hva_to_pfn(addr, atomic, async, write_fault, writable);
+ return __gfn_to_pfn_memslot(slot, gfn, atomic, async, write_fault,
+ writable);
}
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
{
- unsigned long addr = gfn_to_hva_memslot(slot, gfn);
- return hva_to_pfn(addr, false, NULL, true, NULL);
+ return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL);
+}
+
+pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+ return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL);
}
+EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);
int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
int nr_pages)
static struct page *kvm_pfn_to_page(pfn_t pfn)
{
- WARN_ON(kvm_is_mmio_pfn(pfn));
+ if (is_error_noslot_pfn(pfn))
+ return KVM_ERR_PTR_BAD_PAGE;
- if (is_error_pfn(pfn) || kvm_is_mmio_pfn(pfn))
- return get_bad_page();
+ if (kvm_is_mmio_pfn(pfn)) {
+ WARN_ON(1);
+ return KVM_ERR_PTR_BAD_PAGE;
+ }
return pfn_to_page(pfn);
}
void kvm_release_page_clean(struct page *page)
{
- if (!is_error_page(page))
- kvm_release_pfn_clean(page_to_pfn(page));
+ WARN_ON(is_error_page(page));
+
+ kvm_release_pfn_clean(page_to_pfn(page));
}
EXPORT_SYMBOL_GPL(kvm_release_page_clean);
void kvm_release_pfn_clean(pfn_t pfn)
{
- if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn))
+ if (!is_error_noslot_pfn(pfn) && !kvm_is_mmio_pfn(pfn))
put_page(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
int r;
unsigned long addr;
- addr = gfn_to_hva(kvm, gfn);
+ addr = gfn_to_hva_read(kvm, gfn);
if (kvm_is_error_hva(addr))
return -EFAULT;
- r = __copy_from_user(data, (void __user *)addr + offset, len);
+ r = kvm_read_hva(data, (void __user *)addr + offset, len);
if (r)
return -EFAULT;
return 0;
gfn_t gfn = gpa >> PAGE_SHIFT;
int offset = offset_in_page(gpa);
- addr = gfn_to_hva(kvm, gfn);
+ addr = gfn_to_hva_read(kvm, gfn);
if (kvm_is_error_hva(addr))
return -EFAULT;
pagefault_disable();
- r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
+ r = kvm_read_hva_atomic(data, (void __user *)addr + offset, len);
pagefault_enable();
if (r)
return -EFAULT;
}
int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
- gpa_t gpa)
+ gpa_t gpa, unsigned long len)
{
struct kvm_memslots *slots = kvm_memslots(kvm);
int offset = offset_in_page(gpa);
- gfn_t gfn = gpa >> PAGE_SHIFT;
+ gfn_t start_gfn = gpa >> PAGE_SHIFT;
+ gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT;
+ gfn_t nr_pages_needed = end_gfn - start_gfn + 1;
+ gfn_t nr_pages_avail;
ghc->gpa = gpa;
ghc->generation = slots->generation;
- ghc->memslot = gfn_to_memslot(kvm, gfn);
- ghc->hva = gfn_to_hva_many(ghc->memslot, gfn, NULL);
- if (!kvm_is_error_hva(ghc->hva))
+ ghc->len = len;
+ ghc->memslot = gfn_to_memslot(kvm, start_gfn);
+ ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail);
+ if (!kvm_is_error_hva(ghc->hva) && nr_pages_avail >= nr_pages_needed) {
ghc->hva += offset;
- else
- return -EFAULT;
-
+ } else {
+ /*
+ * If the requested region crosses two memslots, we still
+ * verify that the entire region is valid here.
+ */
+ while (start_gfn <= end_gfn) {
+ ghc->memslot = gfn_to_memslot(kvm, start_gfn);
+ ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn,
+ &nr_pages_avail);
+ if (kvm_is_error_hva(ghc->hva))
+ return -EFAULT;
+ start_gfn += nr_pages_avail;
+ }
+ /* Use the slow path for cross page reads and writes. */
+ ghc->memslot = NULL;
+ }
return 0;
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init);
struct kvm_memslots *slots = kvm_memslots(kvm);
int r;
+ BUG_ON(len > ghc->len);
+
if (slots->generation != ghc->generation)
- kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa);
+ kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len);
+
+ if (unlikely(!ghc->memslot))
+ return kvm_write_guest(kvm, ghc->gpa, data, len);
if (kvm_is_error_hva(ghc->hva))
return -EFAULT;
struct kvm_memslots *slots = kvm_memslots(kvm);
int r;
+ BUG_ON(len > ghc->len);
+
if (slots->generation != ghc->generation)
- kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa);
+ kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len);
+
+ if (unlikely(!ghc->memslot))
+ return kvm_read_guest(kvm, ghc->gpa, data, len);
if (kvm_is_error_hva(ghc->hva))
return -EFAULT;
if (memslot && memslot->dirty_bitmap) {
unsigned long rel_gfn = gfn - memslot->base_gfn;
- /* TODO: introduce set_bit_le() and use it */
- test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap);
+ set_bit_le(rel_gfn, memslot->dirty_bitmap);
}
}
{
struct pid *pid;
struct task_struct *task = NULL;
+ bool ret = false;
rcu_read_lock();
pid = rcu_dereference(target->pid);
task = get_pid_task(target->pid, PIDTYPE_PID);
rcu_read_unlock();
if (!task)
- return false;
+ return ret;
if (task->flags & PF_VCPU) {
put_task_struct(task);
- return false;
- }
- if (yield_to(task, 1)) {
- put_task_struct(task);
- return true;
+ return ret;
}
+ ret = yield_to(task, 1);
put_task_struct(task);
- return false;
+
+ return ret;
}
EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
return eligible;
}
#endif
+
void kvm_vcpu_on_spin(struct kvm_vcpu *me)
{
struct kvm *kvm = me->kvm;
struct kvm_vcpu *vcpu;
int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
int yielded = 0;
+ int try = 3;
int pass;
int i;
* VCPU is holding the lock that we need and will release it.
* We approximate round-robin by starting at the last boosted VCPU.
*/
- for (pass = 0; pass < 2 && !yielded; pass++) {
+ for (pass = 0; pass < 2 && !yielded && try; pass++) {
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!pass && i <= last_boosted_vcpu) {
i = last_boosted_vcpu;
continue;
if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
continue;
- if (kvm_vcpu_yield_to(vcpu)) {
+
+ yielded = kvm_vcpu_yield_to(vcpu);
+ if (yielded > 0) {
kvm->last_boosted_vcpu = i;
- yielded = 1;
break;
+ } else if (yielded < 0) {
+ try--;
+ if (!try)
+ break;
}
}
}
atomic_inc(&kvm->online_vcpus);
mutex_unlock(&kvm->lock);
+ kvm_arch_vcpu_postcreate(vcpu);
return r;
unlock_vcpu_destroy:
if (vcpu->kvm->mm != current->mm)
return -EIO;
-#if defined(CONFIG_S390) || defined(CONFIG_PPC)
+#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
/*
* Special cases: vcpu ioctls that are asynchronous to vcpu execution,
* so vcpu_load() would break it.
#endif
- vcpu_load(vcpu);
+ r = vcpu_load(vcpu);
+ if (r)
+ return r;
switch (ioctl) {
case KVM_RUN:
r = -EINVAL;
goto out;
}
r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs);
- if (r)
- goto out_free2;
- r = 0;
-out_free2:
kfree(kvm_regs);
break;
}
kvm_sregs = memdup_user(argp, sizeof(*kvm_sregs));
if (IS_ERR(kvm_sregs)) {
r = PTR_ERR(kvm_sregs);
+ kvm_sregs = NULL;
goto out;
}
r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs);
- if (r)
- goto out;
- r = 0;
break;
}
case KVM_GET_MP_STATE: {
if (copy_from_user(&mp_state, argp, sizeof mp_state))
goto out;
r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
- if (r)
- goto out;
- r = 0;
break;
}
case KVM_TRANSLATE: {
if (copy_from_user(&dbg, argp, sizeof dbg))
goto out;
r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
- if (r)
- goto out;
- r = 0;
break;
}
case KVM_SET_SIGNAL_MASK: {
fpu = memdup_user(argp, sizeof(*fpu));
if (IS_ERR(fpu)) {
r = PTR_ERR(fpu);
+ fpu = NULL;
goto out;
}
r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
- if (r)
- goto out;
- r = 0;
break;
}
default:
if (copy_from_user(&csigset, sigmask_arg->sigset,
sizeof csigset))
goto out;
- }
- sigset_from_compat(&sigset, &csigset);
- r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
+ sigset_from_compat(&sigset, &csigset);
+ r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
+ } else
+ r = kvm_vcpu_ioctl_set_sigmask(vcpu, NULL);
break;
}
default:
switch (ioctl) {
case KVM_CREATE_VCPU:
r = kvm_vm_ioctl_create_vcpu(kvm, arg);
- if (r < 0)
- goto out;
break;
case KVM_SET_USER_MEMORY_REGION: {
struct kvm_userspace_memory_region kvm_userspace_mem;
sizeof kvm_userspace_mem))
goto out;
- r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1);
- if (r)
- goto out;
+ r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, true);
break;
}
case KVM_GET_DIRTY_LOG: {
if (copy_from_user(&log, argp, sizeof log))
goto out;
r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
- if (r)
- goto out;
break;
}
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
if (copy_from_user(&zone, argp, sizeof zone))
goto out;
r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
- if (r)
- goto out;
- r = 0;
break;
}
case KVM_UNREGISTER_COALESCED_MMIO: {
if (copy_from_user(&zone, argp, sizeof zone))
goto out;
r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
- if (r)
- goto out;
- r = 0;
break;
}
#endif
log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap);
r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
- if (r)
- goto out;
break;
}
default: