KVM: Push rmap into kvm_arch_memory_slot
Takuya Yoshikawa [Wed, 1 Aug 2012 09:03:28 +0000 (18:03 +0900)]
Two reasons:
 - x86 can integrate rmap and rmap_pde and remove heuristics in
   __gfn_to_rmap().
 - Some architectures do not need rmap.

Since rmap is one of the most memory-consuming data structures in KVM,
ppc had better restrict its allocation to Book3S HV.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Avi Kivity <avi@redhat.com>

arch/powerpc/include/asm/kvm_host.h
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/kvm/book3s_hv_rm_mmu.c
arch/powerpc/kvm/powerpc.c
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/mmu.c
arch/x86/kvm/x86.c
include/linux/kvm_host.h
virt/kvm/kvm_main.c

index 572ad01..a29e091 100644 (file)
@@ -221,6 +221,7 @@ struct revmap_entry {
 #define KVMPPC_GOT_PAGE                0x80
 
 struct kvm_arch_memory_slot {
+       unsigned long *rmap;
 };
 
 struct kvm_arch {
index 3c635c0..d95d113 100644 (file)
@@ -705,7 +705,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                goto out_unlock;
        hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
 
-       rmap = &memslot->rmap[gfn - memslot->base_gfn];
+       rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
        lock_rmap(rmap);
 
        /* Check if we might have been invalidated; let the guest retry if so */
@@ -788,7 +788,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
                for (; gfn < gfn_end; ++gfn) {
                        gfn_t gfn_offset = gfn - memslot->base_gfn;
 
-                       ret = handler(kvm, &memslot->rmap[gfn_offset], gfn);
+                       ret = handler(kvm, &memslot->arch.rmap[gfn_offset], gfn);
                        retval |= ret;
                }
        }
@@ -1036,7 +1036,7 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
        unsigned long *rmapp, *map;
 
        preempt_disable();
-       rmapp = memslot->rmap;
+       rmapp = memslot->arch.rmap;
        map = memslot->dirty_bitmap;
        for (i = 0; i < memslot->npages; ++i) {
                if (kvm_test_clear_dirty(kvm, rmapp))
index 5c70d19..56ac1a5 100644 (file)
@@ -84,7 +84,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
        if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
                return;
 
-       rmap = real_vmalloc_addr(&memslot->rmap[gfn - memslot->base_gfn]);
+       rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
        lock_rmap(rmap);
 
        head = *rmap & KVMPPC_RMAP_INDEX;
@@ -180,7 +180,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
        if (!slot_is_aligned(memslot, psize))
                return H_PARAMETER;
        slot_fn = gfn - memslot->base_gfn;
-       rmap = &memslot->rmap[slot_fn];
+       rmap = &memslot->arch.rmap[slot_fn];
 
        if (!kvm->arch.using_mmu_notifiers) {
                physp = kvm->arch.slot_phys[memslot->id];
index 87f4dc8..879b14a 100644 (file)
@@ -302,10 +302,18 @@ long kvm_arch_dev_ioctl(struct file *filp,
 void kvm_arch_free_memslot(struct kvm_memory_slot *free,
                           struct kvm_memory_slot *dont)
 {
+       if (!dont || free->arch.rmap != dont->arch.rmap) {
+               vfree(free->arch.rmap);
+               free->arch.rmap = NULL;
+       }
 }
 
 int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 {
+       slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
+       if (!slot->arch.rmap)
+               return -ENOMEM;
+
        return 0;
 }
 
index 48e7131..1309e69 100644 (file)
@@ -504,7 +504,7 @@ struct kvm_lpage_info {
 };
 
 struct kvm_arch_memory_slot {
-       unsigned long *rmap_pde[KVM_NR_PAGE_SIZES - 1];
+       unsigned long *rmap[KVM_NR_PAGE_SIZES];
        struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
 };
 
index ee768bb..aa9a987 100644 (file)
@@ -970,11 +970,8 @@ static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
 {
        unsigned long idx;
 
-       if (likely(level == PT_PAGE_TABLE_LEVEL))
-               return &slot->rmap[gfn - slot->base_gfn];
-
        idx = gfn_to_index(gfn, slot->base_gfn, level);
-       return &slot->arch.rmap_pde[level - PT_DIRECTORY_LEVEL][idx];
+       return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx];
 }
 
 /*
index abc039d..ebf2109 100644 (file)
@@ -6303,14 +6303,18 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 {
        int i;
 
-       for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
-               if (!dont || free->arch.rmap_pde[i] != dont->arch.rmap_pde[i]) {
-                       kvm_kvfree(free->arch.rmap_pde[i]);
-                       free->arch.rmap_pde[i] = NULL;
+       for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
+               if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
+                       kvm_kvfree(free->arch.rmap[i]);
+                       free->arch.rmap[i] = NULL;
                }
-               if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
-                       kvm_kvfree(free->arch.lpage_info[i]);
-                       free->arch.lpage_info[i] = NULL;
+               if (i == 0)
+                       continue;
+
+               if (!dont || free->arch.lpage_info[i - 1] !=
+                            dont->arch.lpage_info[i - 1]) {
+                       kvm_kvfree(free->arch.lpage_info[i - 1]);
+                       free->arch.lpage_info[i - 1] = NULL;
                }
        }
 }
@@ -6319,28 +6323,30 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 {
        int i;
 
-       for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+       for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
                unsigned long ugfn;
                int lpages;
-               int level = i + 2;
+               int level = i + 1;
 
                lpages = gfn_to_index(slot->base_gfn + npages - 1,
                                      slot->base_gfn, level) + 1;
 
-               slot->arch.rmap_pde[i] =
-                       kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap_pde[i]));
-               if (!slot->arch.rmap_pde[i])
+               slot->arch.rmap[i] =
+                       kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i]));
+               if (!slot->arch.rmap[i])
                        goto out_free;
+               if (i == 0)
+                       continue;
 
-               slot->arch.lpage_info[i] =
-                       kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
-               if (!slot->arch.lpage_info[i])
+               slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages *
+                                       sizeof(*slot->arch.lpage_info[i - 1]));
+               if (!slot->arch.lpage_info[i - 1])
                        goto out_free;
 
                if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
-                       slot->arch.lpage_info[i][0].write_count = 1;
+                       slot->arch.lpage_info[i - 1][0].write_count = 1;
                if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
-                       slot->arch.lpage_info[i][lpages - 1].write_count = 1;
+                       slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1;
                ugfn = slot->userspace_addr >> PAGE_SHIFT;
                /*
                 * If the gfn and userspace address are not aligned wrt each
@@ -6352,18 +6358,21 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
                        unsigned long j;
 
                        for (j = 0; j < lpages; ++j)
-                               slot->arch.lpage_info[i][j].write_count = 1;
+                               slot->arch.lpage_info[i - 1][j].write_count = 1;
                }
        }
 
        return 0;
 
 out_free:
-       for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
-               kvm_kvfree(slot->arch.rmap_pde[i]);
-               kvm_kvfree(slot->arch.lpage_info[i]);
-               slot->arch.rmap_pde[i] = NULL;
-               slot->arch.lpage_info[i] = NULL;
+       for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
+               kvm_kvfree(slot->arch.rmap[i]);
+               slot->arch.rmap[i] = NULL;
+               if (i == 0)
+                       continue;
+
+               kvm_kvfree(slot->arch.lpage_info[i - 1]);
+               slot->arch.lpage_info[i - 1] = NULL;
        }
        return -ENOMEM;
 }
index dbc65f9..3c16f0f 100644 (file)
@@ -213,7 +213,6 @@ struct kvm_memory_slot {
        gfn_t base_gfn;
        unsigned long npages;
        unsigned long flags;
-       unsigned long *rmap;
        unsigned long *dirty_bitmap;
        struct kvm_arch_memory_slot arch;
        unsigned long userspace_addr;
index bcf973e..14ec567 100644 (file)
@@ -550,16 +550,12 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
                                  struct kvm_memory_slot *dont)
 {
-       if (!dont || free->rmap != dont->rmap)
-               vfree(free->rmap);
-
        if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
                kvm_destroy_dirty_bitmap(free);
 
        kvm_arch_free_memslot(free, dont);
 
        free->npages = 0;
-       free->rmap = NULL;
 }
 
 void kvm_free_physmem(struct kvm *kvm)
@@ -768,11 +764,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
        if (npages && !old.npages) {
                new.user_alloc = user_alloc;
                new.userspace_addr = mem->userspace_addr;
-#ifndef CONFIG_S390
-               new.rmap = vzalloc(npages * sizeof(*new.rmap));
-               if (!new.rmap)
-                       goto out_free;
-#endif /* not defined CONFIG_S390 */
+
                if (kvm_arch_create_memslot(&new, npages))
                        goto out_free;
        }
@@ -831,7 +823,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 
        /* actual memory is freed via old in kvm_free_physmem_slot below */
        if (!npages) {
-               new.rmap = NULL;
                new.dirty_bitmap = NULL;
                memset(&new.arch, 0, sizeof(new.arch));
        }