KVM: add support for change_pte mmu notifiers
Izik Eidus [Wed, 23 Sep 2009 18:47:18 +0000 (21:47 +0300)]
this is needed for kvm if it want ksm to directly map pages into its
shadow page tables.

[marcelo: cast pfn assignment to u64]

Signed-off-by: Izik Eidus <ieidus@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

arch/x86/include/asm/kvm_host.h
arch/x86/kvm/mmu.c
virt/kvm/kvm_main.c

index 3be0004..d838922 100644 (file)
@@ -796,6 +796,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
index 5cd8b4e..685a4ff 100644 (file)
@@ -748,7 +748,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
        return write_protected;
 }
 
-static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
 {
        u64 *spte;
        int need_tlb_flush = 0;
@@ -763,8 +763,45 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
        return need_tlb_flush;
 }
 
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
-                         int (*handler)(struct kvm *kvm, unsigned long *rmapp))
+static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
+{
+       int need_flush = 0;
+       u64 *spte, new_spte;
+       pte_t *ptep = (pte_t *)data;
+       pfn_t new_pfn;
+
+       WARN_ON(pte_huge(*ptep));
+       new_pfn = pte_pfn(*ptep);
+       spte = rmap_next(kvm, rmapp, NULL);
+       while (spte) {
+               BUG_ON(!is_shadow_present_pte(*spte));
+               rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
+               need_flush = 1;
+               if (pte_write(*ptep)) {
+                       rmap_remove(kvm, spte);
+                       __set_spte(spte, shadow_trap_nonpresent_pte);
+                       spte = rmap_next(kvm, rmapp, NULL);
+               } else {
+                       new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
+                       new_spte |= (u64)new_pfn << PAGE_SHIFT;
+
+                       new_spte &= ~PT_WRITABLE_MASK;
+                       new_spte &= ~SPTE_HOST_WRITEABLE;
+                       if (is_writeble_pte(*spte))
+                               kvm_set_pfn_dirty(spte_to_pfn(*spte));
+                       __set_spte(spte, new_spte);
+                       spte = rmap_next(kvm, rmapp, spte);
+               }
+       }
+       if (need_flush)
+               kvm_flush_remote_tlbs(kvm);
+
+       return 0;
+}
+
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, u64 data,
+                         int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+                                        u64 data))
 {
        int i, j;
        int retval = 0;
@@ -786,13 +823,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
                if (hva >= start && hva < end) {
                        gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
 
-                       retval |= handler(kvm, &memslot->rmap[gfn_offset]);
+                       retval |= handler(kvm, &memslot->rmap[gfn_offset],
+                                         data);
 
                        for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
                                int idx = gfn_offset;
                                idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
                                retval |= handler(kvm,
-                                       &memslot->lpage_info[j][idx].rmap_pde);
+                                       &memslot->lpage_info[j][idx].rmap_pde,
+                                       data);
                        }
                }
        }
@@ -802,10 +841,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 {
-       return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+       return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+       kvm_handle_hva(kvm, hva, (u64)&pte, kvm_set_pte_rmapp);
 }
 
-static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
 {
        u64 *spte;
        int young = 0;
@@ -841,13 +885,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
        gfn = unalias_gfn(vcpu->kvm, gfn);
        rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 
-       kvm_unmap_rmapp(vcpu->kvm, rmapp);
+       kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
        kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
 int kvm_age_hva(struct kvm *kvm, unsigned long hva)
 {
-       return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+       return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
 }
 
 #ifdef MMU_DEBUG
index e79c540..b7c78a4 100644 (file)
@@ -850,6 +850,19 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 
 }
 
+static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
+                                       struct mm_struct *mm,
+                                       unsigned long address,
+                                       pte_t pte)
+{
+       struct kvm *kvm = mmu_notifier_to_kvm(mn);
+
+       spin_lock(&kvm->mmu_lock);
+       kvm->mmu_notifier_seq++;
+       kvm_set_spte_hva(kvm, address, pte);
+       spin_unlock(&kvm->mmu_lock);
+}
+
 static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
                                                    struct mm_struct *mm,
                                                    unsigned long start,
@@ -929,6 +942,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
        .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
        .invalidate_range_end   = kvm_mmu_notifier_invalidate_range_end,
        .clear_flush_young      = kvm_mmu_notifier_clear_flush_young,
+       .change_pte             = kvm_mmu_notifier_change_pte,
        .release                = kvm_mmu_notifier_release,
 };
 #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */