KVM: MMU: delay flush all tlbs on sync_page path
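
Instead of forcing a full remote TLB flush every time sync_page drops an
spte, record the pending work in kvm->tlbs_dirty and let the next remote
flush (or the mmu notifiers, see the Note added in the diff) pick it up
before any page can actually be freed.

The hunks below only show the producer side, sync_page incrementing the
counter.  As a minimal sketch of the consumer, assuming the remote flush
is issued by kvm_flush_remote_tlbs() in virt/kvm/kvm_main.c (not part of
this diff; make_all_cpus_request() and the stat counter follow the shape
that file already has), the counter could be cleared like this:

	void kvm_flush_remote_tlbs(struct kvm *kvm)
	{
		/* snapshot the dirty count before kicking the vcpus */
		int dirty_count = kvm->tlbs_dirty;

		smp_mb();
		if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
			++kvm->stat.remote_tlb_flush;
		/*
		 * Clear only what we saw: if sync_page dirtied the tlbs
		 * again after the snapshot, the cmpxchg fails and the new
		 * requests survive until the next flush.
		 */
		cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
	}

A plain "kvm->tlbs_dirty = 0" store would race with the producers in
sync_page and could silently drop a dirty mark, which is why the clear
is conditional.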
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ba00eef..2b3d66c 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -299,25 +299,42 @@ static int FNAME(walk_addr_nested)(struct guest_walker *walker,
                                        addr, access);
 }
 
+static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
+                                   struct kvm_mmu_page *sp, u64 *spte,
+                                   pt_element_t gpte)
+{
+       u64 nonpresent = shadow_trap_nonpresent_pte;
+
+       if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
+               goto no_present;
+
+       if (!is_present_gpte(gpte)) {
+               if (!sp->unsync)
+                       nonpresent = shadow_notrap_nonpresent_pte;
+               goto no_present;
+       }
+
+       if (!(gpte & PT_ACCESSED_MASK))
+               goto no_present;
+
+       return false;
+
+no_present:
+       drop_spte(vcpu->kvm, spte, nonpresent);
+       return true;
+}
+
 static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                              u64 *spte, const void *pte)
 {
        pt_element_t gpte;
        unsigned pte_access;
        pfn_t pfn;
-       u64 new_spte;
 
        gpte = *(const pt_element_t *)pte;
-       if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
-               if (!is_present_gpte(gpte)) {
-                       if (sp->unsync)
-                               new_spte = shadow_trap_nonpresent_pte;
-                       else
-                               new_spte = shadow_notrap_nonpresent_pte;
-                       __set_spte(spte, new_spte);
-               }
+       if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
                return;
-       }
+
        pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
        pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
        if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
@@ -329,7 +346,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                return;
        kvm_get_pfn(pfn);
        /*
-        * we call mmu_set_spte() with reset_host_protection = true beacuse that
+        * we call mmu_set_spte() with host_writable = true because that
         * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
         */
        mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
@@ -364,7 +381,6 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
                                u64 *sptep)
 {
        struct kvm_mmu_page *sp;
-       struct kvm_mmu *mmu = &vcpu->arch.mmu;
        pt_element_t *gptep = gw->prefetch_ptes;
        u64 *spte;
        int i;
@@ -395,14 +411,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 
                gpte = gptep[i];
 
-               if (!is_present_gpte(gpte) ||
-                     is_rsvd_bits_set(mmu, gpte, PT_PAGE_TABLE_LEVEL)) {
-                       if (!sp->unsync)
-                               __set_spte(spte, shadow_notrap_nonpresent_pte);
-                       continue;
-               }
-
-               if (!(gpte & PT_ACCESSED_MASK))
+               if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
                        continue;
 
                pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
@@ -737,12 +746,19 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
  * Using the cached information from sp->gfns is safe because:
  * - The spte has a reference to the struct page, so the pfn for a given gfn
  *   can't change unless all sptes pointing to it are nuked first.
+ *
+ * Note:
+ *   All tlbs must be flushed if an spte is dropped, even though the guest is
+ *   responsible for the flush: otherwise kvm_mmu_notifier_invalidate_page and
+ *   kvm_mmu_notifier_invalidate_range_start may see that the page is no longer
+ *   mapped by the guest, skip the tlb flush, and leave freed pages reachable
+ *   through stale tlb entries.  Instead of flushing here, we increment
+ *   kvm->tlbs_dirty to delay the flush (the notifier side is sketched below).
  */
-static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
-                           bool clear_unsync)
+static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
        int i, offset, nr_present;
-       bool reset_host_protection;
+       bool host_writable;
        gpa_t first_pte_gpa;
 
        offset = nr_present = 0;
@@ -771,16 +787,16 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                        return -EINVAL;
 
                gfn = gpte_to_gfn(gpte);
-               if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)
-                     || gfn != sp->gfns[i] || !is_present_gpte(gpte)
-                     || !(gpte & PT_ACCESSED_MASK)) {
-                       u64 nonpresent;
 
-                       if (is_present_gpte(gpte) || !clear_unsync)
-                               nonpresent = shadow_trap_nonpresent_pte;
-                       else
-                               nonpresent = shadow_notrap_nonpresent_pte;
-                       drop_spte(vcpu->kvm, &sp->spt[i], nonpresent);
+               if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
+                       vcpu->kvm->tlbs_dirty++;
+                       continue;
+               }
+
+               if (gfn != sp->gfns[i]) {
+                       drop_spte(vcpu->kvm, &sp->spt[i],
+                                     shadow_trap_nonpresent_pte);
+                       vcpu->kvm->tlbs_dirty++;
                        continue;
                }
 
@@ -788,14 +804,14 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
                if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) {
                        pte_access &= ~ACC_WRITE_MASK;
-                       reset_host_protection = 0;
+                       host_writable = 0;
                } else {
-                       reset_host_protection = 1;
+                       host_writable = 1;
                }
                set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
                         is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
                         spte_to_pfn(sp->spt[i]), true, false,
-                        reset_host_protection);
+                        host_writable);
        }
 
        return !nr_present;
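
Per the Note above sync_page, delaying the flush is only safe if the mmu
notifiers treat a nonzero kvm->tlbs_dirty as a flush that is still owed.
A minimal sketch of that check, assuming the usual shape of
kvm_mmu_notifier_invalidate_page() in virt/kvm/kvm_main.c (kvm_unmap_hva()
and the locking around it are assumed, not shown in this diff):

	/*
	 * In kvm_mmu_notifier_invalidate_page() and, analogously, in
	 * kvm_mmu_notifier_invalidate_range_start():
	 */
	need_tlb_flush = kvm_unmap_hva(kvm, address);
	/* nonzero tlbs_dirty: sync_page dropped sptes without flushing */
	need_tlb_flush |= kvm->tlbs_dirty;
	/* we have to flush the tlb before the pages can be freed */
	if (need_tlb_flush)
		kvm_flush_remote_tlbs(kvm);

kvm_flush_remote_tlbs() (sketched under the subject line) then clears
tlbs_dirty, so the guest can never reach a freed page through a stale
translation.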