KVM: Move gfn_to_memslot() to kvm_host.h
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index d3e36fc..def880a 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
 #include <asm/synch.h>
 #include <asm/ppc-opcode.h>
 
-/*
- * Since this file is built in even if KVM is a module, we need
- * a local copy of this function for the case where kvm_main.c is
- * modular.
- */
-static struct kvm_memory_slot *builtin_gfn_to_memslot(struct kvm *kvm,
-                                               gfn_t gfn)
-{
-       struct kvm_memslots *slots;
-       struct kvm_memory_slot *memslot;
-
-       slots = kvm_memslots(kvm);
-       kvm_for_each_memslot(memslot, slots)
-               if (gfn >= memslot->base_gfn &&
-                     gfn < memslot->base_gfn + memslot->npages)
-                       return memslot;
-       return NULL;
-}
-
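
The deleted builtin_gfn_to_memslot() existed only because this file is built into the kernel image even when kvm_main.c is modular. The hunks below switch its two callers to __gfn_to_memslot(), which the commit titled above turns into a static inline in include/linux/kvm_host.h, usable from real-mode code with no module dependency. A simplified sketch of that inline (the actual header splits it into search_memslots() plus a thin wrapper):

    static inline struct kvm_memory_slot *
    __gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
    {
            struct kvm_memory_slot *memslot;

            /* same linear scan the deleted local copy performed */
            kvm_for_each_memslot(memslot, slots)
                    if (gfn >= memslot->base_gfn &&
                        gfn < memslot->base_gfn + memslot->npages)
                            return memslot;

            return NULL;
    }
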
 /* Translate address of a vmalloc'd thing to a linear map address */
 static void *real_vmalloc_addr(void *x)
 {
@@ -87,17 +68,19 @@ EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
 
 /* Remove this HPTE from the chain for a real page */
 static void remove_revmap_chain(struct kvm *kvm, long pte_index,
-                               unsigned long hpte_v)
+                               struct revmap_entry *rev,
+                               unsigned long hpte_v, unsigned long hpte_r)
 {
-       struct revmap_entry *rev, *next, *prev;
+       struct revmap_entry *next, *prev;
        unsigned long gfn, ptel, head;
        struct kvm_memory_slot *memslot;
        unsigned long *rmap;
+       unsigned long rcbits;
 
-       rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
-       ptel = rev->guest_rpte;
+       rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
+       ptel = rev->guest_rpte |= rcbits;
        gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
-       memslot = builtin_gfn_to_memslot(kvm, gfn);
+       memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
        if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
                return;
 
@@ -116,6 +99,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
                else
                        *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
        }
+       *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
        unlock_rmap(rmap);
 }
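
remove_revmap_chain() now takes the final PTE contents so it can fold the hardware-maintained reference/change (R/C) bits into both the saved guest view (rev->guest_rpte) and the page's rmap entry; the guest's R/C state thus survives the HPTE being torn down. A standalone sketch of the bit arithmetic, assuming 64-bit longs and the usual constants (HPTE_R_R = 0x100, HPTE_R_C = 0x80; KVMPPC_RMAP_RC_SHIFT is assumed to be 32):

    #include <assert.h>

    #define HPTE_R_R                0x100UL /* reference bit, PTE low word */
    #define HPTE_R_C                0x080UL /* change bit, PTE low word */
    #define KVMPPC_RMAP_RC_SHIFT    32      /* assumed value */

    /* Stash the dying HPTE's R/C bits in the page's rmap entry. */
    static unsigned long stash_rc(unsigned long rmap, unsigned long hpte_r)
    {
            unsigned long rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
            return rmap | (rcbits << KVMPPC_RMAP_RC_SHIFT);
    }

    int main(void)
    {
            /* page was referenced and dirtied: both bits land in the rmap */
            unsigned long rmap = stash_rc(0, 0x1a5UL | HPTE_R_R | HPTE_R_C);
            assert(rmap == ((HPTE_R_R | HPTE_R_C) << KVMPPC_RMAP_RC_SHIFT));
            return 0;
    }
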
 
@@ -140,6 +124,12 @@ static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
        return kvmppc_read_update_linux_pte(ptep, writing);
 }
 
+static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
+{
+       asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
+       hpte[0] = hpte_v;
+}
+
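
The new unlock_hpte() helper pairs the release barrier with the store that rewrites the first doubleword, dropping HPTE_V_HVLOCK (PPC_RELEASE_BARRIER expands to lwsync on server processors); the last hunk of this diff replaces an open-coded lwsync-plus-store in kvmppc_hpte_hv_fault() with it, and kvmppc_h_remove() below uses unlock_hpte(hpte, 0) to clear and unlock an entry in one go. A hypothetical caller, assuming the existing try_lock_hpte()/cpu_relax() helpers (try_lock_hpte() sets HPTE_V_HVLOCK with a ldarx/stdcx. sequence, giving acquire semantics):

    /* Hypothetical usage sketch: updates to hpte[0]/hpte[1] stay inside
     * the acquire/release-bracketed critical section. */
    static void update_hpte_locked(unsigned long *hpte, unsigned long new_v,
                                   unsigned long new_r)
    {
            while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
                    cpu_relax();
            hpte[1] = new_r;          /* write the low word first ... */
            unlock_hpte(hpte, new_v); /* ... then publish hpte[0], unlocked */
    }
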
 long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
                    long pte_index, unsigned long pteh, unsigned long ptel)
 {
@@ -156,6 +146,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
        pte_t pte;
        unsigned int writing;
        unsigned long mmu_seq;
+       unsigned long rcbits;
        bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
 
        psize = hpte_page_size(pteh, ptel);
@@ -171,7 +162,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
        /* Find the memslot (if any) for this address */
        gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
        gfn = gpa >> PAGE_SHIFT;
-       memslot = builtin_gfn_to_memslot(kvm, gfn);
+       memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
        pa = 0;
        is_io = ~0ul;
        rmap = NULL;
@@ -314,6 +305,9 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
                } else {
                        kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
                                                realmode);
+                       /* Only set R/C in real HPTE if already set in *rmap */
+                       rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
+                       ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
                }
        }
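
Conversely, when kvmppc_h_enter() installs a mapping for a page whose rmap already holds stashed R/C bits, it must not preset R or C in the hardware HPTE beyond what has actually been observed, or the guest would see phantom references and changes. The mask rcbits | ~(HPTE_R_R | HPTE_R_C) passes every non-R/C bit of ptel through and keeps R/C only where the rmap has them. A standalone check (same assumed constants as above, 64-bit longs):

    #include <assert.h>

    #define HPTE_R_R                0x100UL
    #define HPTE_R_C                0x080UL
    #define KVMPPC_RMAP_RC_SHIFT    32      /* assumed value */

    int main(void)
    {
            unsigned long rmap = HPTE_R_R << KVMPPC_RMAP_RC_SHIFT; /* R seen, C not */
            unsigned long ptel = 0x12345000UL | HPTE_R_R | HPTE_R_C;
            unsigned long rcbits = rmap >> KVMPPC_RMAP_RC_SHIFT;

            ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
            assert(ptel & HPTE_R_R);         /* kept: already set in rmap */
            assert(!(ptel & HPTE_R_C));      /* cleared: not yet observed */
            assert((ptel & 0x12345000UL) == 0x12345000UL); /* rest untouched */
            return 0;
    }
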
 
@@ -356,6 +350,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
        struct kvm *kvm = vcpu->kvm;
        unsigned long *hpte;
        unsigned long v, r, rb;
+       struct revmap_entry *rev;
 
        if (pte_index >= HPT_NPTE)
                return H_PARAMETER;
@@ -368,30 +363,33 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
                hpte[0] &= ~HPTE_V_HVLOCK;
                return H_NOT_FOUND;
        }
-       if (atomic_read(&kvm->online_vcpus) == 1)
-               flags |= H_LOCAL;
-       vcpu->arch.gpr[4] = v = hpte[0] & ~HPTE_V_HVLOCK;
-       vcpu->arch.gpr[5] = r = hpte[1];
-       rb = compute_tlbie_rb(v, r, pte_index);
-       if (v & HPTE_V_VALID)
-               remove_revmap_chain(kvm, pte_index, v);
-       smp_wmb();
-       hpte[0] = 0;
-       if (!(v & HPTE_V_VALID))
-               return H_SUCCESS;
-       if (!(flags & H_LOCAL)) {
-               while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
-                       cpu_relax();
-               asm volatile("ptesync" : : : "memory");
-               asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
-                            : : "r" (rb), "r" (kvm->arch.lpid));
-               asm volatile("ptesync" : : : "memory");
-               kvm->arch.tlbie_lock = 0;
-       } else {
-               asm volatile("ptesync" : : : "memory");
-               asm volatile("tlbiel %0" : : "r" (rb));
-               asm volatile("ptesync" : : : "memory");
+
+       rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+       v = hpte[0] & ~HPTE_V_HVLOCK;
+       if (v & HPTE_V_VALID) {
+               hpte[0] &= ~HPTE_V_VALID;
+               rb = compute_tlbie_rb(v, hpte[1], pte_index);
+               if (!(flags & H_LOCAL) && atomic_read(&kvm->online_vcpus) > 1) {
+                       while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+                               cpu_relax();
+                       asm volatile("ptesync" : : : "memory");
+                       asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
+                                    : : "r" (rb), "r" (kvm->arch.lpid));
+                       asm volatile("ptesync" : : : "memory");
+                       kvm->arch.tlbie_lock = 0;
+               } else {
+                       asm volatile("ptesync" : : : "memory");
+                       asm volatile("tlbiel %0" : : "r" (rb));
+                       asm volatile("ptesync" : : : "memory");
+               }
+               /* Read PTE low word after tlbie to get final R/C values */
+               remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
        }
+       r = rev->guest_rpte;
+       unlock_hpte(hpte, 0);
+
+       vcpu->arch.gpr[4] = v;
+       vcpu->arch.gpr[5] = r;
        return H_SUCCESS;
 }
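
The rewritten kvmppc_h_remove() keeps the HPTE locked while it clears HPTE_V_VALID and invalidates the TLB, and only afterwards walks the revmap: hardware may keep setting R/C in hpte[1] right up until the tlbie completes, so reading earlier could lose a final update. It also now returns the guest's saved PTE (rev->guest_rpte) in gpr[5] instead of the raw hardware word. The invalidation itself is broadcast (tlbie, serialized by kvm->arch.tlbie_lock since only one tlbie sequence may be in flight at a time) unless no other CPU can hold the translation, in which case the cheaper CPU-local tlbiel suffices. A minimal restatement of that policy (sketch; local_hint stands for the caller's H_LOCAL flag):

    /* Broadcast unless the guest hinted locality or only one vCPU exists. */
    static int need_broadcast_tlbie(int local_hint, int online_vcpus)
    {
            return !local_hint && online_vcpus > 1;
    }
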
 
@@ -399,82 +397,117 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 {
        struct kvm *kvm = vcpu->kvm;
        unsigned long *args = &vcpu->arch.gpr[4];
-       unsigned long *hp, tlbrb[4];
-       long int i, found;
-       long int n_inval = 0;
-       unsigned long flags, req, pte_index;
+       unsigned long *hp, *hptes[4], tlbrb[4];
+       long int i, j, k, n, found, indexes[4];
+       unsigned long flags, req, pte_index, rcbits;
        long int local = 0;
        long int ret = H_SUCCESS;
+       struct revmap_entry *rev, *revs[4];
 
        if (atomic_read(&kvm->online_vcpus) == 1)
                local = 1;
-       for (i = 0; i < 4; ++i) {
-               pte_index = args[i * 2];
-               flags = pte_index >> 56;
-               pte_index &= ((1ul << 56) - 1);
-               req = flags >> 6;
-               flags &= 3;
-               if (req == 3)
-                       break;
-               if (req != 1 || flags == 3 ||
-                   pte_index >= HPT_NPTE) {
-                       /* parameter error */
-                       args[i * 2] = ((0xa0 | flags) << 56) + pte_index;
-                       ret = H_PARAMETER;
-                       break;
-               }
-               hp = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
-               while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
-                       cpu_relax();
-               found = 0;
-               if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) {
-                       switch (flags & 3) {
-                       case 0:         /* absolute */
-                               found = 1;
+       for (i = 0; i < 4 && ret == H_SUCCESS; ) {
+               n = 0;
+               for (; i < 4; ++i) {
+                       j = i * 2;
+                       pte_index = args[j];
+                       flags = pte_index >> 56;
+                       pte_index &= ((1ul << 56) - 1);
+                       req = flags >> 6;
+                       flags &= 3;
+                       if (req == 3) {         /* no more requests */
+                               i = 4;
                                break;
-                       case 1:         /* andcond */
-                               if (!(hp[0] & args[i * 2 + 1]))
-                                       found = 1;
+                       }
+                       if (req != 1 || flags == 3 || pte_index >= HPT_NPTE) {
+                               /* parameter error */
+                               args[j] = ((0xa0 | flags) << 56) + pte_index;
+                               ret = H_PARAMETER;
                                break;
-                       case 2:         /* AVPN */
-                               if ((hp[0] & ~0x7fUL) == args[i * 2 + 1])
+                       }
+                       hp = (unsigned long *)
+                               (kvm->arch.hpt_virt + (pte_index << 4));
+                       /* to avoid deadlock, don't spin except for first */
+                       if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
+                               if (n)
+                                       break;
+                               while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
+                                       cpu_relax();
+                       }
+                       found = 0;
+                       if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) {
+                               switch (flags & 3) {
+                               case 0:         /* absolute */
                                        found = 1;
-                               break;
+                                       break;
+                               case 1:         /* andcond */
+                                       if (!(hp[0] & args[j + 1]))
+                                               found = 1;
+                                       break;
+                               case 2:         /* AVPN */
+                                       if ((hp[0] & ~0x7fUL) == args[j + 1])
+                                               found = 1;
+                                       break;
+                               }
+                       }
+                       if (!found) {
+                               hp[0] &= ~HPTE_V_HVLOCK;
+                               args[j] = ((0x90 | flags) << 56) + pte_index;
+                               continue;
+                       }
+
+                       args[j] = ((0x80 | flags) << 56) + pte_index;
+                       rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+
+                       if (!(hp[0] & HPTE_V_VALID)) {
+                               /* insert R and C bits from PTE */
+                               rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
+                               args[j] |= rcbits << (56 - 5);
+                               continue;
                        }
+
+                       hp[0] &= ~HPTE_V_VALID;         /* leave it locked */
+                       tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index);
+                       indexes[n] = j;
+                       hptes[n] = hp;
+                       revs[n] = rev;
+                       ++n;
                }
-               if (!found) {
-                       hp[0] &= ~HPTE_V_HVLOCK;
-                       args[i * 2] = ((0x90 | flags) << 56) + pte_index;
-                       continue;
+
+               if (!n)
+                       break;
+
+               /* Now that we've collected a batch, do the tlbies */
+               if (!local) {
+                       while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+                               cpu_relax();
+                       asm volatile("ptesync" : : : "memory");
+                       for (k = 0; k < n; ++k)
+                               asm volatile(PPC_TLBIE(%1,%0) : :
+                                            "r" (tlbrb[k]),
+                                            "r" (kvm->arch.lpid));
+                       asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+                       kvm->arch.tlbie_lock = 0;
+               } else {
+                       asm volatile("ptesync" : : : "memory");
+                       for (k = 0; k < n; ++k)
+                               asm volatile("tlbiel %0" : : "r" (tlbrb[k]));
+                       asm volatile("ptesync" : : : "memory");
                }
-               /* insert R and C bits from PTE */
-               flags |= (hp[1] >> 5) & 0x0c;
-               args[i * 2] = ((0x80 | flags) << 56) + pte_index;
-               if (hp[0] & HPTE_V_VALID) {
-                       tlbrb[n_inval++] = compute_tlbie_rb(hp[0], hp[1], pte_index);
-                       remove_revmap_chain(kvm, pte_index, hp[0]);
+
+               /* Read PTE low words after tlbie to get final R/C values */
+               for (k = 0; k < n; ++k) {
+                       j = indexes[k];
+                       pte_index = args[j] & ((1ul << 56) - 1);
+                       hp = hptes[k];
+                       rev = revs[k];
+                       remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]);
+                       rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
+                       args[j] |= rcbits << (56 - 5);
+                       hp[0] = 0;
                }
-               smp_wmb();
-               hp[0] = 0;
-       }
-       if (n_inval == 0)
-               return ret;
-
-       if (!local) {
-               while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
-                       cpu_relax();
-               asm volatile("ptesync" : : : "memory");
-               for (i = 0; i < n_inval; ++i)
-                       asm volatile(PPC_TLBIE(%1,%0)
-                                    : : "r" (tlbrb[i]), "r" (kvm->arch.lpid));
-               asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-               kvm->arch.tlbie_lock = 0;
-       } else {
-               asm volatile("ptesync" : : : "memory");
-               for (i = 0; i < n_inval; ++i)
-                       asm volatile("tlbiel %0" : : "r" (tlbrb[i]));
-               asm volatile("ptesync" : : : "memory");
        }
+
        return ret;
 }
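
kvmppc_h_bulk_remove() now gathers up to four HPTEs per pass (leaving each locked and invalid), issues all the tlbies under one hold of tlbie_lock, and only then reads back the final R/C values, instead of one tlbie per entry. Each request is one doubleword: the top byte holds a 2-bit request type (1 = remove, 3 = end of list) and 2 flag bits, and the low 56 bits hold the PTE index; the completion code (0x80 success, 0x90 not found, 0xa0 parameter error) overwrites that byte, with the final R/C bits deposited at its 0x08/0x04 positions, which is what rcbits << (56 - 5) achieves for bit values 0x100/0x80. A hypothetical decoder mirroring the parsing above (assumes 64-bit longs):

    #include <assert.h>

    struct bulk_req {
            unsigned long req;        /* 0..3; 3 terminates the list */
            unsigned long flags;      /* 0 absolute, 1 andcond, 2 AVPN */
            unsigned long pte_index;
    };

    /* Hypothetical helper: unpack one H_BULK_REMOVE request doubleword. */
    static struct bulk_req decode_bulk_req(unsigned long arg)
    {
            struct bulk_req r;
            unsigned long top = arg >> 56;

            r.req = top >> 6;
            r.flags = top & 3;
            r.pte_index = arg & ((1ul << 56) - 1);
            return r;
    }

    int main(void)
    {
            /* 0x42 = req 1 (remove), flags 2 (AVPN); index 0x1234 */
            struct bulk_req r = decode_bulk_req((0x42UL << 56) | 0x1234);
            assert(r.req == 1 && r.flags == 2 && r.pte_index == 0x1234);
            return 0;
    }
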
 
@@ -555,8 +588,7 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
                pte_index &= ~3;
                n = 4;
        }
-       if (flags & H_R_XLATE)
-               rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+       rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
        for (i = 0; i < n; ++i, ++pte_index) {
                hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
                v = hpte[0] & ~HPTE_V_HVLOCK;
@@ -565,12 +597,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
                        v &= ~HPTE_V_ABSENT;
                        v |= HPTE_V_VALID;
                }
-               if (v & HPTE_V_VALID) {
-                       if (rev)
-                               r = rev[i].guest_rpte;
-                       else
-                               r = hpte[1] | HPTE_R_RPN;
-               }
+               if (v & HPTE_V_VALID)
+                       r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
                vcpu->arch.gpr[4 + i * 2] = v;
                vcpu->arch.gpr[5 + i * 2] = r;
        }
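
kvmppc_h_read() now always returns the guest's saved PTE, with just the live R/C bits merged in from the hardware entry, instead of handing back the hardware word (with the real-page-number field forced to all ones) whenever H_R_XLATE was not specified. The merge, as a standalone sketch:

    #define HPTE_R_R 0x100UL
    #define HPTE_R_C 0x080UL

    /* Guest-visible low word: saved guest PTE, plus any R/C the hardware
     * has set in the real HPTE since it was installed. */
    static unsigned long guest_view(unsigned long guest_rpte,
                                    unsigned long hpte_r)
    {
            return guest_rpte | (hpte_r & (HPTE_R_R | HPTE_R_C));
    }
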
@@ -594,6 +622,25 @@ void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
 }
 EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
 
+void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
+                          unsigned long pte_index)
+{
+       unsigned long rb;
+       unsigned char rbyte;
+
+       rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
+       rbyte = (hptep[1] & ~HPTE_R_R) >> 8;
+       /* modify only the second-last byte, which contains the ref bit */
+       *((char *)hptep + 14) = rbyte;
+       while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+               cpu_relax();
+       asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
+                    : : "r" (rb), "r" (kvm->arch.lpid));
+       asm volatile("ptesync" : : : "memory");
+       kvm->arch.tlbie_lock = 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);
+
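
kvmppc_clear_ref_hpte() clears the reference bit without taking the HPTE lock: HPTE_R_R is 0x100, i.e. bit 8 of the big-endian low doubleword hptep[1], so it lives entirely in byte 14 of the 16-byte HPTE, while the change bit (0x80, bit 7) lives in byte 15. A single one-byte store therefore clears R without a read-modify-write that could race with hardware setting C. A userspace demonstration of the byte arithmetic (hypothetical; big-endian layout built by hand):

    #include <assert.h>
    #include <stdint.h>

    #define HPTE_R_R 0x100UL  /* reference bit */

    int main(void)
    {
            unsigned char hpte[16] = {0};
            uint64_t r = 0x1a5UL;     /* low word with R (0x100) set */
            uint64_t out = 0;
            int i;

            for (i = 0; i < 8; ++i)   /* store hpte[1] big-endian */
                    hpte[8 + i] = r >> (56 - 8 * i);
            /* the trick: rewrite only the second-last byte, with R cleared */
            hpte[14] = (r & ~HPTE_R_R) >> 8;
            for (i = 0; i < 8; ++i)
                    out = (out << 8) | hpte[8 + i];
            assert(out == (r & ~HPTE_R_R)); /* R gone, C and the rest intact */
            return 0;
    }
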
 static int slb_base_page_shift[4] = {
        24,     /* 16M */
        16,     /* 64k */
@@ -720,9 +767,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
        rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
        gr = rev->guest_rpte;
 
-       /* Unlock the HPTE */
-       asm volatile("lwsync" : : : "memory");
-       hpte[0] = v;
+       unlock_hpte(hpte, v);
 
        /* For not found, if the HPTE is valid by now, retry the instruction */
        if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))