xen: mark local pages as FOREIGN in the m2p_override
[linux-2.6.git] / arch / x86 / xen / p2m.c
index 141eb0d..00a0385 100644 (file)
 #include <asm/xen/page.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
+#include <xen/grant_table.h>
 
+#include "multicalls.h"
 #include "xen-ops.h"
 
 static void __init m2p_override_init(void);
@@ -522,11 +524,20 @@ static bool __init __early_alloc_p2m(unsigned long pfn)
        /* Boundary cross-over for the edges: */
        if (idx) {
                unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
+               unsigned long *mid_mfn_p;
 
                p2m_init(p2m);
 
                p2m_top[topidx][mididx] = p2m;
 
+               /* For save/restore we need to MFN of the P2M saved */
+               
+               mid_mfn_p = p2m_top_mfn_p[topidx];
+               WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing),
+                       "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n",
+                       topidx, mididx);
+               mid_mfn_p[mididx] = virt_to_mfn(p2m);
+
        }
        return idx != 0;
 }
@@ -549,12 +560,29 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s,
                pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
        {
                unsigned topidx = p2m_top_index(pfn);
-               if (p2m_top[topidx] == p2m_mid_missing) {
-                       unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
+               unsigned long *mid_mfn_p;
+               unsigned long **mid;
+
+               mid = p2m_top[topidx];
+               mid_mfn_p = p2m_top_mfn_p[topidx];
+               if (mid == p2m_mid_missing) {
+                       mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
 
                        p2m_mid_init(mid);
 
                        p2m_top[topidx] = mid;
+
+                       BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
+               }
+               /* And the save/restore P2M tables.. */
+               if (mid_mfn_p == p2m_mid_missing_mfn) {
+                       mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
+                       p2m_mid_mfn_init(mid_mfn_p);
+
+                       p2m_top_mfn_p[topidx] = mid_mfn_p;
+                       p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
+                       /* Note: we don't set mid_mfn_p[midix] here,
+                        * look in __early_alloc_p2m */
                }
        }
 
@@ -650,42 +678,72 @@ static unsigned long mfn_hash(unsigned long mfn)
 }
 
 /* Add an MFN override for a particular page */
-int m2p_add_override(unsigned long mfn, struct page *page)
+int m2p_add_override(unsigned long mfn, struct page *page,
+               struct gnttab_map_grant_ref *kmap_op)
 {
        unsigned long flags;
        unsigned long pfn;
        unsigned long uninitialized_var(address);
        unsigned level;
        pte_t *ptep = NULL;
+       int ret = 0;
 
        pfn = page_to_pfn(page);
        if (!PageHighMem(page)) {
                address = (unsigned long)__va(pfn << PAGE_SHIFT);
                ptep = lookup_address(address, &level);
-
                if (WARN(ptep == NULL || level != PG_LEVEL_4K,
                                        "m2p_add_override: pfn %lx not mapped", pfn))
                        return -EINVAL;
        }
-
-       page->private = mfn;
+       WARN_ON(PagePrivate(page));
+       SetPagePrivate(page);
+       set_page_private(page, mfn);
        page->index = pfn_to_mfn(pfn);
 
        if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
                return -ENOMEM;
 
-       if (!PageHighMem(page))
-               /* Just zap old mapping for now */
-               pte_clear(&init_mm, address, ptep);
+       if (kmap_op != NULL) {
+               if (!PageHighMem(page)) {
+                       struct multicall_space mcs =
+                               xen_mc_entry(sizeof(*kmap_op));
 
+                       MULTI_grant_table_op(mcs.mc,
+                                       GNTTABOP_map_grant_ref, kmap_op, 1);
+
+                       xen_mc_issue(PARAVIRT_LAZY_MMU);
+               }
+               /* let's use dev_bus_addr to record the old mfn instead */
+               kmap_op->dev_bus_addr = page->index;
+               page->index = (unsigned long) kmap_op;
+       }
        spin_lock_irqsave(&m2p_override_lock, flags);
        list_add(&page->lru,  &m2p_overrides[mfn_hash(mfn)]);
        spin_unlock_irqrestore(&m2p_override_lock, flags);
 
+       /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in
+        * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other
+        * pfn so that the following mfn_to_pfn(mfn) calls will return the
+        * pfn from the m2p_override (the backend pfn) instead.
+        * We need to do this because the pages shared by the frontend
+        * (xen-blkfront) can be already locked (lock_page, called by
+        * do_read_cache_page); when the userspace backend tries to use them
+        * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so
+        * do_blockdev_direct_IO is going to try to lock the same pages
+        * again resulting in a deadlock.
+        * As a side effect get_user_pages_fast might not be safe on the
+        * frontend pages while they are being shared with the backend,
+        * because mfn_to_pfn (that ends up being called by GUPF) will
+        * return the backend pfn rather than the frontend pfn. */
+       ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
+       if (ret == 0 && get_phys_to_machine(pfn) == mfn)
+               set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
+
        return 0;
 }
-
-int m2p_remove_override(struct page *page)
+EXPORT_SYMBOL_GPL(m2p_add_override);
+int m2p_remove_override(struct page *page, bool clear_pte)
 {
        unsigned long flags;
        unsigned long mfn;
@@ -693,6 +751,7 @@ int m2p_remove_override(struct page *page)
        unsigned long uninitialized_var(address);
        unsigned level;
        pte_t *ptep = NULL;
+       int ret = 0;
 
        pfn = page_to_pfn(page);
        mfn = get_phys_to_machine(pfn);
@@ -711,16 +770,76 @@ int m2p_remove_override(struct page *page)
        spin_lock_irqsave(&m2p_override_lock, flags);
        list_del(&page->lru);
        spin_unlock_irqrestore(&m2p_override_lock, flags);
-       set_phys_to_machine(pfn, page->index);
+       WARN_ON(!PagePrivate(page));
+       ClearPagePrivate(page);
 
-       if (!PageHighMem(page))
-               set_pte_at(&init_mm, address, ptep,
-                               pfn_pte(pfn, PAGE_KERNEL));
-               /* No tlb flush necessary because the caller already
-                * left the pte unmapped. */
+       if (clear_pte) {
+               struct gnttab_map_grant_ref *map_op =
+                       (struct gnttab_map_grant_ref *) page->index;
+               set_phys_to_machine(pfn, map_op->dev_bus_addr);
+               if (!PageHighMem(page)) {
+                       struct multicall_space mcs;
+                       struct gnttab_unmap_grant_ref *unmap_op;
+
+                       /*
+                        * It might be that we queued all the m2p grant table
+                        * hypercalls in a multicall, then m2p_remove_override
+                        * get called before the multicall has actually been
+                        * issued. In this case handle is going to -1 because
+                        * it hasn't been modified yet.
+                        */
+                       if (map_op->handle == -1)
+                               xen_mc_flush();
+                       /*
+                        * Now if map_op->handle is negative it means that the
+                        * hypercall actually returned an error.
+                        */
+                       if (map_op->handle == GNTST_general_error) {
+                               printk(KERN_WARNING "m2p_remove_override: "
+                                               "pfn %lx mfn %lx, failed to modify kernel mappings",
+                                               pfn, mfn);
+                               return -1;
+                       }
+
+                       mcs = xen_mc_entry(
+                                       sizeof(struct gnttab_unmap_grant_ref));
+                       unmap_op = mcs.args;
+                       unmap_op->host_addr = map_op->host_addr;
+                       unmap_op->handle = map_op->handle;
+                       unmap_op->dev_bus_addr = 0;
+
+                       MULTI_grant_table_op(mcs.mc,
+                                       GNTTABOP_unmap_grant_ref, unmap_op, 1);
+
+                       xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+                       set_pte_at(&init_mm, address, ptep,
+                                       pfn_pte(pfn, PAGE_KERNEL));
+                       __flush_tlb_single(address);
+                       map_op->host_addr = 0;
+               }
+       } else
+               set_phys_to_machine(pfn, page->index);
+
+       /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
+        * somewhere in this domain, even before being added to the
+        * m2p_override (see comment above in m2p_add_override).
+        * If there are no other entries in the m2p_override corresponding
+        * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for
+        * the original pfn (the one shared by the frontend): the backend
+        * cannot do any IO on this page anymore because it has been
+        * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of
+        * the original pfn causes mfn_to_pfn(mfn) to return the frontend
+        * pfn again. */
+       mfn &= ~FOREIGN_FRAME_BIT;
+       ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
+       if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
+                       m2p_find_override(mfn) == NULL)
+               set_phys_to_machine(pfn, mfn);
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(m2p_remove_override);
 
 struct page *m2p_find_override(unsigned long mfn)
 {
@@ -733,7 +852,7 @@ struct page *m2p_find_override(unsigned long mfn)
        spin_lock_irqsave(&m2p_override_lock, flags);
 
        list_for_each_entry(p, bucket, lru) {
-               if (p->private == mfn) {
+               if (page_private(p) == mfn) {
                        ret = p;
                        break;
                }
@@ -757,17 +876,21 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
 EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
 
 #ifdef CONFIG_XEN_DEBUG_FS
-
-int p2m_dump_show(struct seq_file *m, void *v)
+#include <linux/debugfs.h>
+#include "debugfs.h"
+static int p2m_dump_show(struct seq_file *m, void *v)
 {
        static const char * const level_name[] = { "top", "middle",
-                                               "entry", "abnormal" };
-       static const char * const type_name[] = { "identity", "missing",
-                                               "pfn", "abnormal"};
+                                               "entry", "abnormal", "error"};
 #define TYPE_IDENTITY 0
 #define TYPE_MISSING 1
 #define TYPE_PFN 2
 #define TYPE_UNKNOWN 3
+       static const char * const type_name[] = {
+                               [TYPE_IDENTITY] = "identity",
+                               [TYPE_MISSING] = "missing",
+                               [TYPE_PFN] = "pfn",
+                               [TYPE_UNKNOWN] = "abnormal"};
        unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
        unsigned int uninitialized_var(prev_level);
        unsigned int uninitialized_var(prev_type);
@@ -831,4 +954,32 @@ int p2m_dump_show(struct seq_file *m, void *v)
 #undef TYPE_PFN
 #undef TYPE_UNKNOWN
 }
-#endif
+
+static int p2m_dump_open(struct inode *inode, struct file *filp)
+{
+       return single_open(filp, p2m_dump_show, NULL);
+}
+
+static const struct file_operations p2m_dump_fops = {
+       .open           = p2m_dump_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+static struct dentry *d_mmu_debug;
+
+static int __init xen_p2m_debugfs(void)
+{
+       struct dentry *d_xen = xen_init_debugfs();
+
+       if (d_xen == NULL)
+               return -ENOMEM;
+
+       d_mmu_debug = debugfs_create_dir("mmu", d_xen);
+
+       debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
+       return 0;
+}
+fs_initcall(xen_p2m_debugfs);
+#endif /* CONFIG_XEN_DEBUG_FS */