nv-tegra.nvidia Code Review - linux-2.6.git/blobdiff - arch/x86/xen/p2m.c
xen: mark local pages as FOREIGN in the m2p_override
[linux-2.6.git] / arch / x86 / xen / p2m.c
index 2e3bf7a0732b29182a7ff83e6e4c2f6fe572e83a..00a038540c8e4f4e34ee02e7330b3a10d74aea29 100644 (file)
 #include <asm/xen/page.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
+#include <xen/grant_table.h>
 
+#include "multicalls.h"
 #include "xen-ops.h"
 
 static void __init m2p_override_init(void);
@@ -676,13 +678,15 @@ static unsigned long mfn_hash(unsigned long mfn)
 }
 
 /* Add an MFN override for a particular page */
-int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
+int m2p_add_override(unsigned long mfn, struct page *page,
+               struct gnttab_map_grant_ref *kmap_op)
 {
        unsigned long flags;
        unsigned long pfn;
        unsigned long uninitialized_var(address);
        unsigned level;
        pte_t *ptep = NULL;
+       int ret = 0;
 
        pfn = page_to_pfn(page);
        if (!PageHighMem(page)) {
@@ -692,20 +696,50 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
                                        "m2p_add_override: pfn %lx not mapped", pfn))
                        return -EINVAL;
        }
-
-       page->private = mfn;
+       WARN_ON(PagePrivate(page));
+       SetPagePrivate(page);
+       set_page_private(page, mfn);
        page->index = pfn_to_mfn(pfn);
 
        if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
                return -ENOMEM;
 
-       if (clear_pte && !PageHighMem(page))
-               /* Just zap old mapping for now */
-               pte_clear(&init_mm, address, ptep);
+       if (kmap_op != NULL) {
+               if (!PageHighMem(page)) {
+                       struct multicall_space mcs =
+                               xen_mc_entry(sizeof(*kmap_op));
+
+                       MULTI_grant_table_op(mcs.mc,
+                                       GNTTABOP_map_grant_ref, kmap_op, 1);
+
+                       xen_mc_issue(PARAVIRT_LAZY_MMU);
+               }
+               /* let's use dev_bus_addr to record the old mfn instead */
+               kmap_op->dev_bus_addr = page->index;
+               page->index = (unsigned long) kmap_op;
+       }
        spin_lock_irqsave(&m2p_override_lock, flags);
        list_add(&page->lru,  &m2p_overrides[mfn_hash(mfn)]);
        spin_unlock_irqrestore(&m2p_override_lock, flags);
 
+       /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in
+        * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other
+        * pfn so that the following mfn_to_pfn(mfn) calls will return the
+        * pfn from the m2p_override (the backend pfn) instead.
+        * We need to do this because the pages shared by the frontend
+        * (xen-blkfront) can be already locked (lock_page, called by
+        * do_read_cache_page); when the userspace backend tries to use them
+        * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so
+        * do_blockdev_direct_IO is going to try to lock the same pages
+        * again resulting in a deadlock.
+        * As a side effect get_user_pages_fast might not be safe on the
+        * frontend pages while they are being shared with the backend,
+        * because mfn_to_pfn (that ends up being called by GUPF) will
+        * return the backend pfn rather than the frontend pfn. */
+       ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
+       if (ret == 0 && get_phys_to_machine(pfn) == mfn)
+               set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(m2p_add_override);
@@ -717,6 +751,7 @@ int m2p_remove_override(struct page *page, bool clear_pte)
        unsigned long uninitialized_var(address);
        unsigned level;
        pte_t *ptep = NULL;
+       int ret = 0;
 
        pfn = page_to_pfn(page);
        mfn = get_phys_to_machine(pfn);
@@ -735,13 +770,72 @@ int m2p_remove_override(struct page *page, bool clear_pte)
        spin_lock_irqsave(&m2p_override_lock, flags);
        list_del(&page->lru);
        spin_unlock_irqrestore(&m2p_override_lock, flags);
-       set_phys_to_machine(pfn, page->index);
+       WARN_ON(!PagePrivate(page));
+       ClearPagePrivate(page);
 
-       if (clear_pte && !PageHighMem(page))
-               set_pte_at(&init_mm, address, ptep,
-                               pfn_pte(pfn, PAGE_KERNEL));
-               /* No tlb flush necessary because the caller already
-                * left the pte unmapped. */
+       if (clear_pte) {
+               struct gnttab_map_grant_ref *map_op =
+                       (struct gnttab_map_grant_ref *) page->index;
+               set_phys_to_machine(pfn, map_op->dev_bus_addr);
+               if (!PageHighMem(page)) {
+                       struct multicall_space mcs;
+                       struct gnttab_unmap_grant_ref *unmap_op;
+
+                       /*
+                        * It might be that we queued all the m2p grant table
+                        * hypercalls in a multicall, then m2p_remove_override
+                        * gets called before the multicall has actually been
+                        * issued. In this case handle is going to be -1
+                        * because it hasn't been modified yet.
+                        */
+                       if (map_op->handle == -1)
+                               xen_mc_flush();
+                       /*
+                        * Now if map_op->handle is negative it means that the
+                        * hypercall actually returned an error.
+                        */
+                       if (map_op->handle == GNTST_general_error) {
+                               printk(KERN_WARNING "m2p_remove_override: "
+                                               "pfn %lx mfn %lx, failed to modify kernel mappings",
+                                               pfn, mfn);
+                               return -1;
+                       }
+
+                       mcs = xen_mc_entry(
+                                       sizeof(struct gnttab_unmap_grant_ref));
+                       unmap_op = mcs.args;
+                       unmap_op->host_addr = map_op->host_addr;
+                       unmap_op->handle = map_op->handle;
+                       unmap_op->dev_bus_addr = 0;
+
+                       MULTI_grant_table_op(mcs.mc,
+                                       GNTTABOP_unmap_grant_ref, unmap_op, 1);
+
+                       xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+                       set_pte_at(&init_mm, address, ptep,
+                                       pfn_pte(pfn, PAGE_KERNEL));
+                       __flush_tlb_single(address);
+                       map_op->host_addr = 0;
+               }
+       } else
+               set_phys_to_machine(pfn, page->index);
+
+       /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
+        * somewhere in this domain, even before being added to the
+        * m2p_override (see comment above in m2p_add_override).
+        * If there are no other entries in the m2p_override corresponding
+        * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for
+        * the original pfn (the one shared by the frontend): the backend
+        * cannot do any IO on this page anymore because it has been
+        * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of
+        * the original pfn causes mfn_to_pfn(mfn) to return the frontend
+        * pfn again. */
+       mfn &= ~FOREIGN_FRAME_BIT;
+       ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
+       if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
+                       m2p_find_override(mfn) == NULL)
+               set_phys_to_machine(pfn, mfn);
 
        return 0;
 }
@@ -758,7 +852,7 @@ struct page *m2p_find_override(unsigned long mfn)
        spin_lock_irqsave(&m2p_override_lock, flags);
 
        list_for_each_entry(p, bucket, lru) {
-               if (p->private == mfn) {
+               if (page_private(p) == mfn) {
                        ret = p;
                        break;
                }
@@ -782,17 +876,21 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
 EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
 
 #ifdef CONFIG_XEN_DEBUG_FS
-
-int p2m_dump_show(struct seq_file *m, void *v)
+#include <linux/debugfs.h>
+#include "debugfs.h"
+static int p2m_dump_show(struct seq_file *m, void *v)
 {
        static const char * const level_name[] = { "top", "middle",
                                                "entry", "abnormal", "error"};
-       static const char * const type_name[] = { "identity", "missing",
-                                               "pfn", "abnormal"};
 #define TYPE_IDENTITY 0
 #define TYPE_MISSING 1
 #define TYPE_PFN 2
 #define TYPE_UNKNOWN 3
+       static const char * const type_name[] = {
+                               [TYPE_IDENTITY] = "identity",
+                               [TYPE_MISSING] = "missing",
+                               [TYPE_PFN] = "pfn",
+                               [TYPE_UNKNOWN] = "abnormal"};
        unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
        unsigned int uninitialized_var(prev_level);
        unsigned int uninitialized_var(prev_type);
@@ -856,4 +954,32 @@ int p2m_dump_show(struct seq_file *m, void *v)
 #undef TYPE_PFN
 #undef TYPE_UNKNOWN
 }
-#endif
+
+static int p2m_dump_open(struct inode *inode, struct file *filp)
+{
+       return single_open(filp, p2m_dump_show, NULL);
+}
+
+static const struct file_operations p2m_dump_fops = {
+       .open           = p2m_dump_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+static struct dentry *d_mmu_debug;
+
+static int __init xen_p2m_debugfs(void)
+{
+       struct dentry *d_xen = xen_init_debugfs();
+
+       if (d_xen == NULL)
+               return -ENOMEM;
+
+       d_mmu_debug = debugfs_create_dir("mmu", d_xen);
+
+       debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
+       return 0;
+}
+fs_initcall(xen_p2m_debugfs);
+#endif /* CONFIG_XEN_DEBUG_FS */