Merge branch 'xen-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen
[linux-3.10.git] / arch / x86 / xen / enlighten.c
index 493a083..94c39aa 100644 (file)
@@ -25,7 +25,6 @@
 #include <linux/mm.h>
 #include <linux/page-flags.h>
 #include <linux/highmem.h>
-#include <linux/smp.h>
 
 #include <xen/interface/xen.h>
 #include <xen/interface/physdev.h>
 
 EXPORT_SYMBOL_GPL(hypercall_page);
 
-DEFINE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode);
-
 DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
 DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
-DEFINE_PER_CPU(unsigned long, xen_cr3);
+
+/*
+ * Note about cr3 (pagetable base) values:
+ *
+ * xen_cr3 contains the current logical cr3 value; it contains the
+ * last set cr3.  This may not be the current effective cr3, because
+ * its update may be being lazily deferred.  However, a vcpu looking
+ * at its own cr3 can use this value knowing that everything will
+ * be self-consistent.
+ *
+ * xen_current_cr3 contains the actual vcpu cr3; it is set once the
+ * hypercall to set the vcpu cr3 is complete (so it may be a little
+ * out of date, but it will never be set early).  If one vcpu is
+ * looking at another vcpu's cr3 value, it should use this variable.
+ */
+DEFINE_PER_CPU(unsigned long, xen_cr3);         /* cr3 stored as physaddr */
+DEFINE_PER_CPU(unsigned long, xen_current_cr3);         /* actual vcpu cr3 */
 
 struct start_info *xen_start_info;
 EXPORT_SYMBOL_GPL(xen_start_info);
@@ -100,7 +113,7 @@ static void __init xen_vcpu_setup(int cpu)
        info.mfn = virt_to_mfn(vcpup);
        info.offset = offset_in_page(vcpup);
 
-       printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %x, offset %d\n",
+       printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n",
               cpu, vcpup, info.mfn, info.offset);
 
        /* Check to see if the hypervisor will put the vcpu_info
@@ -124,7 +137,7 @@ static void __init xen_vcpu_setup(int cpu)
 static void __init xen_banner(void)
 {
        printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
-              paravirt_ops.name);
+              pv_info.name);
        printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
 }
 
@@ -249,29 +262,10 @@ static void xen_halt(void)
                xen_safe_halt();
 }
 
-static void xen_set_lazy_mode(enum paravirt_lazy_mode mode)
+static void xen_leave_lazy(void)
 {
-       BUG_ON(preemptible());
-
-       switch (mode) {
-       case PARAVIRT_LAZY_NONE:
-               BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE);
-               break;
-
-       case PARAVIRT_LAZY_MMU:
-       case PARAVIRT_LAZY_CPU:
-               BUG_ON(x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE);
-               break;
-
-       case PARAVIRT_LAZY_FLUSH:
-               /* flush if necessary, but don't change state */
-               if (x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE)
-                       xen_mc_flush();
-               return;
-       }
-
+       paravirt_leave_lazy(paravirt_get_lazy_mode());
        xen_mc_flush();
-       x86_write_percpu(xen_lazy_mode, mode);
 }
 
 static unsigned long xen_store_tr(void)
@@ -358,7 +352,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
         * loaded properly.  This will go away as soon as Xen has been
         * modified to not save/restore %gs for normal hypercalls.
         */
-       if (xen_get_lazy_mode() == PARAVIRT_LAZY_CPU)
+       if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
                loadsegment(gs, 0);
 }
 
@@ -632,32 +626,36 @@ static unsigned long xen_read_cr3(void)
        return x86_read_percpu(xen_cr3);
 }
 
+static void set_current_cr3(void *v)
+{
+       x86_write_percpu(xen_current_cr3, (unsigned long)v);
+}
+
 static void xen_write_cr3(unsigned long cr3)
 {
+       struct mmuext_op *op;
+       struct multicall_space mcs;
+       unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
+
        BUG_ON(preemptible());
 
-       if (cr3 == x86_read_percpu(xen_cr3)) {
-               /* just a simple tlb flush */
-               xen_flush_tlb();
-               return;
-       }
+       mcs = xen_mc_entry(sizeof(*op));  /* disables interrupts */
 
+       /* Update while interrupts are disabled, so it's atomic with
+          respect to IPIs */
        x86_write_percpu(xen_cr3, cr3);
 
+       op = mcs.args;
+       op->cmd = MMUEXT_NEW_BASEPTR;
+       op->arg1.mfn = mfn;
 
-       {
-               struct mmuext_op *op;
-               struct multicall_space mcs = xen_mc_entry(sizeof(*op));
-               unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
-
-               op = mcs.args;
-               op->cmd = MMUEXT_NEW_BASEPTR;
-               op->arg1.mfn = mfn;
+       MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 
-               MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+       /* Update xen_current_cr3 once the batch has actually
+          been submitted. */
+       xen_mc_callback(set_current_cr3, (void *)cr3);
 
-               xen_mc_issue(PARAVIRT_LAZY_CPU);
-       }
+       xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
 }
 
 /* Early in boot, while setting up the initial pagetable, assume
@@ -668,6 +666,15 @@ static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn)
        make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
 }
 
+static void pin_pagetable_pfn(unsigned level, unsigned long pfn)
+{
+       struct mmuext_op op;
+       op.cmd = level;
+       op.arg1.mfn = pfn_to_mfn(pfn);
+       if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
+               BUG();
+}
+
 /* This needs to make sure the new pte page is pinned iff its being
    attached to a pinned pagetable. */
 static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
@@ -677,9 +684,10 @@ static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
        if (PagePinned(virt_to_page(mm->pgd))) {
                SetPagePinned(page);
 
-               if (!PageHighMem(page))
+               if (!PageHighMem(page)) {
                        make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
-               else
+                       pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
+               } else
                        /* make sure there are no stray mappings of
                           this page */
                        kmap_flush_unused();
@@ -692,8 +700,10 @@ static void xen_release_pt(u32 pfn)
        struct page *page = pfn_to_page(pfn);
 
        if (PagePinned(page)) {
-               if (!PageHighMem(page))
+               if (!PageHighMem(page)) {
+                       pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
                        make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
+               }
        }
 }
 
@@ -738,7 +748,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
        pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
 
        /* special set_pte for pagetable initialization */
-       paravirt_ops.set_pte = xen_set_pte_init;
+       pv_mmu_ops.set_pte = xen_set_pte_init;
 
        init_mm.pgd = base;
        /*
@@ -785,8 +795,8 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
 {
        /* This will work as long as patching hasn't happened yet
           (which it hasn't) */
-       paravirt_ops.alloc_pt = xen_alloc_pt;
-       paravirt_ops.set_pte = xen_set_pte;
+       pv_mmu_ops.alloc_pt = xen_alloc_pt;
+       pv_mmu_ops.set_pte = xen_set_pte;
 
        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
                /*
@@ -808,15 +818,15 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
        /* Actually pin the pagetable down, but we can't set PG_pinned
           yet because the page structures don't exist yet. */
        {
-               struct mmuext_op op;
+               unsigned level;
+
 #ifdef CONFIG_X86_PAE
-               op.cmd = MMUEXT_PIN_L3_TABLE;
+               level = MMUEXT_PIN_L3_TABLE;
 #else
-               op.cmd = MMUEXT_PIN_L3_TABLE;
+               level = MMUEXT_PIN_L2_TABLE;
 #endif
-               op.arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(base)));
-               if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
-                       BUG();
+
+               pin_pagetable_pfn(level, PFN_DOWN(__pa(base)));
        }
 }
 
@@ -833,12 +843,12 @@ void __init xen_setup_vcpu_info_placement(void)
        if (have_vcpu_info_placement) {
                printk(KERN_INFO "Xen: using vcpu_info placement\n");
 
-               paravirt_ops.save_fl = xen_save_fl_direct;
-               paravirt_ops.restore_fl = xen_restore_fl_direct;
-               paravirt_ops.irq_disable = xen_irq_disable_direct;
-               paravirt_ops.irq_enable = xen_irq_enable_direct;
-               paravirt_ops.read_cr2 = xen_read_cr2_direct;
-               paravirt_ops.iret = xen_iret_direct;
+               pv_irq_ops.save_fl = xen_save_fl_direct;
+               pv_irq_ops.restore_fl = xen_restore_fl_direct;
+               pv_irq_ops.irq_disable = xen_irq_disable_direct;
+               pv_irq_ops.irq_enable = xen_irq_enable_direct;
+               pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
+               pv_cpu_ops.iret = xen_iret_direct;
        }
 }
 
@@ -850,8 +860,8 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
 
        start = end = reloc = NULL;
 
-#define SITE(x)                                                                \
-       case PARAVIRT_PATCH(x):                                         \
+#define SITE(op, x)                                                    \
+       case PARAVIRT_PATCH(op.x):                                      \
        if (have_vcpu_info_placement) {                                 \
                start = (char *)xen_##x##_direct;                       \
                end = xen_##x##_direct_end;                             \
@@ -860,10 +870,10 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
        goto patch_site
 
        switch (type) {
-               SITE(irq_enable);
-               SITE(irq_disable);
-               SITE(save_fl);
-               SITE(restore_fl);
+               SITE(pv_irq_ops, irq_enable);
+               SITE(pv_irq_ops, irq_disable);
+               SITE(pv_irq_ops, save_fl);
+               SITE(pv_irq_ops, restore_fl);
 #undef SITE
 
        patch_site:
@@ -895,26 +905,32 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
        return ret;
 }
 
-static const struct paravirt_ops xen_paravirt_ops __initdata = {
+static const struct pv_info xen_info __initdata = {
        .paravirt_enabled = 1,
        .shared_kernel_pmd = 0,
 
        .name = "Xen",
-       .banner = xen_banner,
+};
 
+static const struct pv_init_ops xen_init_ops __initdata = {
        .patch = xen_patch,
 
+       .banner = xen_banner,
        .memory_setup = xen_memory_setup,
        .arch_setup = xen_arch_setup,
-       .init_IRQ = xen_init_IRQ,
        .post_allocator_init = xen_mark_init_mm_pinned,
+};
 
+static const struct pv_time_ops xen_time_ops __initdata = {
        .time_init = xen_time_init,
+
        .set_wallclock = xen_set_wallclock,
        .get_wallclock = xen_get_wallclock,
        .get_cpu_khz = xen_cpu_khz,
        .sched_clock = xen_sched_clock,
+};
 
+static const struct pv_cpu_ops xen_cpu_ops __initdata = {
        .cpuid = xen_cpuid,
 
        .set_debugreg = xen_set_debugreg,
@@ -925,22 +941,10 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
        .read_cr0 = native_read_cr0,
        .write_cr0 = native_write_cr0,
 
-       .read_cr2 = xen_read_cr2,
-       .write_cr2 = xen_write_cr2,
-
-       .read_cr3 = xen_read_cr3,
-       .write_cr3 = xen_write_cr3,
-
        .read_cr4 = native_read_cr4,
        .read_cr4_safe = native_read_cr4_safe,
        .write_cr4 = xen_write_cr4,
 
-       .save_fl = xen_save_fl,
-       .restore_fl = xen_restore_fl,
-       .irq_disable = xen_irq_disable,
-       .irq_enable = xen_irq_enable,
-       .safe_halt = xen_safe_halt,
-       .halt = xen_halt,
        .wbinvd = native_wbinvd,
 
        .read_msr = native_read_msr_safe,
@@ -969,6 +973,23 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
        .set_iopl_mask = xen_set_iopl_mask,
        .io_delay = xen_io_delay,
 
+       .lazy_mode = {
+               .enter = paravirt_enter_lazy_cpu,
+               .leave = xen_leave_lazy,
+       },
+};
+
+static const struct pv_irq_ops xen_irq_ops __initdata = {
+       .init_IRQ = xen_init_IRQ,
+       .save_fl = xen_save_fl,
+       .restore_fl = xen_restore_fl,
+       .irq_disable = xen_irq_disable,
+       .irq_enable = xen_irq_enable,
+       .safe_halt = xen_safe_halt,
+       .halt = xen_halt,
+};
+
+static const struct pv_apic_ops xen_apic_ops __initdata = {
 #ifdef CONFIG_X86_LOCAL_APIC
        .apic_write = xen_apic_write,
        .apic_write_atomic = xen_apic_write,
@@ -977,6 +998,17 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
        .setup_secondary_clock = paravirt_nop,
        .startup_ipi_hook = paravirt_nop,
 #endif
+};
+
+static const struct pv_mmu_ops xen_mmu_ops __initdata = {
+       .pagetable_setup_start = xen_pagetable_setup_start,
+       .pagetable_setup_done = xen_pagetable_setup_done,
+
+       .read_cr2 = xen_read_cr2,
+       .write_cr2 = xen_write_cr2,
+
+       .read_cr3 = xen_read_cr3,
+       .write_cr3 = xen_write_cr3,
 
        .flush_tlb_user = xen_flush_tlb,
        .flush_tlb_kernel = xen_flush_tlb,
@@ -986,9 +1018,6 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
        .pte_update = paravirt_nop,
        .pte_update_defer = paravirt_nop,
 
-       .pagetable_setup_start = xen_pagetable_setup_start,
-       .pagetable_setup_done = xen_pagetable_setup_done,
-
        .alloc_pt = xen_alloc_pt_init,
        .release_pt = xen_release_pt,
        .alloc_pd = paravirt_nop,
@@ -1024,7 +1053,10 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
        .dup_mmap = xen_dup_mmap,
        .exit_mmap = xen_exit_mmap,
 
-       .set_lazy_mode = xen_set_lazy_mode,
+       .lazy_mode = {
+               .enter = paravirt_enter_lazy_mmu,
+               .leave = xen_leave_lazy,
+       },
 };
 
 #ifdef CONFIG_SMP
@@ -1080,6 +1112,17 @@ static const struct machine_ops __initdata xen_machine_ops = {
 };
 
 
+static void __init xen_reserve_top(void)
+{
+       unsigned long top = HYPERVISOR_VIRT_START;
+       struct xen_platform_parameters pp;
+
+       if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
+               top = pp.virt_start;
+
+       reserve_top_address(-top + 2 * PAGE_SIZE);
+}
+
 /* First C function to be called on Xen boot */
 asmlinkage void __init xen_start_kernel(void)
 {
@@ -1091,7 +1134,14 @@ asmlinkage void __init xen_start_kernel(void)
        BUG_ON(memcmp(xen_start_info->magic, "xen-3.0", 7) != 0);
 
        /* Install Xen paravirt ops */
-       paravirt_ops = xen_paravirt_ops;
+       pv_info = xen_info;
+       pv_init_ops = xen_init_ops;
+       pv_time_ops = xen_time_ops;
+       pv_cpu_ops = xen_cpu_ops;
+       pv_irq_ops = xen_irq_ops;
+       pv_apic_ops = xen_apic_ops;
+       pv_mmu_ops = xen_mmu_ops;
+
        machine_ops = xen_machine_ops;
 
 #ifdef CONFIG_SMP
@@ -1113,6 +1163,7 @@ asmlinkage void __init xen_start_kernel(void)
        /* keep using Xen gdt for now; no urgent need to change it */
 
        x86_write_percpu(xen_cr3, __pa(pgd));
+       x86_write_percpu(xen_current_cr3, __pa(pgd));
 
 #ifdef CONFIG_SMP
        /* Don't do the full vcpu_info placement stuff until we have a
@@ -1124,12 +1175,12 @@ asmlinkage void __init xen_start_kernel(void)
        xen_setup_vcpu_info_placement();
 #endif
 
-       paravirt_ops.kernel_rpl = 1;
+       pv_info.kernel_rpl = 1;
        if (xen_feature(XENFEAT_supervisor_mode_kernel))
-               paravirt_ops.kernel_rpl = 0;
+               pv_info.kernel_rpl = 0;
 
        /* set the limit of our address space */
-       reserve_top_address(-HYPERVISOR_VIRT_START + 2 * PAGE_SIZE);
+       xen_reserve_top();
 
        /* set up basic CPUID stuff */
        cpu_detect(&new_cpu_data);