Merge commit 'v2.6.26-rc8' into x86/xen
Ingo Molnar [Wed, 25 Jun 2008 10:16:51 +0000 (12:16 +0200)]
Conflicts:

arch/x86/xen/enlighten.c
arch/x86/xen/mmu.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>

39 files changed:
arch/x86/kernel/paravirt.c
arch/x86/xen/Kconfig
arch/x86/xen/Makefile
arch/x86/xen/enlighten.c
arch/x86/xen/manage.c [deleted file]
arch/x86/xen/mmu.c
arch/x86/xen/mmu.h
arch/x86/xen/setup.c
arch/x86/xen/smp.c
arch/x86/xen/suspend.c [new file with mode: 0644]
arch/x86/xen/time.c
arch/x86/xen/xen-head.S
arch/x86/xen/xen-ops.h
drivers/char/hvc_xen.c
drivers/input/xen-kbdfront.c
drivers/lguest/lg.h
drivers/video/xen-fbfront.c
drivers/xen/Makefile
drivers/xen/balloon.c
drivers/xen/events.c
drivers/xen/grant-table.c
drivers/xen/manage.c [new file with mode: 0644]
drivers/xen/xenbus/xenbus_comms.c
include/asm-x86/page.h
include/asm-x86/paravirt.h
include/asm-x86/pgtable.h
include/asm-x86/xen/hypercall.h
include/asm-x86/xen/page.h
include/linux/console.h
include/linux/page-flags.h
include/xen/events.h
include/xen/grant_table.h
include/xen/hvc-console.h
include/xen/interface/elfnote.h
include/xen/interface/io/fbif.h
include/xen/interface/io/kbdif.h
include/xen/interface/memory.h
include/xen/xen-ops.h
kernel/printk.c

index 74f0c5e..c98d546 100644 (file)
@@ -403,6 +403,7 @@ struct pv_mmu_ops pv_mmu_ops = {
 #endif /* PAGETABLE_LEVELS >= 3 */
 
        .pte_val = native_pte_val,
+       .pte_flags = native_pte_val,
        .pgd_val = native_pgd_val,
 
        .make_pte = native_make_pte,
index 6c388e5..c2cc995 100644 (file)
@@ -12,3 +12,13 @@ config XEN
          This is the Linux Xen port.  Enabling this will allow the
          kernel to boot in a paravirtualized environment under the
          Xen hypervisor.
+
+config XEN_MAX_DOMAIN_MEMORY
+       int "Maximum allowed size of a domain in gigabytes"
+       default 8
+       depends on XEN
+       help
+         The pseudo-physical to machine address array is sized
+         according to the maximum possible memory size of a Xen
+         domain.  This array uses 1 page per gigabyte, so there's no
+         need to be too stingy here.
\ No newline at end of file
index 3d8df98..2ba2d16 100644 (file)
@@ -1,4 +1,4 @@
 obj-y          := enlighten.o setup.o multicalls.o mmu.o \
-                       time.o manage.o xen-asm.o grant-table.o
+                       time.o xen-asm.o grant-table.o suspend.o
 
 obj-$(CONFIG_SMP)      += smp.o
index f09c1c6..73fb0c4 100644 (file)
@@ -75,13 +75,13 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3);      /* actual vcpu cr3 */
 struct start_info *xen_start_info;
 EXPORT_SYMBOL_GPL(xen_start_info);
 
-static /* __initdata */ struct shared_info dummy_shared_info;
+struct shared_info xen_dummy_shared_info;
 
 /*
  * Point at some empty memory to start with. We map the real shared_info
  * page as soon as fixmap is up and running.
  */
-struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info;
+struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
 
 /*
  * Flag to determine whether vcpu info placement is available on all
@@ -98,13 +98,13 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info;
  */
 static int have_vcpu_info_placement = 1;
 
-static void __init xen_vcpu_setup(int cpu)
+static void xen_vcpu_setup(int cpu)
 {
        struct vcpu_register_vcpu_info info;
        int err;
        struct vcpu_info *vcpup;
 
-       BUG_ON(HYPERVISOR_shared_info == &dummy_shared_info);
+       BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
        per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
 
        if (!have_vcpu_info_placement)
@@ -136,6 +136,34 @@ static void __init xen_vcpu_setup(int cpu)
        }
 }
 
+/*
+ * On restore, set the vcpu placement up again.
+ * If it fails, then we're in a bad state, since
+ * we can't back out from using it...
+ *
+ * Does nothing unless have_vcpu_info_placement is set.  Otherwise
+ * xen_vcpu_setup() is re-run for every online cpu; each cpu other
+ * than the one executing this function is taken down with
+ * VCPUOP_down first and brought back with VCPUOP_up afterwards.
+ * A failure of either hypercall is fatal (BUG).
+ *
+ * The closing BUG_ON requires the flag to still be set once the
+ * loop is done; presumably xen_vcpu_setup() can clear it when
+ * registration fails -- TODO confirm, its body is only partly
+ * visible here.
+ */
+void xen_vcpu_restore(void)
+{
+       if (have_vcpu_info_placement) {
+               int cpu;
+
+               for_each_online_cpu(cpu) {
+                       bool other_cpu = (cpu != smp_processor_id());
+
+                       if (other_cpu &&
+                           HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
+                               BUG();
+
+                       xen_vcpu_setup(cpu);
+
+                       if (other_cpu &&
+                           HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
+                               BUG();
+               }
+
+               BUG_ON(!have_vcpu_info_placement);
+       }
+}
+
 static void __init xen_banner(void)
 {
        printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
@@ -235,13 +263,13 @@ static void xen_irq_enable(void)
 {
        struct vcpu_info *vcpu;
 
-       /* There's a one instruction preempt window here.  We need to
-          make sure we're don't switch CPUs between getting the vcpu
-          pointer and updating the mask. */
-       preempt_disable();
+       /* We don't need to worry about being preempted here, since
+          either a) interrupts are disabled, so no preemption, or b)
+          the caller is confused and is trying to re-enable interrupts
+          on an indeterminate processor. */
+
        vcpu = x86_read_percpu(xen_vcpu);
        vcpu->evtchn_upcall_mask = 0;
-       preempt_enable_no_resched();
 
        /* Doesn't matter if we get preempted here, because any
           pending event will get dealt with anyway. */
@@ -254,7 +282,7 @@ static void xen_irq_enable(void)
 static void xen_safe_halt(void)
 {
        /* Blocking includes an implicit local_irq_enable(). */
-       if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0)
+       if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
                BUG();
 }
 
@@ -607,6 +635,30 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
        xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
 
+static void xen_clts(void)
+{      /*
+        * Queue one fpu_taskswitch multicall with argument 0 and issue
+        * it through xen_mc_issue(PARAVIRT_LAZY_CPU).  Presumably 0
+        * means "clear TS", mirroring xen_write_cr0(), which passes
+        * (cr0 & X86_CR0_TS) != 0 -- TODO confirm against the Xen
+        * interface.
+        */
+       struct multicall_space mcs;
+
+       mcs = xen_mc_entry(0);
+
+       MULTI_fpu_taskswitch(mcs.mc, 0);
+
+       xen_mc_issue(PARAVIRT_LAZY_CPU);
+}
+
+static void xen_write_cr0(unsigned long cr0)
+{
+       struct multicall_space mcs;
+
+       /* Only pay attention to cr0.TS; everything else is
+          ignored.  The TS bit is reduced to 0 or 1 and handed
+          to MULTI_fpu_taskswitch(); no other bit of cr0 reaches
+          the multicall, and nothing is written to the real
+          register here. */
+       mcs = xen_mc_entry(0);
+
+       MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0);
+
+       xen_mc_issue(PARAVIRT_LAZY_CPU);
+}
+
 static void xen_write_cr2(unsigned long cr2)
 {
        x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
@@ -624,8 +676,10 @@ static unsigned long xen_read_cr2_direct(void)
 
 static void xen_write_cr4(unsigned long cr4)
 {
-       /* Just ignore cr4 changes; Xen doesn't allow us to do
-          anything anyway. */
+       cr4 &= ~X86_CR4_PGE;
+       cr4 &= ~X86_CR4_PSE;
+
+       native_write_cr4(cr4);
 }
 
 static unsigned long xen_read_cr3(void)
@@ -831,7 +885,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
                          PFN_DOWN(__pa(xen_start_info->pt_base)));
 }
 
-static __init void setup_shared_info(void)
+void xen_setup_shared_info(void)
 {
        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
                unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
@@ -854,6 +908,8 @@ static __init void setup_shared_info(void)
        /* In UP this is as good a place as any to set up shared info */
        xen_setup_vcpu_info_placement();
 #endif
+
+       xen_setup_mfn_list_list();
 }
 
 static __init void xen_pagetable_setup_done(pgd_t *base)
@@ -866,15 +922,23 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
        pv_mmu_ops.release_pmd = xen_release_pmd;
        pv_mmu_ops.set_pte = xen_set_pte;
 
-       setup_shared_info();
+       xen_setup_shared_info();
 
        /* Actually pin the pagetable down, but we can't set PG_pinned
           yet because the page structures don't exist yet. */
        pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
 }
 
+static __init void xen_post_allocator_init(void)
+{      /*
+        * Switch pv_mmu_ops.set_pmd and .set_pud over to xen_set_pmd()
+        * and xen_set_pud(), then call xen_mark_init_mm_pinned().
+        * Presumably the switch is deferred to this point because
+        * those two functions look at PagePinned() on a struct page
+        * -- TODO confirm.
+        */
+       pv_mmu_ops.set_pmd = xen_set_pmd;
+       pv_mmu_ops.set_pud = xen_set_pud;
+
+       xen_mark_init_mm_pinned();
+}
+
 /* This is called once we have the cpu_possible_map */
-void __init xen_setup_vcpu_info_placement(void)
+void xen_setup_vcpu_info_placement(void)
 {
        int cpu;
 
@@ -960,7 +1024,7 @@ static const struct pv_init_ops xen_init_ops __initdata = {
        .banner = xen_banner,
        .memory_setup = xen_memory_setup,
        .arch_setup = xen_arch_setup,
-       .post_allocator_init = xen_mark_init_mm_pinned,
+       .post_allocator_init = xen_post_allocator_init,
 };
 
 static const struct pv_time_ops xen_time_ops __initdata = {
@@ -978,10 +1042,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
        .set_debugreg = xen_set_debugreg,
        .get_debugreg = xen_get_debugreg,
 
-       .clts = native_clts,
+       .clts = xen_clts,
 
        .read_cr0 = native_read_cr0,
-       .write_cr0 = native_write_cr0,
+       .write_cr0 = xen_write_cr0,
 
        .read_cr4 = native_read_cr4,
        .read_cr4_safe = native_read_cr4_safe,
@@ -1072,9 +1136,10 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
        .set_pte = NULL,        /* see xen_pagetable_setup_* */
        .set_pte_at = xen_set_pte_at,
-       .set_pmd = xen_set_pmd,
+       .set_pmd = xen_set_pmd_hyper,
 
        .pte_val = xen_pte_val,
+       .pte_flags = native_pte_val,
        .pgd_val = xen_pgd_val,
 
        .make_pte = xen_make_pte,
@@ -1082,7 +1147,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
        .set_pte_atomic = xen_set_pte_atomic,
        .set_pte_present = xen_set_pte_at,
-       .set_pud = xen_set_pud,
+       .set_pud = xen_set_pud_hyper,
        .pte_clear = xen_pte_clear,
        .pmd_clear = xen_pmd_clear,
 
@@ -1114,11 +1179,13 @@ static const struct smp_ops xen_smp_ops __initdata = {
 
 static void xen_reboot(int reason)
 {
+       struct sched_shutdown r = { .reason = reason };
+
 #ifdef CONFIG_SMP
        smp_send_stop();
 #endif
 
-       if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason))
+       if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
                BUG();
 }
 
@@ -1192,7 +1259,7 @@ asmlinkage void __init xen_start_kernel(void)
 
        /* Get mfn list */
        if (!xen_feature(XENFEAT_auto_translated_physmap))
-               phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list;
+               xen_build_dynamic_phys_to_machine();
 
        pgd = (pgd_t *)xen_start_info->pt_base;
 
@@ -1232,8 +1299,11 @@ asmlinkage void __init xen_start_kernel(void)
                ? __pa(xen_start_info->mod_start) : 0;
        boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
 
-       if (!is_initial_xendomain())
+       if (!is_initial_xendomain()) {
+               add_preferred_console("xenboot", 0, NULL);
+               add_preferred_console("tty", 0, NULL);
                add_preferred_console("hvc", 0, NULL);
+       }
 
        /* Start the world */
        start_kernel();
diff --git a/arch/x86/xen/manage.c b/arch/x86/xen/manage.c
deleted file mode 100644 (file)
index aa7af9e..0000000
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Handle extern requests for shutdown, reboot and sysrq
- */
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/reboot.h>
-#include <linux/sysrq.h>
-
-#include <xen/xenbus.h>
-
-#define SHUTDOWN_INVALID  -1
-#define SHUTDOWN_POWEROFF  0
-#define SHUTDOWN_SUSPEND   2
-/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
- * report a crash, not be instructed to crash!
- * HALT is the same as POWEROFF, as far as we're concerned.  The tools use
- * the distinction when we return the reason code to them.
- */
-#define SHUTDOWN_HALT      4
-
-/* Ignore multiple shutdown requests. */
-static int shutting_down = SHUTDOWN_INVALID;
-
-static void shutdown_handler(struct xenbus_watch *watch,
-                            const char **vec, unsigned int len)
-{
-       char *str;
-       struct xenbus_transaction xbt;
-       int err;
-
-       if (shutting_down != SHUTDOWN_INVALID)
-               return;
-
- again:
-       err = xenbus_transaction_start(&xbt);
-       if (err)
-               return;
-
-       str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
-       /* Ignore read errors and empty reads. */
-       if (XENBUS_IS_ERR_READ(str)) {
-               xenbus_transaction_end(xbt, 1);
-               return;
-       }
-
-       xenbus_write(xbt, "control", "shutdown", "");
-
-       err = xenbus_transaction_end(xbt, 0);
-       if (err == -EAGAIN) {
-               kfree(str);
-               goto again;
-       }
-
-       if (strcmp(str, "poweroff") == 0 ||
-           strcmp(str, "halt") == 0)
-               orderly_poweroff(false);
-       else if (strcmp(str, "reboot") == 0)
-               ctrl_alt_del();
-       else {
-               printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
-               shutting_down = SHUTDOWN_INVALID;
-       }
-
-       kfree(str);
-}
-
-static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
-                         unsigned int len)
-{
-       char sysrq_key = '\0';
-       struct xenbus_transaction xbt;
-       int err;
-
- again:
-       err = xenbus_transaction_start(&xbt);
-       if (err)
-               return;
-       if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
-               printk(KERN_ERR "Unable to read sysrq code in "
-                      "control/sysrq\n");
-               xenbus_transaction_end(xbt, 1);
-               return;
-       }
-
-       if (sysrq_key != '\0')
-               xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
-
-       err = xenbus_transaction_end(xbt, 0);
-       if (err == -EAGAIN)
-               goto again;
-
-       if (sysrq_key != '\0')
-               handle_sysrq(sysrq_key, NULL);
-}
-
-static struct xenbus_watch shutdown_watch = {
-       .node = "control/shutdown",
-       .callback = shutdown_handler
-};
-
-static struct xenbus_watch sysrq_watch = {
-       .node = "control/sysrq",
-       .callback = sysrq_handler
-};
-
-static int setup_shutdown_watcher(void)
-{
-       int err;
-
-       err = register_xenbus_watch(&shutdown_watch);
-       if (err) {
-               printk(KERN_ERR "Failed to set shutdown watcher\n");
-               return err;
-       }
-
-       err = register_xenbus_watch(&sysrq_watch);
-       if (err) {
-               printk(KERN_ERR "Failed to set sysrq watcher\n");
-               return err;
-       }
-
-       return 0;
-}
-
-static int shutdown_event(struct notifier_block *notifier,
-                         unsigned long event,
-                         void *data)
-{
-       setup_shutdown_watcher();
-       return NOTIFY_DONE;
-}
-
-static int __init setup_shutdown_event(void)
-{
-       static struct notifier_block xenstore_notifier = {
-               .notifier_call = shutdown_event
-       };
-       register_xenstore_notifier(&xenstore_notifier);
-
-       return 0;
-}
-
-subsys_initcall(setup_shutdown_event);
index df40bf7..8132aa8 100644 (file)
 #include "multicalls.h"
 #include "mmu.h"
 
+#define P2M_ENTRIES_PER_PAGE   (PAGE_SIZE / sizeof(unsigned long))     /* p2m entries held by one page */
+#define TOP_ENTRIES            (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)       /* slots in the top-level table */
+
+/* Placeholder for holes in the address space: one page's worth of
+   entries, every one of them initialised to ~0UL. */
+static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE]
+       __attribute__((section(".data.page_aligned"))) =
+               { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
+
+ /* Array of pointers to pages containing p2m entries.  Every slot
+    starts out pointing at p2m_missing. */
+static unsigned long *p2m_top[TOP_ENTRIES]
+       __attribute__((section(".data.page_aligned"))) =
+               { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
+
+/* Arrays of p2m arrays expressed in mfns used for save/restore.
+   p2m_top_mfn has one entry per p2m_top slot; p2m_top_mfn_list is
+   the level above it.
+   NOTE(review): the element count of p2m_top_mfn_list is wrapped
+   in PAGE_ALIGN().  If PAGE_ALIGN rounds up to a multiple of
+   PAGE_SIZE, as its name suggests, the array ends up with far more
+   than TOP_ENTRIES / P2M_ENTRIES_PER_PAGE elements -- confirm this
+   is intended. */
+static unsigned long p2m_top_mfn[TOP_ENTRIES]
+       __attribute__((section(".bss.page_aligned")));
+
+static unsigned long p2m_top_mfn_list[
+                       PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)]
+       __attribute__((section(".bss.page_aligned")));
+
+static inline unsigned p2m_top_index(unsigned long pfn)
+{      /*
+        * Index of the p2m_top slot that covers pfn.  A pfn at or
+        * above MAX_DOMAIN_PAGES is a BUG, so callers must range-check
+        * first.
+        */
+       BUG_ON(pfn >= MAX_DOMAIN_PAGES);
+       return pfn / P2M_ENTRIES_PER_PAGE;
+}
+
+static inline unsigned p2m_index(unsigned long pfn)
+{      /* Position of pfn inside its leaf page: pfn modulo P2M_ENTRIES_PER_PAGE. */
+       return pfn % P2M_ENTRIES_PER_PAGE;
+}
+
+/* Build the parallel p2m_top_mfn structures.
+ *
+ * First pass: for every p2m_top slot, store the mfn of the page it
+ * points at in the matching p2m_top_mfn slot.  Second pass: for each
+ * p2m_top_mfn_list slot, store the mfn of the address
+ * &p2m_top_mfn[idx * P2M_ENTRIES_PER_PAGE].  Finally the mfn of
+ * p2m_top_mfn_list and xen_start_info->nr_pages are published in
+ * HYPERVISOR_shared_info->arch.
+ *
+ * Must not be called while HYPERVISOR_shared_info still points at
+ * xen_dummy_shared_info (BUG).
+ *
+ * NOTE(review): the second pass is bounded by
+ * ARRAY_SIZE(p2m_top_mfn_list), while p2m_top_mfn has only
+ * TOP_ENTRIES elements; confirm idx * P2M_ENTRIES_PER_PAGE stays
+ * inside p2m_top_mfn for every idx.
+ */
+void xen_setup_mfn_list_list(void)
+{
+       unsigned pfn, idx;
+
+       for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
+               unsigned topidx = p2m_top_index(pfn);
+
+               p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
+       }
+
+       for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
+               unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
+               p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
+       }
+
+       BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
+
+       HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+               virt_to_mfn(p2m_top_mfn_list);
+       HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages;
+}
+
+/* Set up p2m_top to point to the domain-builder provided p2m pages.
+ *
+ * Walks pfns from 0 up to min(MAX_DOMAIN_PAGES, nr_pages) in steps
+ * of P2M_ENTRIES_PER_PAGE and points each covering p2m_top slot at
+ * the matching offset inside xen_start_info->mfn_list.  Nothing is
+ * copied, and slots above that limit are left untouched.
+ */
+void __init xen_build_dynamic_phys_to_machine(void)
+{
+       unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
+       unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
+       unsigned pfn;
+
+       for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
+               unsigned topidx = p2m_top_index(pfn);
+
+               p2m_top[topidx] = &mfn_list[pfn];
+       }
+}
+
+unsigned long get_phys_to_machine(unsigned long pfn)
+{      /*
+        * Look pfn up in the two-level p2m table.  A pfn at or above
+        * MAX_DOMAIN_PAGES yields INVALID_P2M_ENTRY without touching
+        * the table; otherwise the value stored in the leaf page is
+        * returned as-is (for a slot still pointing at p2m_missing
+        * that value is ~0UL).
+        */
+       unsigned topidx, idx;
+
+       if (unlikely(pfn >= MAX_DOMAIN_PAGES))
+               return INVALID_P2M_ENTRY;
+
+       topidx = p2m_top_index(pfn);
+       idx = p2m_index(pfn);
+       return p2m_top[topidx][idx];
+}
+EXPORT_SYMBOL_GPL(get_phys_to_machine);
+
+static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
+{      /*
+        * Replace a p2m_missing placeholder with a real leaf page.
+        *
+        * pp:   top-level slot to fill in.
+        * mfnp: where to record the new page's mfn.
+        *
+        * A fresh page is allocated and filled with INVALID_P2M_ENTRY,
+        * then installed with cmpxchg() only if *pp still equals
+        * p2m_missing.  If the slot already holds something else the
+        * new page is freed again and *mfnp is left alone.
+        */
+       unsigned long *p;
+       unsigned i;
+
+       p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
+       BUG_ON(p == NULL);
+
+       for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
+               p[i] = INVALID_P2M_ENTRY;
+
+       if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
+               free_page((unsigned long)p);
+       else
+               *mfnp = virt_to_mfn(p);
+}
+
+void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{      /*
+        * Record that pfn maps to mfn in the two-level p2m table.
+        *
+        * With XENFEAT_auto_translated_physmap nothing is stored; the
+        * only accepted values are mfn == pfn or INVALID_P2M_ENTRY
+        * (anything else is a BUG).  For pfn at or above
+        * MAX_DOMAIN_PAGES nothing is stored either, and only
+        * INVALID_P2M_ENTRY is accepted.
+        */
+       unsigned topidx, idx;
+
+       if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+               BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+               return;
+       }
+
+       if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
+               BUG_ON(mfn != INVALID_P2M_ENTRY);
+               return;
+       }
+
+       topidx = p2m_top_index(pfn);
+       if (p2m_top[topidx] == p2m_missing) {
+               /* no need to allocate a page to store an invalid entry;
+                  otherwise swap the placeholder for a real leaf page
+                  before writing into it */
+               if (mfn == INVALID_P2M_ENTRY)
+                       return;
+               alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]);
+       }
+
+       idx = p2m_index(pfn);
+       p2m_top[topidx][idx] = mfn;
+}
+
 xmaddr_t arbitrary_virt_to_machine(unsigned long address)
 {
        unsigned int level;
@@ -98,7 +223,14 @@ void make_lowmem_page_readwrite(void *vaddr)
 }
 
 
-void xen_set_pmd(pmd_t *ptr, pmd_t val)
+static bool page_pinned(void *ptr)
+{      /*
+        * Report whether the struct page that virt_to_page() returns
+        * for ptr has its Pinned flag set.
+        */
+       struct page *page = virt_to_page(ptr);
+
+       return PagePinned(page);
+}
+
+void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
 {
        struct multicall_space mcs;
        struct mmu_update *u;
@@ -116,6 +248,18 @@ void xen_set_pmd(pmd_t *ptr, pmd_t val)
        preempt_enable();
 }
 
+void xen_set_pmd(pmd_t *ptr, pmd_t val)
+{
+       /* If page is not pinned, we can just update the entry
+          directly with a plain store.  An entry living in a
+          pinned page is handed to xen_set_pmd_hyper() instead. */
+       if (!page_pinned(ptr)) {
+               *ptr = val;
+               return;
+       }
+
+       xen_set_pmd_hyper(ptr, val);
+}
+
 /*
  * Associate a virtual page frame with a given physical page frame
  * and protection flags for that frame.
@@ -229,7 +373,7 @@ pmdval_t xen_pmd_val(pmd_t pmd)
        return pte_mfn_to_pfn(pmd.pmd);
 }
 
-void xen_set_pud(pud_t *ptr, pud_t val)
+void xen_set_pud_hyper(pud_t *ptr, pud_t val)
 {
        struct multicall_space mcs;
        struct mmu_update *u;
@@ -247,6 +391,18 @@ void xen_set_pud(pud_t *ptr, pud_t val)
        preempt_enable();
 }
 
+void xen_set_pud(pud_t *ptr, pud_t val)
+{
+       /* If page is not pinned, we can just update the entry
+          directly with a plain store.  An entry living in a
+          pinned page is handed to xen_set_pud_hyper() instead. */
+       if (!page_pinned(ptr)) {
+               *ptr = val;
+               return;
+       }
+
+       xen_set_pud_hyper(ptr, val);
+}
+
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
        ptep->pte_high = pte.pte_high;
@@ -268,7 +424,7 @@ void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 
 void xen_pmd_clear(pmd_t *pmdp)
 {
-       xen_set_pmd(pmdp, __pmd(0));
+       set_pmd(pmdp, __pmd(0));
 }
 
 pmd_t xen_make_pmd(pmdval_t pmd)
@@ -441,6 +597,29 @@ void xen_pgd_pin(pgd_t *pgd)
        xen_mc_issue(0);
 }
 
+/*
+ * On save, we need to pin all pagetables to make sure they get their
+ * mfns turned into pfns.  Search the list for any unpinned pgds and pin
+ * them (unpinned pgds are not currently in use, probably because the
+ * process is under construction or destruction).
+ *
+ * pgd_list is walked with pgd_lock held and interrupts saved and
+ * disabled.  Every pgd pinned here is also marked SavePinned, which
+ * is the flag xen_mm_unpin_all() tests to find the pgds it has to
+ * unpin again.
+ */
+void xen_mm_pin_all(void)
+{
+       unsigned long flags;
+       struct page *page;
+
+       spin_lock_irqsave(&pgd_lock, flags);
+
+       list_for_each_entry(page, &pgd_list, lru) {
+               if (!PagePinned(page)) {
+                       xen_pgd_pin((pgd_t *)page_address(page));
+                       SetPageSavePinned(page);
+               }
+       }
+
+       spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
 /* The init_mm pagetable is really pinned as soon as its created, but
    that's before we have page structures to store the bits.  So do all
    the book-keeping now. */
@@ -498,6 +677,29 @@ static void xen_pgd_unpin(pgd_t *pgd)
        xen_mc_issue(0);
 }
 
+/*
+ * On resume, undo any pinning done at save, so that the rest of the
+ * kernel doesn't see any unexpected pinned pagetables.
+ *
+ * pgd_list is walked with pgd_lock held and interrupts saved and
+ * disabled.  Only pgds marked SavePinned are touched: each one must
+ * still be Pinned (BUG otherwise), is unpinned, and has its
+ * SavePinned mark cleared.
+ *
+ * NOTE(review): the printk below carries no KERN_* level and fires
+ * once per unpinned pgd; it looks like debugging output -- confirm
+ * it is meant to stay.
+ */
+void xen_mm_unpin_all(void)
+{
+       unsigned long flags;
+       struct page *page;
+
+       spin_lock_irqsave(&pgd_lock, flags);
+
+       list_for_each_entry(page, &pgd_list, lru) {
+               if (PageSavePinned(page)) {
+                       BUG_ON(!PagePinned(page));
+                       printk("unpinning pinned %p\n", page_address(page));
+                       xen_pgd_unpin((pgd_t *)page_address(page));
+                       ClearPageSavePinned(page);
+               }
+       }
+
+       spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
 void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
 {
        spin_lock(&next->page_table_lock);
@@ -591,7 +793,7 @@ void xen_exit_mmap(struct mm_struct *mm)
        spin_lock(&mm->page_table_lock);
 
        /* pgd may not be pinned in the error exit path of execve */
-       if (PagePinned(virt_to_page(mm->pgd)))
+       if (page_pinned(mm->pgd))
                xen_pgd_unpin(mm->pgd);
 
        spin_unlock(&mm->page_table_lock);
index 5fe961c..e3dd09e 100644 (file)
@@ -25,10 +25,6 @@ enum pt_level {
 
 void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
-void xen_set_pte(pte_t *ptep, pte_t pteval);
-void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
-                   pte_t *ptep, pte_t pteval);
-void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
 
 void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next);
 void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
@@ -45,10 +41,14 @@ pte_t xen_make_pte(pteval_t);
 pmd_t xen_make_pmd(pmdval_t);
 pgd_t xen_make_pgd(pgdval_t);
 
+void xen_set_pte(pte_t *ptep, pte_t pteval);
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
                    pte_t *ptep, pte_t pteval);
 void xen_set_pte_atomic(pte_t *ptep, pte_t pte);
+void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
 void xen_set_pud(pud_t *ptr, pud_t val);
+void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval);
+void xen_set_pud_hyper(pud_t *ptr, pud_t val);
 void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
 void xen_pmd_clear(pmd_t *pmdp);
 
index 82517e4..4884478 100644 (file)
@@ -16,6 +16,7 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
+#include <xen/page.h>
 #include <xen/interface/callback.h>
 #include <xen/interface/physdev.h>
 #include <xen/features.h>
@@ -27,8 +28,6 @@
 extern const char xen_hypervisor_callback[];
 extern const char xen_failsafe_callback[];
 
-unsigned long *phys_to_machine_mapping;
-EXPORT_SYMBOL(phys_to_machine_mapping);
 
 /**
  * machine_specific_memory_setup - Hook for machine specific memory setup.
@@ -38,6 +37,8 @@ char * __init xen_memory_setup(void)
 {
        unsigned long max_pfn = xen_start_info->nr_pages;
 
+       max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
+
        e820.nr_map = 0;
        add_memory_region(0, LOWMEMSIZE(), E820_RAM);
        add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM);
index 94e6900..d2e3c20 100644 (file)
@@ -35,7 +35,7 @@
 #include "xen-ops.h"
 #include "mmu.h"
 
-static cpumask_t xen_cpu_initialized_map;
+cpumask_t xen_cpu_initialized_map;
 static DEFINE_PER_CPU(int, resched_irq) = -1;
 static DEFINE_PER_CPU(int, callfunc_irq) = -1;
 static DEFINE_PER_CPU(int, debug_irq) = -1;
@@ -65,6 +65,12 @@ static struct call_data_struct *call_data;
  */
 static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
 {
+#ifdef CONFIG_X86_32
+       __get_cpu_var(irq_stat).irq_resched_count++;
+#else
+       add_pda(irq_resched_count, 1);
+#endif
+
        return IRQ_HANDLED;
 }
 
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
new file mode 100644 (file)
index 0000000..251669a
--- /dev/null
@@ -0,0 +1,45 @@
+#include <linux/types.h>
+
+#include <xen/interface/xen.h>
+#include <xen/grant_table.h>
+#include <xen/events.h>
+
+#include <asm/xen/hypercall.h>
+#include <asm/xen/page.h>
+
+#include "xen-ops.h"
+#include "mmu.h"
+
+void xen_pre_suspend(void)
+{      /*
+        * Prepare the start_info and shared_info state for suspend:
+        *  - rewrite store_mfn and console.domU.mfn in xen_start_info
+        *    from mfns to pfns;
+        *  - insist that interrupts are already disabled (BUG if not);
+        *  - point HYPERVISOR_shared_info back at xen_dummy_shared_info;
+        *  - replace the FIX_PARAVIRT_BOOTMAP fixmap mapping with an
+        *    all-zero pte (BUG if the hypercall fails).
+        */
+       xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
+       xen_start_info->console.domU.mfn =
+               mfn_to_pfn(xen_start_info->console.domU.mfn);
+
+       BUG_ON(!irqs_disabled());
+
+       HYPERVISOR_shared_info = &xen_dummy_shared_info;
+       if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP),
+                                        __pte_ma(0), 0))
+               BUG();
+}
+
+void xen_post_suspend(int suspend_cancelled)
+{      /*
+        * Counterpart of xen_pre_suspend().
+        *
+        * suspend_cancelled: non-zero when the suspend did not go
+        * through.
+        *
+        * xen_setup_shared_info() is called in both cases.  On a
+        * cancelled suspend the store and console frame numbers in
+        * xen_start_info are turned back from pfns into mfns.
+        * Otherwise those fields are left alone and the vcpu state is
+        * re-established: with CONFIG_SMP xen_cpu_initialized_map is
+        * reset to cpu_online_map, then xen_vcpu_restore() and
+        * xen_timer_resume() run.
+        */
+       xen_setup_shared_info();
+
+       if (suspend_cancelled) {
+               xen_start_info->store_mfn =
+                       pfn_to_mfn(xen_start_info->store_mfn);
+               xen_start_info->console.domU.mfn =
+                       pfn_to_mfn(xen_start_info->console.domU.mfn);
+       } else {
+#ifdef CONFIG_SMP
+               xen_cpu_initialized_map = cpu_online_map;
+#endif
+               xen_vcpu_restore();
+               xen_timer_resume();
+       }
+
+}
+
index 41e2175..64f0038 100644 (file)
@@ -459,6 +459,19 @@ void xen_setup_cpu_clockevents(void)
        clockevents_register_device(&__get_cpu_var(xen_clock_events));
 }
 
+void xen_timer_resume(void)
+{      /*
+        * Only acts when xen_clockevent is the vcpuop clockevent; in
+        * that case VCPUOP_stop_periodic_timer is issued for every
+        * online cpu, and a failing hypercall is a BUG.
+        */
+       int cpu;
+
+       if (xen_clockevent != &xen_vcpuop_clockevent)
+               return;
+
+       for_each_online_cpu(cpu) {
+               if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
+                       BUG();
+       }
+}
+
 __init void xen_time_init(void)
 {
        int cpu = smp_processor_id();
index 6ec3b4f..7c0cf63 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <asm/boot.h>
 #include <xen/interface/elfnote.h>
+#include <asm/xen/interface.h>
 
        __INIT
 ENTRY(startup_xen)
@@ -32,5 +33,9 @@ ENTRY(hypercall_page)
        ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz "!writable_page_tables|pae_pgdir_above_4gb")
        ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "yes")
        ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz "generic")
+       ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
+               .quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
+       ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
+       ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   .long __HYPERVISOR_VIRT_START)
 
 #endif /*CONFIG_XEN */
index f1063ae..9a05559 100644 (file)
@@ -9,18 +9,26 @@
 extern const char xen_hypervisor_callback[];
 extern const char xen_failsafe_callback[];
 
+struct trap_info;
 void xen_copy_trap_info(struct trap_info *traps);
 
 DECLARE_PER_CPU(unsigned long, xen_cr3);
 DECLARE_PER_CPU(unsigned long, xen_current_cr3);
 
 extern struct start_info *xen_start_info;
+extern struct shared_info xen_dummy_shared_info;
 extern struct shared_info *HYPERVISOR_shared_info;
 
+void xen_setup_mfn_list_list(void);
+void xen_setup_shared_info(void);
+
 char * __init xen_memory_setup(void);
 void __init xen_arch_setup(void);
 void __init xen_init_IRQ(void);
 void xen_enable_sysenter(void);
+void xen_vcpu_restore(void);
+
+void __init xen_build_dynamic_phys_to_machine(void);
 
 void xen_setup_timer(int cpu);
 void xen_setup_cpu_clockevents(void);
@@ -29,6 +37,7 @@ void __init xen_time_init(void);
 unsigned long xen_get_wallclock(void);
 int xen_set_wallclock(unsigned long time);
 unsigned long long xen_sched_clock(void);
+void xen_timer_resume(void);
 
 irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
 
@@ -54,6 +63,8 @@ int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info,
 int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
                               void *info, int wait);
 
+extern cpumask_t xen_cpu_initialized_map;
+
 
 /* Declare an asm function, along with symbols needed to make it
    inlineable */
index dd68f85..db2ae42 100644 (file)
@@ -39,9 +39,14 @@ static int xencons_irq;
 
 /* ------------------------------------------------------------------ */
 
+static unsigned long console_pfn = ~0ul;
+
 static inline struct xencons_interface *xencons_interface(void)
 {
-       return mfn_to_virt(xen_start_info->console.domU.mfn);
+       if (console_pfn == ~0ul)
+               return mfn_to_virt(xen_start_info->console.domU.mfn);
+       else
+               return __va(console_pfn << PAGE_SHIFT);
 }
 
 static inline void notify_daemon(void)
@@ -101,20 +106,32 @@ static int __init xen_init(void)
 {
        struct hvc_struct *hp;
 
-       if (!is_running_on_xen())
-               return 0;
+       if (!is_running_on_xen() ||
+           is_initial_xendomain() ||
+           !xen_start_info->console.domU.evtchn)
+               return -ENODEV;
 
        xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn);
        if (xencons_irq < 0)
-               xencons_irq = 0 /* NO_IRQ */;
+               xencons_irq = 0; /* NO_IRQ */
+
        hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256);
        if (IS_ERR(hp))
                return PTR_ERR(hp);
 
        hvc = hp;
+
+       console_pfn = mfn_to_pfn(xen_start_info->console.domU.mfn);
+
        return 0;
 }
 
+void xen_console_resume(void)
+{      /*
+        * Rebind the console event channel named in xen_start_info to
+        * the existing xencons_irq.  Skipped when xencons_irq is 0,
+        * the value xen_init() stores when binding failed.
+        */
+       if (xencons_irq)
+               rebind_evtchn_irq(xen_start_info->console.domU.evtchn, xencons_irq);
+}
+
 static void __exit xen_fini(void)
 {
        if (hvc)
@@ -134,12 +151,28 @@ module_init(xen_init);
 module_exit(xen_fini);
 console_initcall(xen_cons_init);
 
+static void raw_console_write(const char *str, int len)
+{      /*
+        * Push len bytes of str out through the CONSOLEIO_write
+        * hypercall.  Each call's return value is taken as the number
+        * of bytes consumed and the loop continues with the rest; a
+        * return of zero or less ends the loop and the remaining bytes
+        * are dropped without any error being reported.
+        */
+       while(len > 0) {
+               int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str);
+               if (rc <= 0)
+                       break;
+
+               str += rc;
+               len -= rc;
+       }
+}
+
+#ifdef CONFIG_EARLY_PRINTK
 static void xenboot_write_console(struct console *console, const char *string,
                                  unsigned len)
 {
        unsigned int linelen, off = 0;
        const char *pos;
 
+       raw_console_write(string, len);
+
+       write_console(0, "(early) ", 8);
        while (off < len && NULL != (pos = strchr(string+off, '\n'))) {
                linelen = pos-string+off;
                if (off + linelen > len)
@@ -155,5 +188,23 @@ static void xenboot_write_console(struct console *console, const char *string,
 struct console xenboot_console = {
        .name           = "xenboot",
        .write          = xenboot_write_console,
-       .flags          = CON_PRINTBUFFER | CON_BOOT,
+       .flags          = CON_PRINTBUFFER | CON_BOOT | CON_ANYTIME,
 };
+#endif /* CONFIG_EARLY_PRINTK */
+
+void xen_raw_console_write(const char *str)
+{
+       raw_console_write(str, strlen(str));
+}
+
+void xen_raw_printk(const char *fmt, ...)
+{
+       static char buf[512];
+       va_list ap;
+
+       va_start(ap, fmt);
+       vsnprintf(buf, sizeof(buf), fmt, ap);
+       va_end(ap);
+
+       xen_raw_console_write(buf);
+}
index 0f47f46..9ce3b3b 100644 (file)
@@ -66,6 +66,9 @@ static irqreturn_t input_handler(int rq, void *dev_id)
                case XENKBD_TYPE_MOTION:
                        input_report_rel(dev, REL_X, event->motion.rel_x);
                        input_report_rel(dev, REL_Y, event->motion.rel_y);
+                       if (event->motion.rel_z)
+                               input_report_rel(dev, REL_WHEEL,
+                                                -event->motion.rel_z);
                        break;
                case XENKBD_TYPE_KEY:
                        dev = NULL;
@@ -84,6 +87,9 @@ static irqreturn_t input_handler(int rq, void *dev_id)
                case XENKBD_TYPE_POS:
                        input_report_abs(dev, ABS_X, event->pos.abs_x);
                        input_report_abs(dev, ABS_Y, event->pos.abs_y);
+                       if (event->pos.rel_z)
+                               input_report_rel(dev, REL_WHEEL,
+                                                -event->pos.rel_z);
                        break;
                }
                if (dev)
@@ -152,7 +158,7 @@ static int __devinit xenkbd_probe(struct xenbus_device *dev,
        ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS);
        for (i = BTN_LEFT; i <= BTN_TASK; i++)
                set_bit(i, ptr->keybit);
-       ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+       ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y) | BIT(REL_WHEEL);
        input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0);
        input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0);
 
@@ -294,6 +300,16 @@ InitWait:
                 */
                if (dev->state != XenbusStateConnected)
                        goto InitWait; /* no InitWait seen yet, fudge it */
+
+               /* Set input abs params to match backend screen res */
+               if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+                                "width", "%d", &val) > 0)
+                       input_set_abs_params(info->ptr, ABS_X, 0, val, 0, 0);
+
+               if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+                                "height", "%d", &val) > 0)
+                       input_set_abs_params(info->ptr, ABS_Y, 0, val, 0, 0);
+
                break;
 
        case XenbusStateClosing:
@@ -337,4 +353,6 @@ static void __exit xenkbd_cleanup(void)
 module_init(xenkbd_init);
 module_exit(xenkbd_cleanup);
 
+MODULE_DESCRIPTION("Xen virtual keyboard/pointer device frontend");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("xen:vkbd");
index 005bd04..5faefea 100644 (file)
@@ -136,7 +136,6 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user);
  * first step in the migration to the kernel types.  pte_pfn is already defined
  * in the kernel. */
 #define pgd_flags(x)   (pgd_val(x) & ~PAGE_MASK)
-#define pte_flags(x)   (pte_val(x) & ~PAGE_MASK)
 #define pgd_pfn(x)     (pgd_val(x) >> PAGE_SHIFT)
 
 /* interrupts_and_traps.c: */
index 619a6f8..47ed39b 100644 (file)
@@ -18,6 +18,7 @@
  * frame buffer.
  */
 
+#include <linux/console.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/fb.h>
@@ -42,37 +43,68 @@ struct xenfb_info {
        struct xenfb_page       *page;
        unsigned long           *mfns;
        int                     update_wanted; /* XENFB_TYPE_UPDATE wanted */
+       int                     feature_resize; /* XENFB_TYPE_RESIZE ok */
+       struct xenfb_resize     resize;         /* protected by resize_lock */
+       int                     resize_dpy;     /* ditto */
+       spinlock_t              resize_lock;
 
        struct xenbus_device    *xbdev;
 };
 
-static u32 xenfb_mem_len = XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8;
+#define XENFB_DEFAULT_FB_LEN (XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8)
 
+enum { KPARAM_MEM, KPARAM_WIDTH, KPARAM_HEIGHT, KPARAM_CNT };
+static int video[KPARAM_CNT] = { 2, XENFB_WIDTH, XENFB_HEIGHT };
+module_param_array(video, int, NULL, 0);
+MODULE_PARM_DESC(video,
+       "Video memory size in MB, width, height in pixels (default 2,800,600)");
+
+static void xenfb_make_preferred_console(void);
 static int xenfb_remove(struct xenbus_device *);
-static void xenfb_init_shared_page(struct xenfb_info *);
+static void xenfb_init_shared_page(struct xenfb_info *, struct fb_info *);
 static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *);
 static void xenfb_disconnect_backend(struct xenfb_info *);
 
+static void xenfb_send_event(struct xenfb_info *info,
+                            union xenfb_out_event *event)
+{
+       u32 prod;
+
+       prod = info->page->out_prod;
+       /* caller ensures !xenfb_queue_full() */
+       mb();                   /* ensure ring space available */
+       XENFB_OUT_RING_REF(info->page, prod) = *event;
+       wmb();                  /* ensure ring contents visible */
+       info->page->out_prod = prod + 1;
+
+       notify_remote_via_irq(info->irq);
+}
+
 static void xenfb_do_update(struct xenfb_info *info,
                            int x, int y, int w, int h)
 {
        union xenfb_out_event event;
-       u32 prod;
 
+       memset(&event, 0, sizeof(event));
        event.type = XENFB_TYPE_UPDATE;
        event.update.x = x;
        event.update.y = y;
        event.update.width = w;
        event.update.height = h;
 
-       prod = info->page->out_prod;
        /* caller ensures !xenfb_queue_full() */
-       mb();                   /* ensure ring space available */
-       XENFB_OUT_RING_REF(info->page, prod) = event;
-       wmb();                  /* ensure ring contents visible */
-       info->page->out_prod = prod + 1;
+       xenfb_send_event(info, &event);
+}
 
-       notify_remote_via_irq(info->irq);
+static void xenfb_do_resize(struct xenfb_info *info)
+{
+       union xenfb_out_event event;
+
+       memset(&event, 0, sizeof(event));
+       event.resize = info->resize;
+
+       /* caller ensures !xenfb_queue_full() */
+       xenfb_send_event(info, &event);
 }
 
 static int xenfb_queue_full(struct xenfb_info *info)
@@ -84,12 +116,28 @@ static int xenfb_queue_full(struct xenfb_info *info)
        return prod - cons == XENFB_OUT_RING_LEN;
 }
 
+static void xenfb_handle_resize_dpy(struct xenfb_info *info)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&info->resize_lock, flags);
+       if (info->resize_dpy) {
+               if (!xenfb_queue_full(info)) {
+                       info->resize_dpy = 0;
+                       xenfb_do_resize(info);
+               }
+       }
+       spin_unlock_irqrestore(&info->resize_lock, flags);
+}
+
 static void xenfb_refresh(struct xenfb_info *info,
                          int x1, int y1, int w, int h)
 {
        unsigned long flags;
-       int y2 = y1 + h - 1;
        int x2 = x1 + w - 1;
+       int y2 = y1 + h - 1;
+
+       xenfb_handle_resize_dpy(info);
 
        if (!info->update_wanted)
                return;
@@ -222,6 +270,57 @@ static ssize_t xenfb_write(struct fb_info *p, const char __user *buf,
        return res;
 }
 
+static int
+xenfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+{
+       struct xenfb_info *xenfb_info;
+       int required_mem_len;
+
+       xenfb_info = info->par;
+
+       if (!xenfb_info->feature_resize) {
+               if (var->xres == video[KPARAM_WIDTH] &&
+                   var->yres == video[KPARAM_HEIGHT] &&
+                   var->bits_per_pixel == xenfb_info->page->depth) {
+                       return 0;
+               }
+               return -EINVAL;
+       }
+
+       /* Can't resize past initial width and height */
+       if (var->xres > video[KPARAM_WIDTH] || var->yres > video[KPARAM_HEIGHT])
+               return -EINVAL;
+
+       required_mem_len = var->xres * var->yres * xenfb_info->page->depth / 8;
+       if (var->bits_per_pixel == xenfb_info->page->depth &&
+           var->xres <= info->fix.line_length / (XENFB_DEPTH / 8) &&
+           required_mem_len <= info->fix.smem_len) {
+               var->xres_virtual = var->xres;
+               var->yres_virtual = var->yres;
+               return 0;
+       }
+       return -EINVAL;
+}
+
+static int xenfb_set_par(struct fb_info *info)
+{
+       struct xenfb_info *xenfb_info;
+       unsigned long flags;
+
+       xenfb_info = info->par;
+
+       spin_lock_irqsave(&xenfb_info->resize_lock, flags);
+       xenfb_info->resize.type = XENFB_TYPE_RESIZE;
+       xenfb_info->resize.width = info->var.xres;
+       xenfb_info->resize.height = info->var.yres;
+       xenfb_info->resize.stride = info->fix.line_length;
+       xenfb_info->resize.depth = info->var.bits_per_pixel;
+       xenfb_info->resize.offset = 0;
+       xenfb_info->resize_dpy = 1;
+       spin_unlock_irqrestore(&xenfb_info->resize_lock, flags);
+       return 0;
+}
+
 static struct fb_ops xenfb_fb_ops = {
        .owner          = THIS_MODULE,
        .fb_read        = fb_sys_read,
@@ -230,6 +329,8 @@ static struct fb_ops xenfb_fb_ops = {
        .fb_fillrect    = xenfb_fillrect,
        .fb_copyarea    = xenfb_copyarea,
        .fb_imageblit   = xenfb_imageblit,
+       .fb_check_var   = xenfb_check_var,
+       .fb_set_par     = xenfb_set_par,
 };
 
 static irqreturn_t xenfb_event_handler(int rq, void *dev_id)
@@ -258,6 +359,8 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
 {
        struct xenfb_info *info;
        struct fb_info *fb_info;
+       int fb_size;
+       int val;
        int ret;
 
        info = kzalloc(sizeof(*info), GFP_KERNEL);
@@ -265,18 +368,35 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
                xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
                return -ENOMEM;
        }
+
+       /* Limit kernel param videoram amount to what is in xenstore */
+       if (xenbus_scanf(XBT_NIL, dev->otherend, "videoram", "%d", &val) == 1) {
+               if (val < video[KPARAM_MEM])
+                       video[KPARAM_MEM] = val;
+       }
+
+       /* If requested res does not fit in available memory, use default */
+       fb_size = video[KPARAM_MEM] * 1024 * 1024;
+       if (video[KPARAM_WIDTH] * video[KPARAM_HEIGHT] * XENFB_DEPTH / 8
+           > fb_size) {
+               video[KPARAM_WIDTH] = XENFB_WIDTH;
+               video[KPARAM_HEIGHT] = XENFB_HEIGHT;
+               fb_size = XENFB_DEFAULT_FB_LEN;
+       }
+
        dev->dev.driver_data = info;
        info->xbdev = dev;
        info->irq = -1;
        info->x1 = info->y1 = INT_MAX;
        spin_lock_init(&info->dirty_lock);
+       spin_lock_init(&info->resize_lock);
 
-       info->fb = vmalloc(xenfb_mem_len);
+       info->fb = vmalloc(fb_size);
        if (info->fb == NULL)
                goto error_nomem;
-       memset(info->fb, 0, xenfb_mem_len);
+       memset(info->fb, 0, fb_size);
 
-       info->nr_pages = (xenfb_mem_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       info->nr_pages = (fb_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
        info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages);
        if (!info->mfns)
@@ -287,8 +407,6 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
        if (!info->page)
                goto error_nomem;
 
-       xenfb_init_shared_page(info);
-
        /* abusing framebuffer_alloc() to allocate pseudo_palette */
        fb_info = framebuffer_alloc(sizeof(u32) * 256, NULL);
        if (fb_info == NULL)
@@ -301,9 +419,9 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
        fb_info->screen_base = info->fb;
 
        fb_info->fbops = &xenfb_fb_ops;
-       fb_info->var.xres_virtual = fb_info->var.xres = info->page->width;
-       fb_info->var.yres_virtual = fb_info->var.yres = info->page->height;
-       fb_info->var.bits_per_pixel = info->page->depth;
+       fb_info->var.xres_virtual = fb_info->var.xres = video[KPARAM_WIDTH];
+       fb_info->var.yres_virtual = fb_info->var.yres = video[KPARAM_HEIGHT];
+       fb_info->var.bits_per_pixel = XENFB_DEPTH;
 
        fb_info->var.red = (struct fb_bitfield){16, 8, 0};
        fb_info->var.green = (struct fb_bitfield){8, 8, 0};
@@ -315,9 +433,9 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
        fb_info->var.vmode = FB_VMODE_NONINTERLACED;
 
        fb_info->fix.visual = FB_VISUAL_TRUECOLOR;
-       fb_info->fix.line_length = info->page->line_length;
+       fb_info->fix.line_length = fb_info->var.xres * XENFB_DEPTH / 8;
        fb_info->fix.smem_start = 0;
-       fb_info->fix.smem_len = xenfb_mem_len;
+       fb_info->fix.smem_len = fb_size;
        strcpy(fb_info->fix.id, "xen");
        fb_info->fix.type = FB_TYPE_PACKED_PIXELS;
        fb_info->fix.accel = FB_ACCEL_NONE;
@@ -334,6 +452,8 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
        fb_info->fbdefio = &xenfb_defio;
        fb_deferred_io_init(fb_info);
 
+       xenfb_init_shared_page(info, fb_info);
+
        ret = register_framebuffer(fb_info);
        if (ret) {
                fb_deferred_io_cleanup(fb_info);
@@ -348,6 +468,7 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
        if (ret < 0)
                goto error;
 
+       xenfb_make_preferred_console();
        return 0;
 
  error_nomem:
@@ -358,12 +479,34 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
        return ret;
 }
 
+static __devinit void
+xenfb_make_preferred_console(void)
+{
+       struct console *c;
+
+       if (console_set_on_cmdline)
+               return;
+
+       acquire_console_sem();
+       for (c = console_drivers; c; c = c->next) {
+               if (!strcmp(c->name, "tty") && c->index == 0)
+                       break;
+       }
+       release_console_sem();
+       if (c) {
+               unregister_console(c);
+               c->flags |= CON_CONSDEV;
+               c->flags &= ~CON_PRINTBUFFER; /* don't print again */
+               register_console(c);
+       }
+}
+
 static int xenfb_resume(struct xenbus_device *dev)
 {
        struct xenfb_info *info = dev->dev.driver_data;
 
        xenfb_disconnect_backend(info);
-       xenfb_init_shared_page(info);
+       xenfb_init_shared_page(info, info->fb_info);
        return xenfb_connect_backend(dev, info);
 }
 
@@ -391,20 +534,23 @@ static unsigned long vmalloc_to_mfn(void *address)
        return pfn_to_mfn(vmalloc_to_pfn(address));
 }
 
-static void xenfb_init_shared_page(struct xenfb_info *info)
+static void xenfb_init_shared_page(struct xenfb_info *info,
+                                  struct fb_info *fb_info)
 {
        int i;
+       int epd = PAGE_SIZE / sizeof(info->mfns[0]);
 
        for (i = 0; i < info->nr_pages; i++)
                info->mfns[i] = vmalloc_to_mfn(info->fb + i * PAGE_SIZE);
 
-       info->page->pd[0] = vmalloc_to_mfn(info->mfns);
-       info->page->pd[1] = 0;
-       info->page->width = XENFB_WIDTH;
-       info->page->height = XENFB_HEIGHT;
-       info->page->depth = XENFB_DEPTH;
-       info->page->line_length = (info->page->depth / 8) * info->page->width;
-       info->page->mem_length = xenfb_mem_len;
+       for (i = 0; i * epd < info->nr_pages; i++)
+               info->page->pd[i] = vmalloc_to_mfn(&info->mfns[i * epd]);
+
+       info->page->width = fb_info->var.xres;
+       info->page->height = fb_info->var.yres;
+       info->page->depth = fb_info->var.bits_per_pixel;
+       info->page->line_length = fb_info->fix.line_length;
+       info->page->mem_length = fb_info->fix.smem_len;
        info->page->in_cons = info->page->in_prod = 0;
        info->page->out_cons = info->page->out_prod = 0;
 }
@@ -504,6 +650,11 @@ InitWait:
                        val = 0;
                if (val)
                        info->update_wanted = 1;
+
+               if (xenbus_scanf(XBT_NIL, dev->otherend,
+                                "feature-resize", "%d", &val) < 0)
+                       val = 0;
+               info->feature_resize = val;
                break;
 
        case XenbusStateClosing:
@@ -547,4 +698,6 @@ static void __exit xenfb_cleanup(void)
 module_init(xenfb_init);
 module_exit(xenfb_cleanup);
 
+MODULE_DESCRIPTION("Xen virtual framebuffer device frontend");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("xen:vfb");
index 37af04f..363286c 100644 (file)
@@ -1,4 +1,4 @@
-obj-y  += grant-table.o features.o events.o
+obj-y  += grant-table.o features.o events.o manage.o
 obj-y  += xenbus/
 obj-$(CONFIG_XEN_XENCOMM)      += xencomm.o
 obj-$(CONFIG_XEN_BALLOON)      += balloon.o
index ab25ba6..591bc29 100644 (file)
@@ -225,7 +225,7 @@ static int increase_reservation(unsigned long nr_pages)
                page = balloon_next_page(page);
        }
 
-       reservation.extent_start = (unsigned long)frame_list;
+       set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents   = nr_pages;
        rc = HYPERVISOR_memory_op(
                XENMEM_populate_physmap, &reservation);
@@ -321,7 +321,7 @@ static int decrease_reservation(unsigned long nr_pages)
                balloon_append(pfn_to_page(pfn));
        }
 
-       reservation.extent_start = (unsigned long)frame_list;
+       set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents   = nr_pages;
        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
        BUG_ON(ret != nr_pages);
@@ -368,7 +368,7 @@ static void balloon_process(struct work_struct *work)
 }
 
 /* Resets the Xen limit, sets new target, and kicks off processing. */
-void balloon_set_new_target(unsigned long target)
+static void balloon_set_new_target(unsigned long target)
 {
        /* No need for lock. Not read-modify-write updates. */
        balloon_stats.hard_limit   = ~0UL;
@@ -483,7 +483,7 @@ static int dealloc_pte_fn(
                .extent_order = 0,
                .domid        = DOMID_SELF
        };
-       reservation.extent_start = (unsigned long)&mfn;
+       set_xen_guest_handle(reservation.extent_start, &mfn);
        set_pte_at(&init_mm, addr, pte, __pte_ma(0ull));
        set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
@@ -519,7 +519,7 @@ static struct page **alloc_empty_pages_and_pagevec(int nr_pages)
                                .extent_order = 0,
                                .domid        = DOMID_SELF
                        };
-                       reservation.extent_start = (unsigned long)&gmfn;
+                       set_xen_guest_handle(reservation.extent_start, &gmfn);
                        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
                                                   &reservation);
                        if (ret == 1)
index 76e5b73..332dd63 100644 (file)
@@ -355,7 +355,7 @@ static void unbind_from_irq(unsigned int irq)
 
        spin_lock(&irq_mapping_update_lock);
 
-       if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) {
+       if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) {
                close.port = evtchn;
                if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
                        BUG();
@@ -375,7 +375,7 @@ static void unbind_from_irq(unsigned int irq)
                evtchn_to_irq[evtchn] = -1;
                irq_info[irq] = IRQ_UNBOUND;
 
-               dynamic_irq_init(irq);
+               dynamic_irq_cleanup(irq);
        }
 
        spin_unlock(&irq_mapping_update_lock);
@@ -557,6 +557,33 @@ out:
        put_cpu();
 }
 
+/* Rebind a new event channel to an existing irq. */
+void rebind_evtchn_irq(int evtchn, int irq)
+{
+       /* Make sure the irq is masked, since the new event channel
+          will also be masked. */
+       disable_irq(irq);
+
+       spin_lock(&irq_mapping_update_lock);
+
+       /* After resume the irq<->evtchn mappings are all cleared out */
+       BUG_ON(evtchn_to_irq[evtchn] != -1);
+       /* Expect irq to have been bound before,
+          so the bindcount should be non-0 */
+       BUG_ON(irq_bindcount[irq] == 0);
+
+       evtchn_to_irq[evtchn] = irq;
+       irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn);
+
+       spin_unlock(&irq_mapping_update_lock);
+
+       /* new event channels are always bound to cpu 0 */
+       irq_set_affinity(irq, cpumask_of_cpu(0));
+
+       /* Unmask the event channel. */
+       enable_irq(irq);
+}
+
 /* Rebind an evtchn so that it gets delivered to a specific cpu */
 static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
 {
@@ -647,6 +674,89 @@ static int retrigger_dynirq(unsigned int irq)
        return ret;
 }
 
+static void restore_cpu_virqs(unsigned int cpu)
+{
+       struct evtchn_bind_virq bind_virq;
+       int virq, irq, evtchn;
+
+       for (virq = 0; virq < NR_VIRQS; virq++) {
+               if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
+                       continue;
+
+               BUG_ON(irq_info[irq].type != IRQT_VIRQ);
+               BUG_ON(irq_info[irq].index != virq);
+
+               /* Get a new binding from Xen. */
+               bind_virq.virq = virq;
+               bind_virq.vcpu = cpu;
+               if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+                                               &bind_virq) != 0)
+                       BUG();
+               evtchn = bind_virq.port;
+
+               /* Record the new mapping. */
+               evtchn_to_irq[evtchn] = irq;
+               irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
+               bind_evtchn_to_cpu(evtchn, cpu);
+
+               /* Ready for use. */
+               unmask_evtchn(evtchn);
+       }
+}
+
+static void restore_cpu_ipis(unsigned int cpu)
+{
+       struct evtchn_bind_ipi bind_ipi;
+       int ipi, irq, evtchn;
+
+       for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
+               if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
+                       continue;
+
+               BUG_ON(irq_info[irq].type != IRQT_IPI);
+               BUG_ON(irq_info[irq].index != ipi);
+
+               /* Get a new binding from Xen. */
+               bind_ipi.vcpu = cpu;
+               if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
+                                               &bind_ipi) != 0)
+                       BUG();
+               evtchn = bind_ipi.port;
+
+               /* Record the new mapping. */
+               evtchn_to_irq[evtchn] = irq;
+               irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
+               bind_evtchn_to_cpu(evtchn, cpu);
+
+               /* Ready for use. */
+               unmask_evtchn(evtchn);
+
+       }
+}
+
+void xen_irq_resume(void)
+{
+       unsigned int cpu, irq, evtchn;
+
+       init_evtchn_cpu_bindings();
+
+       /* New event-channel space is not 'live' yet. */
+       for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
+               mask_evtchn(evtchn);
+
+       /* No IRQ <-> event-channel mappings. */
+       for (irq = 0; irq < NR_IRQS; irq++)
+               irq_info[irq].evtchn = 0; /* zap event-channel binding */
+
+       for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
+               evtchn_to_irq[evtchn] = -1;
+
+       for_each_possible_cpu(cpu) {
+               restore_cpu_virqs(cpu);
+               restore_cpu_ipis(cpu);
+       }
+}
+
 static struct irq_chip xen_dynamic_chip __read_mostly = {
        .name           = "xen-dyn",
        .mask           = disable_dynirq,
index 52b6b41..e9e1116 100644 (file)
@@ -471,14 +471,14 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
        return 0;
 }
 
-static int gnttab_resume(void)
+int gnttab_resume(void)
 {
        if (max_nr_grant_frames() < nr_grant_frames)
                return -ENOSYS;
        return gnttab_map(0, nr_grant_frames - 1);
 }
 
-static int gnttab_suspend(void)
+int gnttab_suspend(void)
 {
        arch_gnttab_unmap_shared(shared, nr_grant_frames);
        return 0;
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
new file mode 100644 (file)
index 0000000..5b546e3
--- /dev/null
@@ -0,0 +1,252 @@
+/*
+ * Handle external requests for shutdown, reboot and sysrq
+ */
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/reboot.h>
+#include <linux/sysrq.h>
+#include <linux/stop_machine.h>
+#include <linux/freezer.h>
+
+#include <xen/xenbus.h>
+#include <xen/grant_table.h>
+#include <xen/events.h>
+#include <xen/hvc-console.h>
+#include <xen/xen-ops.h>
+
+#include <asm/xen/hypercall.h>
+#include <asm/xen/page.h>
+
+enum shutdown_state {
+       SHUTDOWN_INVALID = -1,
+       SHUTDOWN_POWEROFF = 0,
+       SHUTDOWN_SUSPEND = 2,
+       /* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
+          report a crash, not be instructed to crash!
+          HALT is the same as POWEROFF, as far as we're concerned.  The tools use
+          the distinction when we return the reason code to them.  */
+        SHUTDOWN_HALT = 4,
+};
+
+/* Ignore multiple shutdown requests. */
+static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
+
+#ifdef CONFIG_PM_SLEEP
+static int xen_suspend(void *data)
+{
+       int *cancelled = data;
+       int err;
+
+       BUG_ON(!irqs_disabled());
+
+       load_cr3(swapper_pg_dir);
+
+       err = device_power_down(PMSG_SUSPEND);
+       if (err) {
+               printk(KERN_ERR "xen_suspend: device_power_down failed: %d\n",
+                      err);
+               return err;
+       }
+
+       xen_mm_pin_all();
+       gnttab_suspend();
+       xen_pre_suspend();
+
+       /*
+        * This hypercall returns 1 if suspend was cancelled
+        * or the domain was merely checkpointed, and 0 if it
+        * is resuming in a new domain.
+        */
+       *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
+
+       xen_post_suspend(*cancelled);
+       gnttab_resume();
+       xen_mm_unpin_all();
+
+       device_power_up();
+
+       if (!*cancelled) {
+               xen_irq_resume();
+               xen_console_resume();
+       }
+
+       return 0;
+}
+
+static void do_suspend(void)
+{
+       int err;
+       int cancelled = 1;
+
+       shutting_down = SHUTDOWN_SUSPEND;
+
+#ifdef CONFIG_PREEMPT
+       /* If the kernel is preemptible, we need to freeze all the processes
+          to prevent them from being in the middle of a pagetable update
+          during suspend. */
+       err = freeze_processes();
+       if (err) {
+               printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
+               return;
+       }
+#endif
+
+       err = device_suspend(PMSG_SUSPEND);
+       if (err) {
+               printk(KERN_ERR "xen suspend: device_suspend %d\n", err);
+               goto out;
+       }
+
+       printk("suspending xenbus...\n");
+       /* XXX use normal device tree? */
+       xenbus_suspend();
+
+       err = stop_machine_run(xen_suspend, &cancelled, 0);
+       if (err) {
+               printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
+               goto out;
+       }
+
+       if (!cancelled)
+               xenbus_resume();
+       else
+               xenbus_suspend_cancel();
+
+       device_resume();
+
+       /* Make sure timer events get retriggered on all CPUs */
+       clock_was_set();
+out:
+#ifdef CONFIG_PREEMPT
+       thaw_processes();
+#endif
+       shutting_down = SHUTDOWN_INVALID;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static void shutdown_handler(struct xenbus_watch *watch,
+                            const char **vec, unsigned int len)
+{
+       char *str;
+       struct xenbus_transaction xbt;
+       int err;
+
+       if (shutting_down != SHUTDOWN_INVALID)
+               return;
+
+ again:
+       err = xenbus_transaction_start(&xbt);
+       if (err)
+               return;
+
+       str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
+       /* Ignore read errors and empty reads. */
+       if (XENBUS_IS_ERR_READ(str)) {
+               xenbus_transaction_end(xbt, 1);
+               return;
+       }
+
+       xenbus_write(xbt, "control", "shutdown", "");
+
+       err = xenbus_transaction_end(xbt, 0);
+       if (err == -EAGAIN) {
+               kfree(str);
+               goto again;
+       }
+
+       if (strcmp(str, "poweroff") == 0 ||
+           strcmp(str, "halt") == 0) {
+               shutting_down = SHUTDOWN_POWEROFF;
+               orderly_poweroff(false);
+       } else if (strcmp(str, "reboot") == 0) {
+               shutting_down = SHUTDOWN_POWEROFF; /* ? */
+               ctrl_alt_del();
+#ifdef CONFIG_PM_SLEEP
+       } else if (strcmp(str, "suspend") == 0) {
+               do_suspend();
+#endif
+       } else {
+               printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
+               shutting_down = SHUTDOWN_INVALID;
+       }
+
+       kfree(str);
+}
+
+static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
+                         unsigned int len)
+{
+       char sysrq_key = '\0';
+       struct xenbus_transaction xbt;
+       int err;
+
+ again:
+       err = xenbus_transaction_start(&xbt);
+       if (err)
+               return;
+       if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
+               printk(KERN_ERR "Unable to read sysrq code in "
+                      "control/sysrq\n");
+               xenbus_transaction_end(xbt, 1);
+               return;
+       }
+
+       if (sysrq_key != '\0')
+               xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
+
+       err = xenbus_transaction_end(xbt, 0);
+       if (err == -EAGAIN)
+               goto again;
+
+       if (sysrq_key != '\0')
+               handle_sysrq(sysrq_key, NULL);
+}
+
+static struct xenbus_watch shutdown_watch = {
+       .node = "control/shutdown",
+       .callback = shutdown_handler
+};
+
+static struct xenbus_watch sysrq_watch = {
+       .node = "control/sysrq",
+       .callback = sysrq_handler
+};
+
+static int setup_shutdown_watcher(void)
+{
+       int err;
+
+       err = register_xenbus_watch(&shutdown_watch);
+       if (err) {
+               printk(KERN_ERR "Failed to set shutdown watcher\n");
+               return err;
+       }
+
+       err = register_xenbus_watch(&sysrq_watch);
+       if (err) {
+               printk(KERN_ERR "Failed to set sysrq watcher\n");
+               return err;
+       }
+
+       return 0;
+}
+
+static int shutdown_event(struct notifier_block *notifier,
+                         unsigned long event,
+                         void *data)
+{
+       setup_shutdown_watcher();
+       return NOTIFY_DONE;
+}
+
+static int __init setup_shutdown_event(void)
+{
+       static struct notifier_block xenstore_notifier = {
+               .notifier_call = shutdown_event
+       };
+       register_xenstore_notifier(&xenstore_notifier);
+
+       return 0;
+}
+
+subsys_initcall(setup_shutdown_event);
index 6efbe3f..090c61e 100644 (file)
@@ -203,7 +203,6 @@ int xb_read(void *data, unsigned len)
 int xb_init_comms(void)
 {
        struct xenstore_domain_interface *intf = xen_store_interface;
-       int err;
 
        if (intf->req_prod != intf->req_cons)
                printk(KERN_ERR "XENBUS request ring is not quiescent "
@@ -216,18 +215,20 @@ int xb_init_comms(void)
                intf->rsp_cons = intf->rsp_prod;
        }
 
-       if (xenbus_irq)
-               unbind_from_irqhandler(xenbus_irq, &xb_waitq);
+       if (xenbus_irq) {
+               /* Already have an irq; assume we're resuming */
+               rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
+       } else {
+               int err;
+               err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
+                                               0, "xenbus", &xb_waitq);
+               if (err <= 0) {
+                       printk(KERN_ERR "XENBUS request irq failed %i\n", err);
+                       return err;
+               }
 
-       err = bind_evtchn_to_irqhandler(
-               xen_store_evtchn, wake_waiting,
-               0, "xenbus", &xb_waitq);
-       if (err <= 0) {
-               printk(KERN_ERR "XENBUS request irq failed %i\n", err);
-               return err;
+               xenbus_irq = err;
        }
 
-       xenbus_irq = err;
-
        return 0;
 }
index dc936dd..a1e2b94 100644 (file)
@@ -160,6 +160,7 @@ static inline pteval_t native_pte_val(pte_t pte)
 #endif
 
 #define pte_val(x)     native_pte_val(x)
+#define pte_flags(x)   native_pte_val(x)
 #define __pte(x)       native_make_pte(x)
 
 #endif /* CONFIG_PARAVIRT */
index 0f13b94..5ea37a4 100644 (file)
@@ -239,6 +239,7 @@ struct pv_mmu_ops {
                                 unsigned long addr, pte_t *ptep);
 
        pteval_t (*pte_val)(pte_t);
+       pteval_t (*pte_flags)(pte_t);
        pte_t (*make_pte)(pteval_t pte);
 
        pgdval_t (*pgd_val)(pgd_t);
@@ -996,6 +997,20 @@ static inline pteval_t pte_val(pte_t pte)
        return ret;
 }
 
+static inline pteval_t pte_flags(pte_t pte)  /* Paravirt wrapper: returns whatever the pv_mmu_ops.pte_flags hook yields for pte. */
+{
+       pteval_t ret;
+
+       if (sizeof(pteval_t) > sizeof(long))  /* compile-time test: pteval_t is wider than one long */
+               ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags,
+                                pte.pte, (u64)pte.pte >> 32);  /* second argument carries bits 63..32 of the value */
+       else
+               ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags,
+                                pte.pte);  /* value is passed as a single argument */
+
+       return ret;
+}
+
 static inline pgd_t __pgd(pgdval_t val)
 {
        pgdval_t ret;
index 97c271b..47a852c 100644 (file)
@@ -164,37 +164,37 @@ extern struct list_head pgd_list;
  */
 static inline int pte_dirty(pte_t pte)
 {
-       return pte_val(pte) & _PAGE_DIRTY;
+       return pte_flags(pte) & _PAGE_DIRTY;
 }
 
 static inline int pte_young(pte_t pte)
 {
-       return pte_val(pte) & _PAGE_ACCESSED;
+       return pte_flags(pte) & _PAGE_ACCESSED;
 }
 
 static inline int pte_write(pte_t pte)
 {
-       return pte_val(pte) & _PAGE_RW;
+       return pte_flags(pte) & _PAGE_RW;
 }
 
 static inline int pte_file(pte_t pte)
 {
-       return pte_val(pte) & _PAGE_FILE;
+       return pte_flags(pte) & _PAGE_FILE;
 }
 
 static inline int pte_huge(pte_t pte)
 {
-       return pte_val(pte) & _PAGE_PSE;
+       return pte_flags(pte) & _PAGE_PSE;
 }
 
 static inline int pte_global(pte_t pte)
 {
-       return pte_val(pte) & _PAGE_GLOBAL;
+       return pte_flags(pte) & _PAGE_GLOBAL;
 }
 
 static inline int pte_exec(pte_t pte)
 {
-       return !(pte_val(pte) & _PAGE_NX);
+       return !(pte_flags(pte) & _PAGE_NX);
 }
 
 static inline int pte_special(pte_t pte)
@@ -305,7 +305,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
        return __pgprot(preservebits | addbits);
 }
 
-#define pte_pgprot(x) __pgprot(pte_val(x) & ~PTE_MASK)
+#define pte_pgprot(x) __pgprot(pte_flags(x) & ~PTE_MASK)
 
 #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
 
index c2ccd99..2a4f9b4 100644 (file)
@@ -176,9 +176,9 @@ HYPERVISOR_fpu_taskswitch(int set)
 }
 
 static inline int
-HYPERVISOR_sched_op(int cmd, unsigned long arg)
+HYPERVISOR_sched_op(int cmd, void *arg)
 {
-       return _hypercall2(int, sched_op, cmd, arg);
+       return _hypercall2(int, sched_op_new, cmd, arg);
 }
 
 static inline long
@@ -315,6 +315,13 @@ HYPERVISOR_nmi_op(unsigned long op, unsigned long arg)
 }
 
 static inline void
+MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)  /* Fill *mcl as an fpu_taskswitch multicall entry; only op and args[0] are written. */
+{
+       mcl->op = __HYPERVISOR_fpu_taskswitch;
+       mcl->args[0] = set;  /* caller's "set" value becomes the first (and only) argument */
+}
+
+static inline void
 MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
                        pte_t new_val, unsigned long flags)
 {
index e11f240..377c045 100644 (file)
@@ -26,15 +26,20 @@ typedef struct xpaddr {
 #define FOREIGN_FRAME_BIT      (1UL<<31)
 #define FOREIGN_FRAME(m)       ((m) | FOREIGN_FRAME_BIT)
 
-extern unsigned long *phys_to_machine_mapping;
+/* Maximum amount of memory we can handle in a domain in pages */
+#define MAX_DOMAIN_PAGES                                               \
+    ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
+
+
+extern unsigned long get_phys_to_machine(unsigned long pfn);
+extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 
 static inline unsigned long pfn_to_mfn(unsigned long pfn)
 {
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return pfn;
 
-       return phys_to_machine_mapping[(unsigned int)(pfn)] &
-               ~FOREIGN_FRAME_BIT;
+       return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT;
 }
 
 static inline int phys_to_machine_mapping_valid(unsigned long pfn)
@@ -42,7 +47,7 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return 1;
 
-       return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);
+       return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY;
 }
 
 static inline unsigned long mfn_to_pfn(unsigned long mfn)
@@ -106,20 +111,12 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
        unsigned long pfn = mfn_to_pfn(mfn);
        if ((pfn < max_mapnr)
            && !xen_feature(XENFEAT_auto_translated_physmap)
-           && (phys_to_machine_mapping[pfn] != mfn))
+           && (get_phys_to_machine(pfn) != mfn))
                return max_mapnr; /* force !pfn_valid() */
+       /* XXX fixme; not true with sparsemem */
        return pfn;
 }
 
-static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
-       if (xen_feature(XENFEAT_auto_translated_physmap)) {
-               BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
-               return;
-       }
-       phys_to_machine_mapping[pfn] = mfn;
-}
-
 /* VIRT <-> MACHINE conversion */
 #define virt_to_machine(v)     (phys_to_machine(XPADDR(__pa(v))))
 #define virt_to_mfn(v)         (pfn_to_mfn(PFN_DOWN(__pa(v))))
index a4f27fb..248e6e3 100644 (file)
@@ -108,6 +108,8 @@ struct console {
        struct   console *next;
 };
 
+extern int console_set_on_cmdline;
+
 extern int add_preferred_console(char *name, int idx, char *options);
 extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options);
 extern void register_console(struct console *);
index f31debf..0d2a4e7 100644 (file)
@@ -157,6 +157,7 @@ PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
 __PAGEFLAG(Slab, slab)
 PAGEFLAG(Checked, owner_priv_1)                /* Used by some filesystems */
 PAGEFLAG(Pinned, owner_priv_1) TESTSCFLAG(Pinned, owner_priv_1) /* Xen */
+PAGEFLAG(SavePinned, dirty);                                   /* Xen */
 PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
        __SETPAGEFLAG(Private, private)
index acd8e06..67c4436 100644 (file)
@@ -32,6 +32,7 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id);
 
 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector);
 int resend_irq_on_evtchn(unsigned int irq);
+void rebind_evtchn_irq(int evtchn, int irq);
 
 static inline void notify_remote_via_evtchn(int port)
 {
@@ -40,4 +41,7 @@ static inline void notify_remote_via_evtchn(int port)
 }
 
 extern void notify_remote_via_irq(int irq);
+
+extern void xen_irq_resume(void);
+
 #endif /* _XEN_EVENTS_H */
index 4662048..a40f1cd 100644 (file)
@@ -51,6 +51,9 @@ struct gnttab_free_callback {
        u16 count;
 };
 
+int gnttab_suspend(void);
+int gnttab_resume(void);
+
 int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
                                int readonly);
 
index 21c0ecf..98b79bc 100644 (file)
@@ -3,4 +3,13 @@
 
 extern struct console xenboot_console;
 
+#ifdef CONFIG_HVC_XEN
+void xen_console_resume(void);
+#else
+static inline void xen_console_resume(void) { }
+#endif
+
+void xen_raw_console_write(const char *str);
+void xen_raw_printk(const char *fmt, ...);
+
 #endif /* XEN_HVC_CONSOLE_H */
index a64d3df..7a8262c 100644 (file)
  */
 #define XEN_ELFNOTE_BSD_SYMTAB    11
 
+/*
+ * The lowest address the hypervisor hole can begin at (numeric).
+ *
+ * This must not be set higher than HYPERVISOR_VIRT_START. Its presence
+ * also indicates to the hypervisor that the kernel can deal with the
+ * hole starting at a higher address.
+ */
+#define XEN_ELFNOTE_HV_START_LOW  12
+
+/*
+ * List of maddr_t-sized mask/value pairs describing how to recognize
+ * (non-present) L1 page table entries carrying valid MFNs (numeric).
+ */
+#define XEN_ELFNOTE_L1_MFN_VALID  13
+
+/*
+ * Whether or not the guest supports cooperative suspend cancellation.
+ */
+#define XEN_ELFNOTE_SUSPEND_CANCEL 14
+
 #endif /* __XEN_PUBLIC_ELFNOTE_H__ */
 
 /*
index 5a934dd..974a51e 100644 (file)
@@ -49,11 +49,27 @@ struct xenfb_update {
        int32_t height;         /* rect height */
 };
 
+/*
+ * Framebuffer resize notification event
+ * Capable backend sets feature-resize in xenstore.
+ */
+#define XENFB_TYPE_RESIZE 3
+
+struct xenfb_resize {  /* payload for XENFB_TYPE_RESIZE; a member of union xenfb_out_event */
+       uint8_t type;           /* XENFB_TYPE_RESIZE */
+       int32_t width;          /* width in pixels */
+       int32_t height;         /* height in pixels */
+       int32_t stride;         /* stride in bytes */
+       int32_t depth;          /* depth in bits */
+       int32_t offset;         /* start offset within framebuffer */
+};
+
 #define XENFB_OUT_EVENT_SIZE 40
 
 union xenfb_out_event {
        uint8_t type;
        struct xenfb_update update;
+       struct xenfb_resize resize;
        char pad[XENFB_OUT_EVENT_SIZE];
 };
 
@@ -105,15 +121,18 @@ struct xenfb_page {
         * Each directory page holds PAGE_SIZE / sizeof(*pd)
         * framebuffer pages, and can thus map up to PAGE_SIZE *
         * PAGE_SIZE / sizeof(*pd) bytes.  With PAGE_SIZE == 4096 and
-        * sizeof(unsigned long) == 4, that's 4 Megs.  Two directory
-        * pages should be enough for a while.
+        * sizeof(unsigned long) == 4/8, that's 4 Megs on 32 bit and 2
+        * Megs on 64 bit.  256 directories give enough room for a 512
+        * Meg framebuffer with a max resolution of 12,800x10,240.
+        * Should be enough for a while with room leftover for
+        * expansion.
         */
-       unsigned long pd[2];
+       unsigned long pd[256];
 };
 
 /*
- * Wart: xenkbd needs to know resolution.  Put it here until a better
- * solution is found, but don't leak it to the backend.
+ * Wart: xenkbd needs to know default resolution.  Put it here until a
+ * better solution is found, but don't leak it to the backend.
  */
 #ifdef __KERNEL__
 #define XENFB_WIDTH 800
index fb97f42..8066c78 100644 (file)
@@ -49,6 +49,7 @@ struct xenkbd_motion {
        uint8_t type;           /* XENKBD_TYPE_MOTION */
        int32_t rel_x;          /* relative X motion */
        int32_t rel_y;          /* relative Y motion */
+       int32_t rel_z;          /* relative Z motion (wheel) */
 };
 
 struct xenkbd_key {
@@ -61,6 +62,7 @@ struct xenkbd_position {
        uint8_t type;           /* XENKBD_TYPE_POS */
        int32_t abs_x;          /* absolute X position (in FB pixels) */
        int32_t abs_y;          /* absolute Y position (in FB pixels) */
+       int32_t rel_z;          /* relative Z motion (wheel) */
 };
 
 #define XENKBD_IN_EVENT_SIZE 40
index da76846..af36ead 100644 (file)
@@ -29,7 +29,7 @@ struct xen_memory_reservation {
      *   OUT: GMFN bases of extents that were allocated
      *   (NB. This command also updates the mach_to_phys translation table)
      */
-    ulong extent_start;
+    GUEST_HANDLE(ulong) extent_start;
 
     /* Number of extents, and size/alignment of each (2^extent_order pages). */
     unsigned long  nr_extents;
@@ -50,6 +50,7 @@ struct xen_memory_reservation {
     domid_t        domid;
 
 };
+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation);
 
 /*
  * Returns the maximum machine frame number of mapped RAM in this system.
@@ -85,7 +86,7 @@ struct xen_machphys_mfn_list {
      * any large discontiguities in the machine address space, 2MB gaps in
      * the machphys table will be represented by an MFN base of zero.
      */
-    ulong extent_start;
+    GUEST_HANDLE(ulong) extent_start;
 
     /*
      * Number of extents written to the above array. This will be smaller
@@ -93,6 +94,7 @@ struct xen_machphys_mfn_list {
      */
     unsigned int nr_extents;
 };
+DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
 
 /*
  * Sets the GPFN at which a particular page appears in the specified guest's
@@ -115,6 +117,7 @@ struct xen_add_to_physmap {
     /* GPFN where the source mapping page should appear. */
     unsigned long gpfn;
 };
+DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
 
 /*
  * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error
@@ -129,13 +132,14 @@ struct xen_translate_gpfn_list {
     unsigned long nr_gpfns;
 
     /* List of GPFNs to translate. */
-    ulong gpfn_list;
+    GUEST_HANDLE(ulong) gpfn_list;
 
     /*
      * Output list to contain MFN translations. May be the same as the input
      * list (in which case each input GPFN is overwritten with the output MFN).
      */
-    ulong mfn_list;
+    GUEST_HANDLE(ulong) mfn_list;
 };
+DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list);
 
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
index 10ddfe0..a706d6a 100644 (file)
@@ -5,4 +5,10 @@
 
 DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
 
+void xen_pre_suspend(void);
+void xen_post_suspend(int suspend_cancelled);
+
+void xen_mm_pin_all(void);
+void xen_mm_unpin_all(void);
+
 #endif /* INCLUDE_XEN_OPS_H */
index 8fb01c3..028ed75 100644 (file)
@@ -121,6 +121,8 @@ struct console_cmdline
 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
 static int selected_console = -1;
 static int preferred_console = -1;
+int console_set_on_cmdline;
+EXPORT_SYMBOL(console_set_on_cmdline);
 
 /* Flag: console code may call schedule() */
 static int console_may_schedule;
@@ -890,6 +892,7 @@ static int __init console_setup(char *str)
        *s = 0;
 
        __add_preferred_console(buf, idx, options, brl_options);
+       console_set_on_cmdline = 1;
        return 1;
 }
 __setup("console=", console_setup);