[IA64] IA64 Kexec/kdump
Zou Nan hai [Thu, 7 Dec 2006 17:51:35 +0000 (09:51 -0800)]
Changes and updates.

1. Remove fake rendz path and related code, as discussed with Khalid Aziz.
2. fc.i offset fix in relocate_kernel.S.
3. iosapic shutdown code: EOI and mask race fix from Fujitsu.
4. Warm boot hook in machine_kexec to SN SAL code from Jack Steiner.
5. Send slave to SAL slave loop patch from Jay Lan.
6. Kdump on non-recoverable MCA event patch from Jay Lan
7. Use CTL_UNNUMBERED in kdump_on_init sysctl.

Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

19 files changed:
arch/ia64/Kconfig
arch/ia64/kernel/Makefile
arch/ia64/kernel/crash.c [new file with mode: 0644]
arch/ia64/kernel/efi.c
arch/ia64/kernel/entry.S
arch/ia64/kernel/iosapic.c
arch/ia64/kernel/machine_kexec.c [new file with mode: 0644]
arch/ia64/kernel/mca.c
arch/ia64/kernel/relocate_kernel.S [new file with mode: 0644]
arch/ia64/kernel/setup.c
arch/ia64/kernel/smp.c
arch/ia64/sn/kernel/setup.c
include/asm-ia64/kexec.h [new file with mode: 0644]
include/asm-ia64/machvec.h
include/asm-ia64/machvec_sn2.h
include/asm-ia64/meminit.h
include/asm-ia64/sn/sn_sal.h
include/linux/kexec.h
kernel/kexec.c

index 683b12c..75d8397 100644 (file)
@@ -434,6 +434,29 @@ config IA64_ESI
 
 source "drivers/sn/Kconfig"
 
+config KEXEC
+       bool "kexec system call (EXPERIMENTAL)"
+       depends on EXPERIMENTAL && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
+       help
+         kexec is a system call that implements the ability to shutdown your
+         current kernel, and to start another kernel.  It is like a reboot
+         but it is independent of the system firmware.   And like a reboot
+         you can start any kernel with it, not just Linux.
+
+         The name comes from the similarity to the exec system call.
+
+         It is an ongoing process to be certain the hardware in a machine
+         is properly shutdown, so do not be surprised if this code does not
+         initially work for you.  It may help to enable device hotplugging
+         support.  As of this writing the exact hardware interface is
+         strongly in flux, so no good recommendation can be made.
+
+config CRASH_DUMP
+         bool "kernel crash dumps (EXPERIMENTAL)"
+         depends on EXPERIMENTAL && IA64_MCA_RECOVERY && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
+         help
+           Generate crash dump after being started by kexec.
+
 source "drivers/firmware/Kconfig"
 
 source "fs/Kconfig.binfmt"
index cfa099b..8ae384e 100644 (file)
@@ -28,6 +28,7 @@ obj-$(CONFIG_IA64_CYCLONE)    += cyclone.o
 obj-$(CONFIG_CPU_FREQ)         += cpufreq/
 obj-$(CONFIG_IA64_MCA_RECOVERY)        += mca_recovery.o
 obj-$(CONFIG_KPROBES)          += kprobes.o jprobes.o
+obj-$(CONFIG_KEXEC)            += machine_kexec.o relocate_kernel.o crash.o
 obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)  += uncached.o
 obj-$(CONFIG_AUDIT)            += audit.o
 obj-$(CONFIG_PCI_MSI)          += msi_ia64.o
diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
new file mode 100644 (file)
index 0000000..0aabedf
--- /dev/null
@@ -0,0 +1,245 @@
+/*
+ * arch/ia64/kernel/crash.c
+ *
+ * Architecture specific (ia64) functions for kexec based crash dumps.
+ *
+ * Created by: Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Intel Corp       Zou Nan hai <nanhai.zou@intel.com>
+ *
+ */
+#include <linux/smp.h>
+#include <linux/delay.h>
+#include <linux/crash_dump.h>
+#include <linux/bootmem.h>
+#include <linux/kexec.h>
+#include <linux/elfcore.h>
+#include <linux/sysctl.h>
+#include <linux/init.h>
+
+#include <asm/kdebug.h>
+#include <asm/mca.h>
+#include <asm/uaccess.h>
+
+int kdump_status[NR_CPUS];
+atomic_t kdump_cpu_freezed;
+atomic_t kdump_in_progress;
+int kdump_on_init = 1;
+/*
+ * copy_oldmem_page - copy part of one page of the old kernel's memory
+ * @pfn:     page frame number of the page to read
+ * @buf:     destination buffer (user or kernel address, see @userbuf)
+ * @csize:   number of bytes to copy
+ * @offset:  byte offset into the page at which reading starts
+ * @userbuf: non-zero when @buf must be written with copy_to_user()
+ *
+ * Returns @csize on success (0 when @csize is 0) or -EFAULT when the
+ * copy to user space fails.
+ */
+ssize_t
+copy_oldmem_page(unsigned long pfn, char *buf,
+               size_t csize, unsigned long offset, int userbuf)
+{
+       void *src;
+
+       if (csize == 0)
+               return 0;
+
+       /* the source page is addressed through __va() of its physical address */
+       src = __va(pfn << PAGE_SHIFT) + offset;
+
+       if (!userbuf) {
+               memcpy(buf, src, csize);
+               return csize;
+       }
+
+       if (copy_to_user(buf, src, csize) != 0)
+               return -EFAULT;
+
+       return csize;
+}
+
+/*
+ * Write one ELF note at @buf: the struct elf_note header, then @name
+ * (including its terminating NUL), then @data_len bytes of @data.
+ * After each part the write position advances by the part's size rounded
+ * up to a multiple of 4, counted in Elf64_Word elements.
+ *
+ * Returns the position just past the note that was written.
+ */
+static inline Elf64_Word
+*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
+               size_t data_len)
+{
+       struct elf_note *hdr = (struct elf_note *)buf;
+       Elf64_Word *cursor = buf;
+
+       hdr->n_namesz = strlen(name) + 1;
+       hdr->n_descsz = data_len;
+       hdr->n_type   = type;
+
+       /* step over the header */
+       cursor += (sizeof(*hdr) + 3) / 4;
+
+       memcpy(cursor, name, hdr->n_namesz);
+       cursor += (hdr->n_namesz + 3) / 4;
+
+       memcpy(cursor, data, data_len);
+       cursor += (data_len + 3) / 4;
+
+       return cursor;
+}
+
+/*
+ * Terminate a note list: zero sizeof(struct elf_note) bytes at @buf, so
+ * the header that follows the last real note is all zeroes.
+ */
+static void
+final_note(void *buf)
+{
+       memset(buf, 0, sizeof(struct elf_note));
+}
+
+extern void ia64_dump_cpu_regs(void *);
+
+static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);
+
+/*
+ * Capture the calling CPU's register state and store it as a "CORE" /
+ * NT_PRSTATUS ELF note in this CPU's crash_notes buffer, followed by a
+ * zeroed terminating note header.
+ *
+ * The per-CPU elf_prstatus is filled in even when no crash_notes buffer
+ * exists; in that case nothing is written to the notes area.
+ */
+void
+crash_save_this_cpu(void)
+{
+       void *buf;
+       unsigned long cfm, sof, sol;
+
+       int cpu = smp_processor_id();
+       struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu);
+
+       elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg);
+       memset(prstatus, 0, sizeof(*prstatus));
+       prstatus->pr_pid = current->pid;
+
+       ia64_dump_cpu_regs(dst);
+       /*
+        * ia64_dump_cpu_regs() stores cfm in slot 43 and ar.bsp in slot 46.
+        * sof is cfm bits 0-6 and sol is cfm bits 7-13; the saved bsp is
+        * advanced by (sof - sol) registers.
+        */
+       cfm = dst[43];
+       sol = (cfm >> 7) & 0x7f;
+       sof = cfm & 0x7f;
+       dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
+                       sof - sol);
+
+       buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
+       if (!buf)
+               return;
+       buf = append_elf_note(buf, "CORE", NT_PRSTATUS, prstatus,
+                       sizeof(*prstatus));
+       final_note(buf);
+}
+
+/*
+ * Wait for the other online CPUs to report in through kdump_cpu_freezed.
+ *
+ * The counter is polled up to 1000 times with udelay(1000) between
+ * polls.  Returns 0 once the counter equals the number of other online
+ * CPUs, or 1 when the polls are exhausted first.
+ */
+static int
+kdump_wait_cpu_freeze(void)
+{
+       const int others = num_online_cpus() - 1;
+       int tries;
+
+       for (tries = 0; tries < 1000; tries++) {
+               if (atomic_read(&kdump_cpu_freezed) == others)
+                       return 0;
+               udelay(1000);
+       }
+       return 1;
+}
+
+void
+machine_crash_shutdown(struct pt_regs *pt)
+{
+       /*
+        * Runs only once the system has panicked or is otherwise in a
+        * critical state, so do just the minimum that lets a kexec'd
+        * kernel start successfully.
+        *
+        * In practice that means masking the IOSAPIC entries and, on SMP,
+        * shooting down the other cpus.
+        */
+       kexec_disable_iosapic();
+#ifdef CONFIG_SMP
+       kdump_smp_send_stop();
+       if (kdump_wait_cpu_freeze() != 0) {
+               /* not every cpu answered the IPI: freeze the rest with INIT */
+               if (kdump_on_init)
+                       kdump_smp_send_init();
+       }
+#endif
+}
+
+/*
+ * Start the crash kernel from a die-notifier event: disable local
+ * interrupts, mask the IOSAPIC entries, then pass the image recorded in
+ * ia64_kimage to machine_kexec().
+ */
+static void
+machine_kdump_on_init(void)
+{
+       local_irq_disable();
+       kexec_disable_iosapic();
+       machine_kexec(ia64_kimage);
+}
+
+/*
+ * Park the calling CPU for kdump.  Invoked through unw_init_running(),
+ * which supplies @info; @arg is unused.
+ *
+ * Saves this CPU's register note, records the switch-stack based ksp in
+ * current->thread, bumps kdump_cpu_freezed and marks the CPU in
+ * kdump_status[].  CPU 0 then spins in cpu_relax(); every other CPU is
+ * handed to ia64_jump_to_sal() with its sal_boot_rendez_state entry.
+ */
+void
+kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
+{
+       int this_cpu;
+
+       local_irq_disable();
+       this_cpu = smp_processor_id();
+       crash_save_this_cpu();
+       current->thread.ksp = (__u64)info->sw - 16;
+       atomic_inc(&kdump_cpu_freezed);
+       kdump_status[this_cpu] = 1;
+       mb();
+
+       if (this_cpu != 0) {
+               ia64_jump_to_sal(&sal_boot_rendez_state[this_cpu]);
+               return;
+       }
+
+       while (1)
+               cpu_relax();
+}
+
+/*
+ * Die-notifier callback that drives kdump from INIT and MCA events.
+ *
+ * Does nothing unless kdump_on_init is set and @val is one of the four
+ * handled events.  INIT events whose SAL state carries reason code 1
+ * (machine check rendezvous) are ignored.  Otherwise:
+ *   DIE_INIT_MONARCH_ENTER   - start the crash kernel
+ *   DIE_INIT_SLAVE_ENTER     - freeze this cpu
+ *   DIE_MCA_RENDZVOUS_LEAVE  - freeze this cpu if a kdump is in progress
+ *   DIE_MCA_MONARCH_LEAVE    - start the crash kernel if args->signr is 0
+ *
+ * @data is a struct die_args whose err field carries a pointer to a
+ * struct ia64_mca_notify_die.  Always returns NOTIFY_DONE.
+ */
+static int
+kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
+{
+       struct ia64_mca_notify_die *nd;
+       struct die_args *args = data;
+
+       if (!kdump_on_init)
+               return NOTIFY_DONE;
+
+       if (val != DIE_INIT_MONARCH_ENTER &&
+           val != DIE_INIT_SLAVE_ENTER &&
+           val != DIE_MCA_RENDZVOUS_LEAVE &&
+           val != DIE_MCA_MONARCH_LEAVE)
+               return NOTIFY_DONE;
+
+       nd = (struct ia64_mca_notify_die *)args->err;
+       /*
+        * Reason code 1 means machine check rendezvous.  Both INIT events
+        * must be compared against val explicitly: a bare
+        * DIE_INIT_SLAVE_ENTER operand is a non-zero constant and would
+        * make this test apply to the MCA events as well.
+        */
+       if ((val == DIE_INIT_MONARCH_ENTER || val == DIE_INIT_SLAVE_ENTER) &&
+                nd->sos->rv_rc == 1)
+               return NOTIFY_DONE;
+
+       switch (val) {
+               case DIE_INIT_MONARCH_ENTER:
+                       machine_kdump_on_init();
+                       break;
+               case DIE_INIT_SLAVE_ENTER:
+                       unw_init_running(kdump_cpu_freeze, NULL);
+                       break;
+               case DIE_MCA_RENDZVOUS_LEAVE:
+                       if (atomic_read(&kdump_in_progress))
+                               unw_init_running(kdump_cpu_freeze, NULL);
+                       break;
+               case DIE_MCA_MONARCH_LEAVE:
+                    /* die_register->signr indicates whether the MCA is recoverable */
+                       if (!args->signr)
+                               machine_kdump_on_init();
+                       break;
+       }
+       return NOTIFY_DONE;
+}
+
+#ifdef CONFIG_SYSCTL
+/*
+ * Sysctl entry "kdump_on_init": an int backed by the kdump_on_init
+ * variable, mode 0644, handled by proc_dointvec.  It has no binary
+ * sysctl number (CTL_UNNUMBERED).
+ */
+static ctl_table kdump_on_init_table[] = {
+       {
+               .ctl_name = CTL_UNNUMBERED,
+               .procname = "kdump_on_init",
+               .data = &kdump_on_init,
+               .maxlen = sizeof(int),
+               .mode = 0644,
+               .proc_handler = &proc_dointvec,
+       },
+       { .ctl_name = 0 }
+};
+
+/*
+ * Parent "kernel" directory entry (CTL_KERN) whose child table is
+ * kdump_on_init_table; this is the table handed to
+ * register_sysctl_table().
+ */
+static ctl_table sys_table[] = {
+       {
+         .ctl_name = CTL_KERN,
+         .procname = "kernel",
+         .mode = 0555,
+         .child = kdump_on_init_table,
+       },
+       { .ctl_name = 0 }
+};
+#endif
+
+static int
+machine_crash_setup(void)
+{
+       char *from = strstr(saved_command_line, "elfcorehdr=");
+       static struct notifier_block kdump_init_notifier_nb = {
+               .notifier_call = kdump_init_notifier,
+       };
+       int ret;
+       if (from)
+               elfcorehdr_addr = memparse(from+11, &from);
+       saved_max_pfn = (unsigned long)-1;
+       if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0)
+               return ret;
+#ifdef CONFIG_SYSCTL
+       register_sysctl_table(sys_table, 0);
+#endif
+       return 0;
+}
+
+__initcall(machine_crash_setup);
+
index bb8770a..9b96e7d 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/types.h>
 #include <linux/time.h>
 #include <linux/efi.h>
+#include <linux/kexec.h>
 
 #include <asm/io.h>
 #include <asm/kregs.h>
@@ -41,7 +42,7 @@ extern efi_status_t efi_call_phys (void *, ...);
 struct efi efi;
 EXPORT_SYMBOL(efi);
 static efi_runtime_services_t *runtime;
-static unsigned long mem_limit = ~0UL, max_addr = ~0UL;
+static unsigned long mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
 
 #define efi_call_virt(f, args...)      (*(f))(args)
 
@@ -421,6 +422,8 @@ efi_init (void)
                        mem_limit = memparse(cp + 4, &cp);
                } else if (memcmp(cp, "max_addr=", 9) == 0) {
                        max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
+               } else if (memcmp(cp, "min_addr=", 9) == 0) {
+                       min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
                } else {
                        while (*cp != ' ' && *cp)
                                ++cp;
@@ -428,6 +431,8 @@ efi_init (void)
                                ++cp;
                }
        }
+       if (min_addr != 0UL)
+               printk(KERN_INFO "Ignoring memory below %luMB\n", min_addr >> 20);
        if (max_addr != ~0UL)
                printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20);
 
@@ -894,7 +899,8 @@ find_memmap_space (void)
                as = max(contig_low, md->phys_addr);
                ae = min(contig_high, efi_md_end(md));
 
-               /* keep within max_addr= command line arg */
+               /* keep within max_addr= and min_addr= command line arg */
+               as = max(as, min_addr);
                ae = min(ae, max_addr);
                if (ae <= as)
                        continue;
@@ -1004,7 +1010,8 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
                } else
                        ae = efi_md_end(md);
 
-               /* keep within max_addr= command line arg */
+               /* keep within max_addr= and min_addr= command line arg */
+               as = max(as, min_addr);
                ae = min(ae, max_addr);
                if (ae <= as)
                        continue;
@@ -1116,6 +1123,58 @@ efi_initialize_iomem_resources(struct resource *code_resource,
                         */
                        insert_resource(res, code_resource);
                        insert_resource(res, data_resource);
+#ifdef CONFIG_KEXEC
+                        insert_resource(res, &efi_memmap_res);
+                        insert_resource(res, &boot_param_res);
+                       if (crashk_res.end > crashk_res.start)
+                               insert_resource(res, &crashk_res);
+#endif
                }
        }
 }
+
+#ifdef CONFIG_KEXEC
+/*
+ * Find a block of @size bytes, aligned to 64M, in write-back EFI memory
+ * that avoids the reserved regions.
+ *
+ * @r holds @n reserved regions (virtual addresses) and must already be
+ * sorted.  Returns the physical start address of the block, or ~0UL
+ * (after printing a warning) when no descriptor has room.
+ */
+unsigned long
+kdump_find_rsvd_region (unsigned long size,
+               struct rsvd_region *r, int n)
+{
+  const u64 alignment = 1UL << _PAGE_SIZE_64M;
+  void *map_start, *map_end, *cur;
+  u64 desc_size;
+  u64 start, end;
+  int i;
+
+  map_start = __va(ia64_boot_param->efi_memmap);
+  map_end   = map_start + ia64_boot_param->efi_memmap_size;
+  desc_size = ia64_boot_param->efi_memdesc_size;
+
+  for (cur = map_start; cur < map_end; cur += desc_size) {
+         efi_memory_desc_t *md = cur;
+
+         if (!efi_wb(md))
+                 continue;
+         start = ALIGN(md->phys_addr, alignment);
+         end = efi_md_end(md);
+         for (i = 0; i < n; i++) {
+               /* only reserved regions lying inside [start, end) matter */
+               if (__pa(r[i].start) < start || __pa(r[i].end) >= end)
+                       continue;
+               /* enough room before this reserved region */
+               if (__pa(r[i].start) > start + size)
+                       return start;
+               /* otherwise retry just past it */
+               start = ALIGN(__pa(r[i].end), alignment);
+               /* stop scanning unless the next region collides as well */
+               if (i >= n-1 || __pa(r[i+1].start) >= start + size)
+                       break;
+         }
+         if (end > start + size)
+               return start;
+  }
+
+  printk(KERN_WARNING "Cannot reserve 0x%lx byte of memory for crashdump\n",
+       size);
+  return ~0UL;
+}
+#endif
index 3390b7c..15234ed 100644 (file)
@@ -1575,7 +1575,7 @@ sys_call_table:
        data8 sys_mq_timedreceive               // 1265
        data8 sys_mq_notify
        data8 sys_mq_getsetattr
-       data8 sys_ni_syscall                    // reserved for kexec_load
+       data8 sys_kexec_load
        data8 sys_ni_syscall                    // reserved for vserver
        data8 sys_waitid                        // 1270
        data8 sys_add_key
index 60d6495..0fc5fb7 100644 (file)
@@ -288,6 +288,27 @@ nop (unsigned int irq)
        /* do nothing... */
 }
 
+
+#ifdef CONFIG_KEXEC
+/*
+ * For every vector, walk the RTEs attached to it; write the low RTE word
+ * with IOSAPIC_MASK set and the vector number, then issue an EOI for
+ * that vector on the same IOSAPIC.
+ */
+void
+kexec_disable_iosapic(void)
+{
+       struct iosapic_rte_info *rte;
+       unsigned int idx;
+
+       for (idx = 0; idx < IA64_NUM_VECTORS; idx++) {
+               struct iosapic_intr_info *info = iosapic_intr_info + idx;
+               u8 vec = idx;
+
+               list_for_each_entry(rte, &info->rtes, rte_list) {
+                       /* mask first, then EOI */
+                       iosapic_write(rte->addr,
+                                       IOSAPIC_RTE_LOW(rte->rte_index),
+                                       IOSAPIC_MASK|vec);
+                       iosapic_eoi(rte->addr, vec);
+               }
+       }
+}
+#endif
+
 static void
 mask_irq (unsigned int irq)
 {
diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
new file mode 100644 (file)
index 0000000..468233f
--- /dev/null
@@ -0,0 +1,133 @@
+/*
+ * arch/ia64/kernel/machine_kexec.c
+ *
+ * Handle transition of Linux booting another kernel
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2006 Intel Corp, Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/cpu.h>
+#include <linux/irq.h>
+#include <asm/mmu_context.h>
+#include <asm/setup.h>
+#include <asm/delay.h>
+#include <asm/meminit.h>
+
+typedef void (*relocate_new_kernel_t)(unsigned long, unsigned long,
+               struct ia64_boot_param *, unsigned long);
+
+struct kimage *ia64_kimage;
+
+struct resource efi_memmap_res = {
+        .name  = "EFI Memory Map",
+        .start = 0,
+        .end   = 0,
+        .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+struct resource boot_param_res = {
+        .name  = "Boot parameter",
+        .start = 0,
+        .end   = 0,
+        .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+
+/*
+ * Do whatever setup is needed on the image and on the reboot code
+ * buffer now, so that no allocation is needed later.
+ *
+ * Copies relocate_new_kernel_size bytes of the relocation code into the
+ * image's control code page, flushes the instruction cache for that
+ * range and records the image in ia64_kimage.  Always returns 0.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+       /* the first word at &relocate_new_kernel is used as the code address */
+       const unsigned long *desc = (unsigned long *)&relocate_new_kernel;
+       void *ctl_buf = page_address(image->control_code_page);
+       unsigned long ctl_start = (unsigned long)ctl_buf;
+
+       /* Pre-load control code buffer to minimize work in kexec path */
+       memcpy(ctl_buf, (const void *)desc[0], relocate_new_kernel_size);
+       flush_icache_range(ctl_start, ctl_start + relocate_new_kernel_size);
+       ia64_kimage = image;
+
+       return 0;
+}
+
+/* Nothing to undo: machine_kexec_prepare() acquires no resources of its own. */
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+/*
+ * Take every online cpu except the calling one down with cpu_down(),
+ * then mask the IOSAPIC entries.
+ */
+void machine_shutdown(void)
+{
+       int cpu;
+
+       for_each_online_cpu(cpu) {
+               if (cpu == smp_processor_id())
+                       continue;
+               cpu_down(cpu);
+       }
+       kexec_disable_iosapic();
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+extern void *efi_get_pal_addr(void);
+/*
+ * Final stage of kexec, run through unw_init_running() with @arg being
+ * the struct kimage to start.
+ *
+ * For a crash image the cpu state is saved first.  Local interrupt
+ * sources are then masked, in-service and pending interrupts are
+ * drained, the platform launch hook is called and control passes to the
+ * relocation code copied into the control code page.  Reaching the end
+ * of the function is a BUG().
+ */
+static void ia64_machine_kexec(struct unw_frame_info *info, void *arg)
+{
+       struct kimage *image = arg;
+       void *pal_addr = efi_get_pal_addr();
+       unsigned long code_addr = (unsigned long)page_address(image->control_code_page);
+       relocate_new_kernel_t rnk;
+       int n;
+
+       if (image->type == KEXEC_TYPE_CRASH) {
+               crash_save_this_cpu();
+               current->thread.ksp = (__u64)info->sw - 16;
+       }
+
+       /* Interrupts aren't acceptable while we reboot */
+       local_irq_disable();
+
+       /* Mask CMC and Performance Monitor interrupts */
+       ia64_setreg(_IA64_REG_CR_PMV, 1 << 16);
+       ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16);
+
+       /* Mask ITV and Local Redirect Registers */
+       ia64_set_itv(1 << 16);
+       ia64_set_lrr0(1 << 16);
+       ia64_set_lrr1(1 << 16);
+
+       /* terminate possible nested in-service interrupts */
+       for (n = 16; n > 0; n--)
+               ia64_eoi();
+
+       /* unmask TPR and clear any pending interrupts */
+       ia64_setreg(_IA64_REG_CR_TPR, 0);
+       ia64_srlz_d();
+       for (;;) {
+               unsigned long vector = ia64_get_ivr();
+
+               if (vector == IA64_SPURIOUS_INT_VECTOR)
+                       break;
+               ia64_eoi();
+       }
+       platform_kernel_launch_event();
+
+       /* the address of code_addr is used as the function pointer value */
+       rnk = (relocate_new_kernel_t)&code_addr;
+       rnk(image->head, image->start, ia64_boot_param,
+                    GRANULEROUNDDOWN((unsigned long) pal_addr));
+       BUG();
+}
+
+/*
+ * Start @image: run ia64_machine_kexec() through unw_init_running(), and
+ * spin forever should that call ever return.
+ */
+void machine_kexec(struct kimage *image)
+{
+       unw_init_running(ia64_machine_kexec, image);
+       while (1)
+               ;
+}
index 6bedd97..87c1c4f 100644 (file)
@@ -82,6 +82,7 @@
 #include <asm/system.h>
 #include <asm/sal.h>
 #include <asm/mca.h>
+#include <asm/kexec.h>
 
 #include <asm/irq.h>
 #include <asm/hw_irq.h>
@@ -1238,6 +1239,10 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
        } else {
                /* Dump buffered message to console */
                ia64_mlogbuf_finish(1);
+#ifdef CONFIG_CRASH_DUMP
+               atomic_set(&kdump_in_progress, 1);
+               monarch_cpu = -1;
+#endif
        }
        if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover)
                        == NOTIFY_STOP)
diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S
new file mode 100644 (file)
index 0000000..ae473e3
--- /dev/null
@@ -0,0 +1,334 @@
+/*
+ * arch/ia64/kernel/relocate_kernel.S
+ *
+ * Relocate kexec'able kernel and start it
+ *
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Khalid Aziz  <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Intel Corp,  Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+#include <asm/asmmacro.h>
+#include <asm/kregs.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mca_asm.h>
+
+       /* Must be relocatable PIC code callable as a C function
+        */
+       /*
+        * relocate_new_kernel(in0, in1, in2, in3)
+        *   in0 - first entry of the kexec page list (walked in the copy loop)
+        *   in1 - address branched to once copying is done (moved to b6)
+        *   in2 - boot parameter pointer; its physical form is left in r28
+        *   in3 - address whose instruction TR entry is purged ("pal code")
+        *
+        * Switches to physical mode on a private memory/register stack that
+        * lives inside this block, purges the TC and the TR entries named
+        * below, copies the pages described by the page list and finally
+        * branches to b6.
+        */
+GLOBAL_ENTRY(relocate_new_kernel)
+       .prologue
+       alloc r31=ar.pfs,4,0,0,0
+        .body
+.reloc_entry:
+{
+       rsm psr.i| psr.ic
+       mov r2=ip
+}
+       ;;
+{
+        flushrs                         // must be first insn in group
+        srlz.i
+}
+       ;;
+       dep r2=0,r2,61,3                //to physical address
+       ;;
+       //first switch to physical mode
+       // r3 = physical address of label 1 below, used as the rfi target
+       add r3=1f-.reloc_entry, r2
+       movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC
+       mov ar.rsc=0                    // put RSE in enforced lazy mode
+       ;;
+       // sp and bspstore are pointed at the stacks embedded at the end of this code
+       add sp=(memory_stack_end - 16 - .reloc_entry),r2
+       add r8=(register_stack - .reloc_entry),r2
+       ;;
+       mov r18=ar.rnat
+       mov ar.bspstore=r8
+       ;;
+        mov cr.ipsr=r16
+        mov cr.iip=r3
+        mov cr.ifs=r0
+       srlz.i
+       ;;
+       mov ar.rnat=r18
+       rfi
+       ;;
+1:
+       //physical mode code begin
+       mov b6=in1
+       dep r28=0,in2,61,3      //to physical address
+
+       // purge all TC entries
+#define O(member)       IA64_CPUINFO_##member##_OFFSET
+        GET_THIS_PADDR(r2, cpu_info)    // load phys addr of cpu_info into r2
+        ;;
+        addl r17=O(PTCE_STRIDE),r2
+        addl r2=O(PTCE_BASE),r2
+        ;;
+        ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));;            // r18=ptce_base
+        ld4 r19=[r2],4                                  // r19=ptce_count[0]
+        ld4 r21=[r17],4                                 // r21=ptce_stride[0]
+        ;;
+        ld4 r20=[r2]                                    // r20=ptce_count[1]
+        ld4 r22=[r17]                                   // r22=ptce_stride[1]
+        mov r24=r0
+        ;;
+        adds r20=-1,r20
+        ;;
+#undef O
+       // outer loop (label 2) runs ptce_count[0] times, inner loop (label 3)
+       // issues ptce_count[1] ptc.e instructions per outer iteration
+2:
+        cmp.ltu p6,p7=r24,r19
+(p7)    br.cond.dpnt.few 4f
+        mov ar.lc=r20
+3:
+        ptc.e r18
+        ;;
+        add r18=r22,r18
+        br.cloop.sptk.few 3b
+        ;;
+        add r18=r21,r18
+        add r24=1,r24
+        ;;
+        br.sptk.few 2b
+4:
+        srlz.i
+        ;;
+       //purge TR entry for kernel text and data
+        movl r16=KERNEL_START
+        mov r18=KERNEL_TR_PAGE_SHIFT<<2
+        ;;
+        ptr.i r16, r18
+        ptr.d r16, r18
+        ;;
+        srlz.i
+        ;;
+
+       // purge TR entry for percpu data
+        movl r16=PERCPU_ADDR
+        mov r18=PERCPU_PAGE_SHIFT<<2
+        ;;
+        ptr.d r16,r18
+        ;;
+        srlz.d
+       ;;
+
+        // purge TR entry for pal code
+        mov r16=in3
+        mov r18=IA64_GRANULE_SHIFT<<2
+        ;;
+        ptr.i r16,r18
+        ;;
+        srlz.i
+       ;;
+
+        // purge TR entry for stack
+        mov r16=IA64_KR(CURRENT_STACK)
+        ;;
+        shl r16=r16,IA64_GRANULE_SHIFT
+        movl r19=PAGE_OFFSET
+        ;;
+        add r16=r19,r16
+        mov r18=IA64_GRANULE_SHIFT<<2
+        ;;
+        ptr.d r16,r18
+        ;;
+        srlz.i
+       ;;
+
+       //copy segments
+       // r16 = PAGE_MASK, r30 = current list entry, r17 = destination, r18 = source
+       movl r16=PAGE_MASK
+        mov  r30=in0                    // in0 is page_list
+        br.sptk.few .dest_page
+       ;;
+.loop:
+       ld8  r30=[in0], 8;;
+.dest_page:
+       tbit.z p0, p6=r30, 0;;          // 0x1 dest page
+(p6)   and r17=r30, r16
+(p6)   br.cond.sptk.few .loop;;
+
+       tbit.z p0, p6=r30, 1;;          // 0x2 indirect page
+(p6)   and in0=r30, r16
+(p6)   br.cond.sptk.few .loop;;
+
+       tbit.z p0, p6=r30, 2;;          // 0x4 end flag
+(p6)   br.cond.sptk.few .end_loop;;
+
+       tbit.z p6, p0=r30, 3;;          // 0x8 source page
+(p6)   br.cond.sptk.few .loop
+
+       and r18=r30, r16
+
+       // simple copy page, may optimize later
+       movl r14=PAGE_SIZE/8 - 1;;
+       mov ar.lc=r14;;
+1:
+       ld8 r14=[r18], 8;;
+       st8 [r17]=r14;;
+       fc.i r17
+       add r17=8, r17
+       br.ctop.sptk.few 1b
+       br.sptk.few .loop
+       ;;
+
+.end_loop:
+       sync.i                  // for fc.i
+       ;;
+       srlz.i
+       ;;
+       srlz.d
+       ;;
+       br.call.sptk.many b0=b6;;
+
+       // private stacks used after the switch to physical mode
+.align  32
+memory_stack:
+       .fill           8192, 1, 0
+memory_stack_end:
+register_stack:
+       .fill           8192, 1, 0
+register_stack_end:
+relocate_new_kernel_end:
+END(relocate_new_kernel)
+
+       // byte length of relocate_new_kernel including its embedded stacks;
+       // machine_kexec_prepare() copies this many bytes into the control page
+.global relocate_new_kernel_size
+relocate_new_kernel_size:
+       data8   relocate_new_kernel_end - relocate_new_kernel
+
+       /*
+        * ia64_dump_cpu_regs(in0)
+        *   in0 - buffer of 8-byte slots that receives the register dump
+        *
+        * Slots 0-31 receive r0-r31 (in0 walks them; r4 and r5 are saved
+        * first because they serve as scratch registers afterwards).  loc1
+        * walks the slots from 32 upwards: rnat, pr, b0-b7, ip (taken from
+        * b0), cfm, user mask (written as 0), then the application registers
+        * in the order of the comments below.
+        *
+        * NOTE(review): ar.unat and ar.fpsr are stored twice in a row, which
+        * places ar.pfs, ar.lc, ar.ec, ar.csd and ar.ssd two slots later than
+        * a single store would - confirm against the layout consumers expect.
+        */
+GLOBAL_ENTRY(ia64_dump_cpu_regs)
+        .prologue
+        alloc loc0=ar.pfs,1,2,0,0
+        .body
+        mov     ar.rsc=0                // put RSE in enforced lazy mode
+        add     loc1=4*8, in0           // save r4 and r5 first
+        ;;
+{
+        flushrs                         // flush dirty regs to backing store
+        srlz.i
+}
+        st8 [loc1]=r4, 8
+        ;;
+        st8 [loc1]=r5, 8
+        ;;
+        add loc1=32*8, in0
+        mov r4=ar.rnat
+        ;;
+        st8 [in0]=r0, 8                        // r0
+        st8 [loc1]=r4, 8               // rnat
+        mov r5=pr
+        ;;
+        st8 [in0]=r1, 8                        // r1
+        st8 [loc1]=r5, 8               // pr
+        mov r4=b0
+        ;;
+        st8 [in0]=r2, 8                        // r2
+        st8 [loc1]=r4, 8               // b0
+        mov r5=b1;
+        ;;
+        st8 [in0]=r3, 24               // r3
+        st8 [loc1]=r5, 8               // b1
+        mov r4=b2
+        ;;
+        st8 [in0]=r6, 8                        // r6
+        st8 [loc1]=r4, 8               // b2
+       mov r5=b3
+        ;;
+        st8 [in0]=r7, 8                        // r7
+        st8 [loc1]=r5, 8               // b3
+        mov r4=b4
+        ;;
+        st8 [in0]=r8, 8                        // r8
+        st8 [loc1]=r4, 8               // b4
+        mov r5=b5
+        ;;
+        st8 [in0]=r9, 8                        // r9
+        st8 [loc1]=r5, 8               // b5
+        mov r4=b6
+        ;;
+        st8 [in0]=r10, 8               // r10
+        st8 [loc1]=r4, 8               // b6 (r4 holds b6; r5 still holds b5)
+        mov r5=b7
+        ;;
+        st8 [in0]=r11, 8               // r11
+        st8 [loc1]=r5, 8               // b7
+        mov r4=b0
+        ;;
+        st8 [in0]=r12, 8               // r12
+        st8 [loc1]=r4, 8               // ip
+        mov r5=loc0
+       ;;
+        st8 [in0]=r13, 8               // r13
+        extr.u r5=r5, 0, 38            // ar.pfs.pfm
+       mov r4=r0                       // user mask
+        ;;
+        st8 [in0]=r14, 8               // r14
+        st8 [loc1]=r5, 8               // cfm
+        ;;
+        st8 [in0]=r15, 8               // r15
+        st8 [loc1]=r4, 8               // user mask
+       mov r5=ar.rsc
+        ;;
+        st8 [in0]=r16, 8               // r16
+        st8 [loc1]=r5, 8               // ar.rsc
+        mov r4=ar.bsp
+        ;;
+        st8 [in0]=r17, 8               // r17
+        st8 [loc1]=r4, 8               // ar.bsp
+        mov r5=ar.bspstore
+        ;;
+        st8 [in0]=r18, 8               // r18
+        st8 [loc1]=r5, 8               // ar.bspstore
+        mov r4=ar.rnat
+        ;;
+        st8 [in0]=r19, 8               // r19
+        st8 [loc1]=r4, 8               // ar.rnat
+        mov r5=ar.ccv
+        ;;
+        st8 [in0]=r20, 8               // r20
+       st8 [loc1]=r5, 8                // ar.ccv
+        mov r4=ar.unat
+        ;;
+        st8 [in0]=r21, 8               // r21
+        st8 [loc1]=r4, 8               // ar.unat
+        mov r5 = ar.fpsr
+        ;;
+        st8 [in0]=r22, 8               // r22
+        st8 [loc1]=r5, 8               // ar.fpsr
+        mov r4 = ar.unat
+        ;;
+        st8 [in0]=r23, 8               // r23
+        st8 [loc1]=r4, 8               // unat
+        mov r5 = ar.fpsr
+        ;;
+        st8 [in0]=r24, 8               // r24
+        st8 [loc1]=r5, 8               // fpsr
+        mov r4 = ar.pfs
+        ;;
+        st8 [in0]=r25, 8               // r25
+        st8 [loc1]=r4, 8               // ar.pfs
+        mov r5 = ar.lc
+        ;;
+        st8 [in0]=r26, 8               // r26
+        st8 [loc1]=r5, 8               // ar.lc
+        mov r4 = ar.ec
+        ;;
+        st8 [in0]=r27, 8               // r27
+        st8 [loc1]=r4, 8               // ar.ec
+        mov r5 = ar.csd
+        ;;
+        st8 [in0]=r28, 8               // r28
+        st8 [loc1]=r5, 8               // ar.csd
+        mov r4 = ar.ssd
+        ;;
+        st8 [in0]=r29, 8               // r29
+        st8 [loc1]=r4, 8               // ar.ssd
+        ;;
+        st8 [in0]=r30, 8               // r30
+        ;;
+       st8 [in0]=r31, 8                // r31
+        mov ar.pfs=loc0
+        ;;
+        br.ret.sptk.many rp
+END(ia64_dump_cpu_regs)
+
+
index d10404a..14e1200 100644 (file)
@@ -43,6 +43,8 @@
 #include <linux/initrd.h>
 #include <linux/pm.h>
 #include <linux/cpufreq.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
 
 #include <asm/ia32.h>
 #include <asm/machvec.h>
@@ -252,6 +254,41 @@ reserve_memory (void)
        efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
        n++;
 
+#ifdef CONFIG_KEXEC
+       /* crashkernel=size@offset specifies the size to reserve for a crash
+        * kernel. (The offset is ignored; it is accepted only to stay
+        * compatible with other archs.)
+        * By reserving this memory we guarantee that Linux never sets it
+        * up as a DMA target. Useful for holding code to do something
+        * appropriate after a kernel panic.
+        */
+       {
+               char *from = strstr(saved_command_line, "crashkernel=");
+               unsigned long base, size;
+               if (from) {
+                       size = memparse(from + 12, &from);
+                       if (size) {
+                               sort_regions(rsvd_region, n);
+                               base = kdump_find_rsvd_region(size,
+                               rsvd_region, n);
+                               if (base != ~0UL) {
+                                       rsvd_region[n].start =
+                                               (unsigned long)__va(base);
+                                       rsvd_region[n].end =
+                                               (unsigned long)__va(base + size);
+                                       n++;
+                                       crashk_res.start = base;
+                                       crashk_res.end = base + size - 1;
+                               }
+                       }
+               }
+               efi_memmap_res.start = ia64_boot_param->efi_memmap;
+                efi_memmap_res.end = efi_memmap_res.start +
+                        ia64_boot_param->efi_memmap_size;
+                boot_param_res.start = __pa(ia64_boot_param);
+                boot_param_res.end = boot_param_res.start +
+                        sizeof(*ia64_boot_param);
+       }
+#endif
        /* end of memory marker */
        rsvd_region[n].start = ~0UL;
        rsvd_region[n].end   = ~0UL;
@@ -263,6 +300,7 @@ reserve_memory (void)
        sort_regions(rsvd_region, num_rsvd_regions);
 }
 
+
 /**
  * find_initrd - get initrd parameters from the boot parameter structure
  *
index 6ab95ce..b1b9aa4 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/delay.h>
 #include <linux/efi.h>
 #include <linux/bitops.h>
+#include <linux/kexec.h>
 
 #include <asm/atomic.h>
 #include <asm/current.h>
@@ -66,6 +67,7 @@ static volatile struct call_data_struct *call_data;
 
 #define IPI_CALL_FUNC          0
 #define IPI_CPU_STOP           1
+#define IPI_KDUMP_CPU_STOP     3
 
 /* This needs to be cacheline aligned because it is written to by *other* CPUs.  */
 static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned;
@@ -155,7 +157,11 @@ handle_IPI (int irq, void *dev_id)
                              case IPI_CPU_STOP:
                                stop_this_cpu();
                                break;
-
+#ifdef CONFIG_CRASH_DUMP
+                             case IPI_KDUMP_CPU_STOP:
+                               unw_init_running(kdump_cpu_freeze, NULL);
+                               break;
+#endif
                              default:
                                printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which);
                                break;
@@ -213,6 +219,26 @@ send_IPI_self (int op)
        send_IPI_single(smp_processor_id(), op);
 }
 
+#ifdef CONFIG_CRASH_DUMP
+/* Send IPI_KDUMP_CPU_STOP to every CPU except the calling one. */
+void
+kdump_smp_send_stop(void)
+{
+       send_IPI_allbutself(IPI_KDUMP_CPU_STOP);
+}
+
+/* Send an IA64_IPI_DM_INIT IPI to each other online CPU with kdump_status 0. */
+void
+kdump_smp_send_init(void)
+{
+       unsigned int cpu, self_cpu = smp_processor_id();
+
+       for_each_online_cpu(cpu) {
+               if (cpu != self_cpu && kdump_status[cpu] == 0)
+                       platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0);
+       }
+}
+#endif
 /*
  * Called with preeemption disabled.
  */
index 1d009f9..a934ad0 100644 (file)
@@ -769,5 +769,13 @@ int sn_prom_feature_available(int id)
                return 0;
        return test_bit(id, sn_prom_features);
 }
+
+void
+sn_kernel_launch_event(void)
+{
+       /* Make the SN SAL launch call; a non-zero status is only logged. */
+       if (ia64_sn_kernel_launch_event())
+               printk(KERN_ERR "KEXEC is not supported in this PROM, Please update the PROM.\n");
+}
 EXPORT_SYMBOL(sn_prom_feature_available);
 
diff --git a/include/asm-ia64/kexec.h b/include/asm-ia64/kexec.h
new file mode 100644 (file)
index 0000000..01c36b0
--- /dev/null
@@ -0,0 +1,47 @@
+#ifndef _ASM_IA64_KEXEC_H
+#define _ASM_IA64_KEXEC_H
+
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+/* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+
+#define KEXEC_CONTROL_CODE_SIZE (8192 + 8192 + 4096)
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_IA_64
+
+#define MAX_NOTE_BYTES 1024
+
+#define kexec_flush_icache_page(page) do { \
+                unsigned long page_addr = (unsigned long)page_address(page); \
+                flush_icache_range(page_addr, page_addr + PAGE_SIZE); \
+        } while(0)
+
+extern struct kimage *ia64_kimage;
+DECLARE_PER_CPU(u64, ia64_mca_pal_base);
+extern const unsigned int relocate_new_kernel_size;
+extern void relocate_new_kernel(unsigned long, unsigned long,
+               struct ia64_boot_param *, unsigned long);
+static inline void
+crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs)
+{
+}
+extern struct resource efi_memmap_res;
+extern struct resource boot_param_res;
+extern void kdump_smp_send_stop(void);
+extern void kdump_smp_send_init(void);
+extern void kexec_disable_iosapic(void);
+extern void crash_save_this_cpu(void);
+struct rsvd_region;
+extern unsigned long kdump_find_rsvd_region(unsigned long size,
+               struct rsvd_region *rsvd_regions, int n);
+extern void kdump_cpu_freeze(struct unw_frame_info *info, void *arg);
+extern int kdump_status[];
+extern atomic_t kdump_cpu_freezed;
+extern atomic_t kdump_in_progress;
+
+#endif /* _ASM_IA64_KEXEC_H */
index 8f784f8..a3891eb 100644 (file)
@@ -37,6 +37,7 @@ typedef int ia64_mv_pci_legacy_write_t (struct pci_bus *, u16 port, u32 val,
                                        u8 size);
 typedef void ia64_mv_migrate_t(struct task_struct * task);
 typedef void ia64_mv_pci_fixup_bus_t (struct pci_bus *);
+typedef void ia64_mv_kernel_launch_event_t(void);
 
 /* DMA-mapping interface: */
 typedef void ia64_mv_dma_init (void);
@@ -218,6 +219,7 @@ struct ia64_machine_vector {
        ia64_mv_setup_msi_irq_t *setup_msi_irq;
        ia64_mv_teardown_msi_irq_t *teardown_msi_irq;
        ia64_mv_pci_fixup_bus_t *pci_fixup_bus;
+       ia64_mv_kernel_launch_event_t *kernel_launch_event;
 } __attribute__((__aligned__(16))); /* align attrib? see above comment */
 
 #define MACHVEC_INIT(name)                     \
@@ -318,6 +320,9 @@ extern ia64_mv_dma_supported                swiotlb_dma_supported;
 #ifndef platform_tlb_migrate_finish
 # define platform_tlb_migrate_finish   machvec_noop_mm
 #endif
+#ifndef platform_kernel_launch_event
+# define platform_kernel_launch_event  machvec_noop
+#endif
 #ifndef platform_dma_init
 # define platform_dma_init             swiotlb_init
 #endif
index 83325f6..eaa2fce 100644 (file)
@@ -67,6 +67,7 @@ extern ia64_mv_dma_sync_sg_for_device sn_dma_sync_sg_for_device;
 extern ia64_mv_dma_mapping_error       sn_dma_mapping_error;
 extern ia64_mv_dma_supported           sn_dma_supported;
 extern ia64_mv_migrate_t               sn_migrate;
+extern ia64_mv_kernel_launch_event_t   sn_kernel_launch_event;
 extern ia64_mv_setup_msi_irq_t         sn_setup_msi_irq;
 extern ia64_mv_teardown_msi_irq_t      sn_teardown_msi_irq;
 extern ia64_mv_pci_fixup_bus_t         sn_pci_fixup_bus;
@@ -121,6 +122,7 @@ extern ia64_mv_pci_fixup_bus_t              sn_pci_fixup_bus;
 #define platform_dma_mapping_error             sn_dma_mapping_error
 #define platform_dma_supported         sn_dma_supported
 #define platform_migrate               sn_migrate
+#define platform_kernel_launch_event    sn_kernel_launch_event
 #ifdef CONFIG_PCI_MSI
 #define platform_setup_msi_irq         sn_setup_msi_irq
 #define platform_teardown_msi_irq      sn_teardown_msi_irq
index c3b1f86..c8df759 100644 (file)
  *     - initrd (optional)
  *     - command line string
  *     - kernel code & data
+ *     - crash dumping code reserved region
  *     - Kernel memory map built from EFI memory map
  *
  * More could be added if necessary
  */
-#define IA64_MAX_RSVD_REGIONS 6
+#define IA64_MAX_RSVD_REGIONS 7
 
 struct rsvd_region {
        unsigned long start;    /* virtual address of beginning of element */
index be5d83a..2c4004e 100644 (file)
@@ -88,6 +88,8 @@
 #define  SN_SAL_INJECT_ERROR                      0x02000067
 #define  SN_SAL_SET_CPU_NUMBER                    0x02000068
 
+#define  SN_SAL_KERNEL_LAUNCH_EVENT               0x02000069
+
 /*
  * Service-specific constants
  */
@@ -1155,4 +1157,11 @@ ia64_sn_set_cpu_number(int cpu)
        SAL_CALL_NOLOCK(rv, SN_SAL_SET_CPU_NUMBER, cpu, 0, 0, 0, 0, 0, 0);
        return rv.status;
 }
+static inline int
+ia64_sn_kernel_launch_event(void)
+{
+       struct ia64_sal_retval rv;
+       SAL_CALL_NOLOCK(rv, SN_SAL_KERNEL_LAUNCH_EVENT, 0, 0, 0, 0, 0, 0, 0);
+       return rv.status;       /* status field of the SAL return value */
+}
 #endif /* _ASM_IA64_SN_SN_SAL_H */
index a4ede62..e14cd38 100644 (file)
@@ -108,6 +108,10 @@ int kexec_should_crash(struct task_struct *);
 extern struct kimage *kexec_image;
 extern struct kimage *kexec_crash_image;
 
+#ifndef kexec_flush_icache_page
+#define kexec_flush_icache_page(page)
+#endif
+
 #define KEXEC_ON_CRASH  0x00000001
 #define KEXEC_ARCH_MASK 0xffff0000
 
@@ -133,6 +137,7 @@ extern struct resource crashk_res;
 typedef u32 note_buf_t[MAX_NOTE_BYTES/4];
 extern note_buf_t *crash_notes;
 
+
 #else /* !CONFIG_KEXEC */
 struct pt_regs;
 struct task_struct;
index fcdd5d2..05aada2 100644 (file)
@@ -851,6 +851,7 @@ static int kimage_load_crash_segment(struct kimage *image,
                        memset(ptr + uchunk, 0, mchunk - uchunk);
                }
                result = copy_from_user(ptr, buf, uchunk);
+               kexec_flush_icache_page(page);
                kunmap(page);
                if (result) {
                        result = (result < 0) ? result : -EIO;