nv-tegra.nvidia Code Review - linux-2.6.git/blobdiff - arch/i386/kernel/alternative.c
x86: Stop MCEs and NMIs during code patching
[linux-2.6.git] / arch / i386 / kernel / alternative.c
index 426f59b0106b65ae5c9fe7d574354c92ff9a8a32..c3750c2c41137a694be9570443f59b379cc4cc4d 100644 (file)
@@ -2,26 +2,57 @@
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
+#include <linux/kprobes.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
 #include <asm/alternative.h>
 #include <asm/sections.h>
+#include <asm/pgtable.h>
+#include <asm/mce.h>
+#include <asm/nmi.h>
 
-static int smp_alt_once      = 0;
-static int debug_alternative = 0;
+#ifdef CONFIG_HOTPLUG_CPU
+static int smp_alt_once;
 
 static int __init bootonly(char *str)
 {
        smp_alt_once = 1;
        return 1;
 }
+__setup("smp-alt-boot", bootonly);
+#else
+#define smp_alt_once 1
+#endif
+
+static int debug_alternative;
+
 static int __init debug_alt(char *str)
 {
        debug_alternative = 1;
        return 1;
 }
-
-__setup("smp-alt-boot", bootonly);
 __setup("debug-alternative", debug_alt);
 
+static int noreplace_smp;      /* when set, the SMP-alternatives entry points below return early */
+/* Setup handler for "noreplace-smp": sets noreplace_smp and returns 1. */
+static int __init setup_noreplace_smp(char *str)
+{
+       noreplace_smp = 1;
+       return 1;
+}
+__setup("noreplace-smp", setup_noreplace_smp);
+
+#ifdef CONFIG_PARAVIRT
+static int noreplace_paravirt; /* when set, apply_paravirt() patches nothing */
+/* Setup handler for "noreplace-paravirt": sets noreplace_paravirt, returns 1. */
+static int __init setup_noreplace_paravirt(char *str)
+{
+       noreplace_paravirt = 1;
+       return 1;
+}
+__setup("noreplace-paravirt", setup_noreplace_paravirt);
+#endif
+
 #define DPRINTK(fmt, args...) if (debug_alternative) \
        printk(KERN_DEBUG fmt, args)
 
@@ -125,18 +156,15 @@ static void nop_out(void *insns, unsigned int len)
                unsigned int noplen = len;
                if (noplen > ASM_NOP_MAX)
                        noplen = ASM_NOP_MAX;
-               memcpy(insns, noptable[noplen], noplen);
+               text_poke(insns, noptable[noplen], noplen);
                insns += noplen;
                len -= noplen;
        }
 }
 
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
 extern u8 *__smp_locks[], *__smp_locks_end[];
 
-extern u8 __smp_alt_begin[], __smp_alt_end[];
-
 /* Replace instructions with better alternatives for this CPU type.
    This runs before SMP is initialized to avoid SMP problems with
    self modifying code. This implies that assymetric systems where
@@ -171,29 +199,6 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 
 #ifdef CONFIG_SMP
 
-static void alternatives_smp_save(struct alt_instr *start, struct alt_instr *end)
-{
-       struct alt_instr *a;
-
-       DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end);
-       for (a = start; a < end; a++) {
-               memcpy(a->replacement + a->replacementlen,
-                      a->instr,
-                      a->instrlen);
-       }
-}
-
-static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end)
-{
-       struct alt_instr *a;
-
-       for (a = start; a < end; a++) {
-               memcpy(a->instr,
-                      a->replacement + a->replacementlen,
-                      a->instrlen);
-       }
-}
-
 static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
 {
        u8 **ptr;
@@ -203,7 +208,7 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
                        continue;
                if (*ptr > text_end)
                        continue;
-               **ptr = 0xf0; /* lock prefix */
+               text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */
        };
 }
 
@@ -211,6 +216,9 @@ static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end
 {
        u8 **ptr;
 
+       if (noreplace_smp)
+               return;
+
        for (ptr = start; ptr < end; ptr++) {
                if (*ptr < text)
                        continue;
@@ -245,6 +253,9 @@ void alternatives_smp_module_add(struct module *mod, char *name,
        struct smp_alt_module *smp;
        unsigned long flags;
 
+       if (noreplace_smp)
+               return;
+
        if (smp_alt_once) {
                if (boot_cpu_has(X86_FEATURE_UP))
                        alternatives_smp_unlock(locks, locks_end,
@@ -279,7 +290,7 @@ void alternatives_smp_module_del(struct module *mod)
        struct smp_alt_module *item;
        unsigned long flags;
 
-       if (smp_alt_once)
+       if (smp_alt_once || noreplace_smp)
                return;
 
        spin_lock_irqsave(&smp_alt, flags);
@@ -310,7 +321,7 @@ void alternatives_smp_switch(int smp)
        return;
 #endif
 
-       if (smp_alt_once)
+       if (noreplace_smp || smp_alt_once)
                return;
        BUG_ON(!smp && (num_online_cpus() > 1));
 
@@ -319,8 +330,6 @@ void alternatives_smp_switch(int smp)
                printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
                clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
                clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
-               alternatives_smp_apply(__smp_alt_instructions,
-                                      __smp_alt_instructions_end);
                list_for_each_entry(mod, &smp_alt_modules, next)
                        alternatives_smp_lock(mod->locks, mod->locks_end,
                                              mod->text, mod->text_end);
@@ -328,8 +337,6 @@ void alternatives_smp_switch(int smp)
                printk(KERN_INFO "SMP alternatives: switching to UP code\n");
                set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
                set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
-               apply_alternatives(__smp_alt_instructions,
-                                  __smp_alt_instructions_end);
                list_for_each_entry(mod, &smp_alt_modules, next)
                        alternatives_smp_unlock(mod->locks, mod->locks_end,
                                                mod->text, mod->text_end);
@@ -340,36 +347,27 @@ void alternatives_smp_switch(int smp)
 #endif
 
 #ifdef CONFIG_PARAVIRT
-void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
+void apply_paravirt(struct paravirt_patch_site *start,
+                   struct paravirt_patch_site *end)
 {
-       struct paravirt_patch *p;
+       struct paravirt_patch_site *p;
+       /* The noreplace-paravirt setup option leaves every site unpatched. */
+       if (noreplace_paravirt)
+               return;
 
        for (p = start; p < end; p++) {
                unsigned int used;
 
                used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr,
                                          p->len);
-#ifdef CONFIG_DEBUG_PARAVIRT
-               {
-               int i;
-               /* Deliberately clobber regs using "not %reg" to find bugs. */
-               for (i = 0; i < 3; i++) {
-                       if (p->len - used >= 2 && (p->clobbers & (1 << i))) {
-                               memcpy(p->instr + used, "\xf7\xd0", 2);
-                               p->instr[used+1] |= i;
-                               used += 2;
-                       }
-               }
-               }
-#endif
+               /* The patch hook may not use more bytes than the site has. */
+               BUG_ON(used > p->len);
+
                /* Pad the rest with nops */
                nop_out(p->instr + used, p->len - used);
        }
-
-       /* Sync to be conservative, in case we patched following instructions */
-       sync_core();
 }
-extern struct paravirt_patch __start_parainstructions[],
+extern struct paravirt_patch_site __start_parainstructions[],
        __stop_parainstructions[];
 #endif /* CONFIG_PARAVIRT */
 
@@ -377,6 +375,14 @@ void __init alternative_instructions(void)
 {
        unsigned long flags;
 
+       /* The patching is not fully atomic, so try to avoid local interruptions
+          that might execute the code being patched.
+          Other CPUs are not running at this point. */
+       stop_nmi();
+#ifdef CONFIG_MCE
+       stop_mce();
+#endif
+
        local_irq_save(flags);
        apply_alternatives(__alt_instructions, __alt_instructions_end);
 
@@ -386,8 +392,6 @@ void __init alternative_instructions(void)
 #ifdef CONFIG_HOTPLUG_CPU
        if (num_possible_cpus() < 2)
                smp_alt_once = 1;
-#else
-       smp_alt_once = 1;
 #endif
 
 #ifdef CONFIG_SMP
@@ -396,23 +400,52 @@ void __init alternative_instructions(void)
                        printk(KERN_INFO "SMP alternatives: switching to UP code\n");
                        set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
                        set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
-                       apply_alternatives(__smp_alt_instructions,
-                                          __smp_alt_instructions_end);
                        alternatives_smp_unlock(__smp_locks, __smp_locks_end,
                                                _text, _etext);
                }
                free_init_pages("SMP alternatives",
-                               (unsigned long)__smp_alt_begin,
-                               (unsigned long)__smp_alt_end);
+                               (unsigned long)__smp_locks,
+                               (unsigned long)__smp_locks_end);
        } else {
-               alternatives_smp_save(__smp_alt_instructions,
-                                     __smp_alt_instructions_end);
                alternatives_smp_module_add(NULL, "core kernel",
                                            __smp_locks, __smp_locks_end,
                                            _text, _etext);
                alternatives_smp_switch(0);
        }
 #endif
-       apply_paravirt(__start_parainstructions, __stop_parainstructions);
+       apply_paravirt(__parainstructions, __parainstructions_end);
        local_irq_restore(flags);
+
+       restart_nmi();
+#ifdef CONFIG_MCE
+       restart_mce();
+#endif
+}
+
+/*
+ * text_poke - copy @len bytes from @opcode over the code at @oaddr.
+ * A non-writable target (per pte_write()) is patched through a temporary
+ * two-page vmap() alias; if that vmap() fails, nothing is written.
+ * Warning: when patching more than one byte of an instruction, other CPUs
+ * must not execute this code in parallel, no thread may be preempted in the
+ * middle of these instructions, and the local CPU must be protected against
+ * NMI or MCE handlers seeing an inconsistent instruction while you patch.
+ */
+void __kprobes text_poke(void *oaddr, unsigned char *opcode, int len)
+{
+       u8 *addr = oaddr, *map = NULL;  /* map: page-aligned vmap() base */
+       if (!pte_write(*lookup_address((unsigned long)addr))) {
+               struct page *p[2] = { virt_to_page(addr), virt_to_page(addr+PAGE_SIZE) };
+               map = vmap(p, 2, VM_MAP, PAGE_KERNEL);
+               if (!map)
+                       return;
+               addr = map + ((unsigned long)oaddr) % PAGE_SIZE;
+       }
+       memcpy(addr, opcode, len);
+       sync_core();
+       /* Optional: may speed CPU recovery up. Ignores the cross-cacheline case. */
+       if (cpu_has_clflush)
+               asm("clflush (%0) " :: "r" (oaddr) : "memory");
+       if (map)        /* hand vunmap() the base from vmap(), not base + offset */
+               vunmap(map);
 }