* the dangers of modifying code on the fly.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
+#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
+#include <linux/module.h>
+
+#include <trace/syscall.h>
+#include <asm/cacheflush.h>
+#include <asm/kprobes.h>
#include <asm/ftrace.h>
-#include <linux/ftrace.h>
#include <asm/nops.h>
-#include <asm/nmi.h>
+#ifdef CONFIG_DYNAMIC_FTRACE
+int ftrace_arch_code_modify_prepare(void)
+{
+ set_kernel_text_rw();
+ set_all_modules_text_rw();
+ return 0;
+}
-#ifdef CONFIG_FUNCTION_RET_TRACER
+int ftrace_arch_code_modify_post_process(void)
+{
+ set_all_modules_text_ro();
+ set_kernel_text_ro();
+ return 0;
+}
-/*
- * These functions are picked from those used on
- * this page for dynamic ftrace. They have been
- * simplified to ignore all traces in NMI context.
- */
-static atomic_t in_nmi;
+union ftrace_code_union {
+ char code[MCOUNT_INSN_SIZE];
+ struct {
+ char e8;
+ int offset;
+ } __attribute__((packed));
+};
-void ftrace_nmi_enter(void)
+static int ftrace_calc_offset(long ip, long addr)
{
- atomic_inc(&in_nmi);
+ return (int)(addr - ip);
}
-void ftrace_nmi_exit(void)
+static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
- atomic_dec(&in_nmi);
+ static union ftrace_code_union calc;
+
+ calc.e8 = 0xe8;
+ calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
+
+ /*
+ * No locking needed; this must be called via stop_machine(),
+ * which in essence is like running on a uniprocessor machine.
+ */
+ return calc.code;
}
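+
+/*
+ * Illustrative sketch (addresses are made up): patching a call site at
+ * ip = 0x1000 to call addr = 0x2000 produces the 5-byte x86
+ * "call rel32" instruction
+ *
+ *   e8 fb 0f 00 00        call +0xffb
+ *
+ * since the rel32 offset is taken from the end of the instruction:
+ *   0x2000 - (0x1000 + MCOUNT_INSN_SIZE) = 0xffb
+ */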
-/*
- * Synchronize accesses to return adresses stack with
- * interrupts.
- */
-static raw_spinlock_t ret_stack_lock;
+static inline int
+within(unsigned long addr, unsigned long start, unsigned long end)
+{
+ return addr >= start && addr < end;
+}
-/* Add a function return address to the trace stack on thread info.*/
-static int push_return_trace(unsigned long ret, unsigned long long time,
- unsigned long func)
+static int
+do_ftrace_mod_code(unsigned long ip, const void *new_code)
{
- int index;
- struct thread_info *ti;
- unsigned long flags;
- int err = 0;
+ /*
+ * On x86_64, kernel text mappings are mapped read-only with
+ * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
+ * of the kernel text mapping to modify the kernel text.
+ *
+ * For 32-bit kernels, these mappings are the same, so we can
+ * use the kernel identity mapping to modify code.
+ */
+ if (within(ip, (unsigned long)_text, (unsigned long)_etext))
+ ip = (unsigned long)__va(__pa_symbol(ip));
- raw_local_irq_save(flags);
- __raw_spin_lock(&ret_stack_lock);
+ return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
+}
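+
+/*
+ * For example (address illustrative): with CONFIG_DEBUG_RODATA, a text
+ * address such as ip = 0xffffffff81000000 lies in [_text, _etext) and
+ * is rewritten to its writable identity-mapping alias via
+ * __va(__pa_symbol(ip)) before probe_kernel_write() touches it.
+ */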
- ti = current_thread_info();
- /* The return trace stack is full */
- if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) {
- err = -EBUSY;
- goto out;
- }
+static const unsigned char *ftrace_nop_replace(void)
+{
+ return ideal_nops[NOP_ATOMIC5];
+}
+
+static int
+ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
+ unsigned const char *new_code)
+{
+ unsigned char replaced[MCOUNT_INSN_SIZE];
+
+ /*
+ * Note: Due to modules and __init, code can
+ * disappear and change; we need to protect against faulting
+ * as well as code changing. We do this by using the
+ * probe_kernel_* functions.
+ *
+ * No real locking needed, this code is run through
+ * kstop_machine, or before SMP starts.
+ */
+
+ /* read the text we want to modify */
+ if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ /* Make sure it is what we expect it to be */
+ if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+ return -EINVAL;
- index = ++ti->curr_ret_stack;
- ti->ret_stack[index].ret = ret;
- ti->ret_stack[index].func = func;
- ti->ret_stack[index].calltime = time;
+ /* replace the text with the new text */
+ if (do_ftrace_mod_code(ip, new_code))
+ return -EPERM;
-out:
- __raw_spin_unlock(&ret_stack_lock);
- raw_local_irq_restore(flags);
- return err;
+ sync_core();
+
+ return 0;
}
-/* Retrieve a function return address to the trace stack on thread info.*/
-static void pop_return_trace(unsigned long *ret, unsigned long long *time,
- unsigned long *func)
+int ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr)
{
- struct thread_info *ti;
- int index;
- unsigned long flags;
+ unsigned const char *new, *old;
+ unsigned long ip = rec->ip;
- raw_local_irq_save(flags);
- __raw_spin_lock(&ret_stack_lock);
+ old = ftrace_call_replace(ip, addr);
+ new = ftrace_nop_replace();
- ti = current_thread_info();
- index = ti->curr_ret_stack;
- *ret = ti->ret_stack[index].ret;
- *func = ti->ret_stack[index].func;
- *time = ti->ret_stack[index].calltime;
- ti->curr_ret_stack--;
+ /*
+ * On boot up, and when modules are loaded, the MCOUNT_ADDR
+ * is converted to a nop, and will never become MCOUNT_ADDR
+ * again. This code is either running before SMP (on boot up)
+ * or before the code will ever be executed (module load).
+ * We do not want to use the breakpoint version in this case,
+ * just modify the code directly.
+ */
+ if (addr == MCOUNT_ADDR)
+ return ftrace_modify_code_direct(rec->ip, old, new);
- __raw_spin_unlock(&ret_stack_lock);
- raw_local_irq_restore(flags);
+ /* Normal cases use add_brk_on_nop */
+ WARN_ONCE(1, "invalid use of ftrace_make_nop");
+ return -EINVAL;
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned const char *new, *old;
+ unsigned long ip = rec->ip;
+
+ old = ftrace_nop_replace();
+ new = ftrace_call_replace(ip, addr);
+
+ /* Should only be called when module is loaded */
+ return ftrace_modify_code_direct(rec->ip, old, new);
}
/*
- * Send the trace to the ring-buffer.
- * @return the original return address.
+ * The modifying_ftrace_code is used to tell the breakpoint
+ * handler to call ftrace_int3_handler(). If it fails to
+ * call this handler for a breakpoint added by ftrace, then
+ * the kernel may crash.
+ *
+ * As atomic writes on x86 do not need a barrier, we do not
+ * need to add smp_mb()s for this to work. It is also assumed
+ * that a CPU can not read modifying_ftrace_code before
+ * executing the breakpoint; it would be quite remarkable if
+ * it could. Here's the flow that is required:
+ *
+ * CPU-0 CPU-1
+ *
+ * atomic_inc(mfc);
+ * write int3s
+ * <trap-int3> // implicit (r)mb
+ * if (atomic_read(mfc))
+ * call ftrace_int3_handler()
+ *
+ * Then when we are finished:
+ *
+ * atomic_dec(mfc);
+ *
+ * If we hit a breakpoint that was not set by ftrace, it does not
+ * matter if ftrace_int3_handler() is called or not. It will
+ * simply be ignored. But it is crucial that an ftrace nop/caller
+ * breakpoint is handled. No other user should ever place a
+ * breakpoint on an ftrace nop/caller location. It must only
+ * be done by this code.
+ */
+atomic_t modifying_ftrace_code __read_mostly;
+
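+/*
+ * Usage sketch; ftrace_update_ftrace_func() and
+ * arch_ftrace_update_code() below are the real users:
+ *
+ *   atomic_inc(&modifying_ftrace_code);
+ *   ... add breakpoints, update code, remove breakpoints ...
+ *   atomic_dec(&modifying_ftrace_code);
+ */
+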
+static int
+ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+ unsigned const char *new_code);
+
+/*
+ * Should never be called:
+ * It is only called by __ftrace_replace_code(), which is called by
+ * ftrace_replace_code() (which x86 overrides) and by
+ * ftrace_update_code(), which turns mcount calls into nops or nops
+ * into function calls but never converts a function from not using
+ * regs to one that uses regs; that is what ftrace_modify_call() is for.
*/
-unsigned long ftrace_return_to_handler(void)
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
{
- struct ftrace_retfunc trace;
- pop_return_trace(&trace.ret, &trace.calltime, &trace.func);
- trace.rettime = cpu_clock(raw_smp_processor_id());
- ftrace_function_return(&trace);
+ WARN_ON(1);
+ return -EINVAL;
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long ip = (unsigned long)(&ftrace_call);
+ unsigned char old[MCOUNT_INSN_SIZE], *new;
+ int ret;
+
+ memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(ip, (unsigned long)func);
+
+ /* See comment above by declaration of modifying_ftrace_code */
+ atomic_inc(&modifying_ftrace_code);
+
+ ret = ftrace_modify_code(ip, old, new);
+
+ /* Also update the regs callback function */
+ if (!ret) {
+ ip = (unsigned long)(&ftrace_regs_call);
+ memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(ip, (unsigned long)func);
+ ret = ftrace_modify_code(ip, old, new);
+ }
- return trace.ret;
+ atomic_dec(&modifying_ftrace_code);
+
+ return ret;
}
/*
- * Hook the return address and push it in the stack of return addrs
- * in current thread info.
+ * A breakpoint was added to the code address we are about to
+ * modify, and this is the handler that will just skip over it.
+ * We are either changing a nop into a trace call, or a trace
+ * call to a nop. While the change is taking place, we treat
+ * it just like it was a nop.
*/
-asmlinkage
-void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+int ftrace_int3_handler(struct pt_regs *regs)
{
- unsigned long old;
- unsigned long long calltime;
- int faulted;
- unsigned long return_hooker = (unsigned long)
- &return_to_handler;
+ if (WARN_ON_ONCE(!regs))
+ return 0;
- /* Nmi's are currently unsupported */
- if (atomic_read(&in_nmi))
- return;
+ if (!ftrace_location(regs->ip - 1))
+ return 0;
+
+ regs->ip += MCOUNT_INSN_SIZE - 1;
+
+ return 1;
+}
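+
+/*
+ * Worked example (addresses illustrative): with a breakpoint on an
+ * mcount site at 0x1000, the int3 trap reports regs->ip == 0x1001,
+ * the byte after the one-byte int3. ftrace_location(0x1000) confirms
+ * the site is ours, and regs->ip += MCOUNT_INSN_SIZE - 1 resumes
+ * execution at 0x1005, just past the 5-byte slot, exactly as if a
+ * 5-byte nop had been executed.
+ */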
+
+static int ftrace_write(unsigned long ip, const char *val, int size)
+{
/*
- * Protect against fault, even if it shouldn't
- * happen. This tool is too much intrusive to
- * ignore such a protection.
+ * On x86_64, kernel text mappings are mapped read-only with
+ * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
+ * of the kernel text mapping to modify the kernel text.
+ *
+ * For 32-bit kernels, these mappings are the same, so we can
+ * use the kernel identity mapping to modify code.
*/
- asm volatile(
- "1: movl (%[parent_old]), %[old]\n"
- "2: movl %[return_hooker], (%[parent_replaced])\n"
- " movl $0, %[faulted]\n"
+ if (within(ip, (unsigned long)_text, (unsigned long)_etext))
+ ip = (unsigned long)__va(__pa_symbol(ip));
- ".section .fixup, \"ax\"\n"
- "3: movl $1, %[faulted]\n"
- ".previous\n"
-
- ".section __ex_table, \"a\"\n"
- " .long 1b, 3b\n"
- " .long 2b, 3b\n"
- ".previous\n"
+ return probe_kernel_write((void *)ip, val, size);
+}
- : [parent_replaced] "=rm" (parent), [old] "=r" (old),
- [faulted] "=r" (faulted)
- : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
- : "memory"
- );
+static int add_break(unsigned long ip, const char *old)
+{
+ unsigned char replaced[MCOUNT_INSN_SIZE];
+ unsigned char brk = BREAKPOINT_INSTRUCTION;
- if (WARN_ON(faulted)) {
- unregister_ftrace_return();
- return;
- }
+ if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
- if (WARN_ON(!__kernel_text_address(old))) {
- unregister_ftrace_return();
- *parent = old;
- return;
- }
+ /* Make sure it is what we expect it to be */
+ if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
+ return -EINVAL;
- calltime = cpu_clock(raw_smp_processor_id());
+ if (ftrace_write(ip, &brk, 1))
+ return -EPERM;
- if (push_return_trace(old, calltime, self_addr) == -EBUSY)
- *parent = old;
+ return 0;
}
-static int __init init_ftrace_function_return(void)
+static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
{
- ret_stack_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
- return 0;
+ unsigned const char *old;
+ unsigned long ip = rec->ip;
+
+ old = ftrace_call_replace(ip, addr);
+
+ return add_break(rec->ip, old);
}
-device_initcall(init_ftrace_function_return);
-#endif
+static int add_brk_on_nop(struct dyn_ftrace *rec)
+{
+ unsigned const char *old;
-#ifdef CONFIG_DYNAMIC_FTRACE
+ old = ftrace_nop_replace();
-union ftrace_code_union {
- char code[MCOUNT_INSN_SIZE];
- struct {
- char e8;
- int offset;
- } __attribute__((packed));
-};
+ return add_break(rec->ip, old);
+}
-static int ftrace_calc_offset(long ip, long addr)
+/*
+ * If the record has the FTRACE_FL_REGS set, that means that it
+ * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
+ * is not set, then it wants to convert to the normal callback.
+ */
+static unsigned long get_ftrace_addr(struct dyn_ftrace *rec)
{
- return (int)(addr - ip);
+ if (rec->flags & FTRACE_FL_REGS)
+ return (unsigned long)FTRACE_REGS_ADDR;
+ else
+ return (unsigned long)FTRACE_ADDR;
}
-unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+/*
+ * The FTRACE_FL_REGS_EN is set when the record already points to
+ * a function that saves all the regs. Basically the '_EN' version
+ * represents the current state of the function.
+ */
+static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec)
{
- static union ftrace_code_union calc;
+ if (rec->flags & FTRACE_FL_REGS_EN)
+ return (unsigned long)FTRACE_REGS_ADDR;
+ else
+ return (unsigned long)FTRACE_ADDR;
+}
- calc.e8 = 0xe8;
- calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
+static int add_breakpoints(struct dyn_ftrace *rec, int enable)
+{
+ unsigned long ftrace_addr;
+ int ret;
- /*
- * No locking needed, this must be called via kstop_machine
- * which in essence is like running on a uniprocessor machine.
- */
- return calc.code;
+ ret = ftrace_test_record(rec, enable);
+
+ ftrace_addr = get_ftrace_addr(rec);
+
+ switch (ret) {
+ case FTRACE_UPDATE_IGNORE:
+ return 0;
+
+ case FTRACE_UPDATE_MAKE_CALL:
+ /* converting nop to call */
+ return add_brk_on_nop(rec);
+
+ case FTRACE_UPDATE_MODIFY_CALL_REGS:
+ case FTRACE_UPDATE_MODIFY_CALL:
+ ftrace_addr = get_ftrace_old_addr(rec);
+ /* fall through */
+ case FTRACE_UPDATE_MAKE_NOP:
+ /* converting a call to a nop */
+ return add_brk_on_call(rec, ftrace_addr);
+ }
+ return 0;
}
/*
- * Modifying code must take extra care. On an SMP machine, if
- * the code being modified is also being executed on another CPU
- * that CPU will have undefined results and possibly take a GPF.
- * We use kstop_machine to stop other CPUS from exectuing code.
- * But this does not stop NMIs from happening. We still need
- * to protect against that. We separate out the modification of
- * the code to take care of this.
- *
- * Two buffers are added: An IP buffer and a "code" buffer.
- *
- * 1) Put the instruction pointer into the IP buffer
- * and the new code into the "code" buffer.
- * 2) Set a flag that says we are modifying code
- * 3) Wait for any running NMIs to finish.
- * 4) Write the code
- * 5) clear the flag.
- * 6) Wait for any running NMIs to finish.
- *
- * If an NMI is executed, the first thing it does is to call
- * "ftrace_nmi_enter". This will check if the flag is set to write
- * and if it is, it will write what is in the IP and "code" buffers.
- *
- * The trick is, it does not matter if everyone is writing the same
- * content to the code location. Also, if a CPU is executing code
- * it is OK to write to that code location if the contents being written
- * are the same as what exists.
+ * On error, we need to remove breakpoints. This needs to
+ * be done carefully. If the address does not currently have a
+ * breakpoint, we know we are done. Otherwise, we look at the
+ * remaining 4 bytes of the instruction. If it matches a nop
+ * we replace the breakpoint with the nop. Otherwise we replace
+ * it with the call instruction.
*/
+static int remove_breakpoint(struct dyn_ftrace *rec)
+{
+ unsigned char ins[MCOUNT_INSN_SIZE];
+ unsigned char brk = BREAKPOINT_INSTRUCTION;
+ const unsigned char *nop;
+ unsigned long ftrace_addr;
+ unsigned long ip = rec->ip;
+
+ /* If we fail the read, just give up */
+ if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
-static atomic_t in_nmi = ATOMIC_INIT(0);
-static int mod_code_status; /* holds return value of text write */
-static int mod_code_write; /* set when NMI should do the write */
-static void *mod_code_ip; /* holds the IP to write to */
-static void *mod_code_newcode; /* holds the text to write to the IP */
+ /* If this does not have a breakpoint, we are done */
+ if (ins[0] != brk)
+ return -1;
-static unsigned nmi_wait_count;
-static atomic_t nmi_update_count = ATOMIC_INIT(0);
+ nop = ftrace_nop_replace();
-int ftrace_arch_read_dyn_info(char *buf, int size)
+ /*
+ * If the last 4 bytes of the instruction do not match
+ * a nop, then we assume that this is a call to ftrace_addr.
+ */
+ if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
+ /*
+ * Out of extra paranoia, we check if the breakpoint is on
+ * a call that would actually jump to the ftrace_addr.
+ * If not, don't touch the breakpoint; we might just
+ * create a disaster.
+ */
+ ftrace_addr = get_ftrace_addr(rec);
+ nop = ftrace_call_replace(ip, ftrace_addr);
+
+ if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
+ goto update;
+
+ /* Check both ftrace_addr and ftrace_old_addr */
+ ftrace_addr = get_ftrace_old_addr(rec);
+ nop = ftrace_call_replace(ip, ftrace_addr);
+
+ if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
+ return -EINVAL;
+ }
+
+ update:
+ return probe_kernel_write((void *)ip, &nop[0], 1);
+}
+
+static int add_update_code(unsigned long ip, unsigned const char *new)
+{
+ /* skip breakpoint */
+ ip++;
+ new++;
+ if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1))
+ return -EPERM;
+ return 0;
+}
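+
+/*
+ * E.g. for the 5-byte mcount slot: byte 0 still holds the int3, so
+ * only the four bytes at ip+1..ip+4 are rewritten here; the first
+ * byte is restored later by the finish_update_*() pass.
+ */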
+
+static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
{
- int r;
+ unsigned long ip = rec->ip;
+ unsigned const char *new;
- r = snprintf(buf, size, "%u %u",
- nmi_wait_count,
- atomic_read(&nmi_update_count));
- return r;
+ new = ftrace_call_replace(ip, addr);
+ return add_update_code(ip, new);
}
-static void ftrace_mod_code(void)
+static int add_update_nop(struct dyn_ftrace *rec)
{
- /*
- * Yes, more than one CPU process can be writing to mod_code_status.
- * (and the code itself)
- * But if one were to fail, then they all should, and if one were
- * to succeed, then they all should.
- */
- mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
- MCOUNT_INSN_SIZE);
+ unsigned long ip = rec->ip;
+ unsigned const char *new;
+ new = ftrace_nop_replace();
+ return add_update_code(ip, new);
}
-void ftrace_nmi_enter(void)
+static int add_update(struct dyn_ftrace *rec, int enable)
{
- atomic_inc(&in_nmi);
- /* Must have in_nmi seen before reading write flag */
- smp_mb();
- if (mod_code_write) {
- ftrace_mod_code();
- atomic_inc(&nmi_update_count);
+ unsigned long ftrace_addr;
+ int ret;
+
+ ret = ftrace_test_record(rec, enable);
+
+ ftrace_addr = get_ftrace_addr(rec);
+
+ switch (ret) {
+ case FTRACE_UPDATE_IGNORE:
+ return 0;
+
+ case FTRACE_UPDATE_MODIFY_CALL_REGS:
+ case FTRACE_UPDATE_MODIFY_CALL:
+ case FTRACE_UPDATE_MAKE_CALL:
+ /* converting nop to call */
+ return add_update_call(rec, ftrace_addr);
+
+ case FTRACE_UPDATE_MAKE_NOP:
+ /* converting a call to a nop */
+ return add_update_nop(rec);
}
+
+ return 0;
}
-void ftrace_nmi_exit(void)
+static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
{
- /* Finish all executions before clearing in_nmi */
- smp_wmb();
- atomic_dec(&in_nmi);
+ unsigned long ip = rec->ip;
+ unsigned const char *new;
+
+ new = ftrace_call_replace(ip, addr);
+
+ if (ftrace_write(ip, new, 1))
+ return -EPERM;
+
+ return 0;
}
-static void wait_for_nmi(void)
+static int finish_update_nop(struct dyn_ftrace *rec)
{
- int waited = 0;
+ unsigned long ip = rec->ip;
+ unsigned const char *new;
+
+ new = ftrace_nop_replace();
- while (atomic_read(&in_nmi)) {
- waited = 1;
- cpu_relax();
+ if (ftrace_write(ip, new, 1))
+ return -EPERM;
+ return 0;
+}
+
+static int finish_update(struct dyn_ftrace *rec, int enable)
+{
+ unsigned long ftrace_addr;
+ int ret;
+
+ ret = ftrace_update_record(rec, enable);
+
+ ftrace_addr = get_ftrace_addr(rec);
+
+ switch (ret) {
+ case FTRACE_UPDATE_IGNORE:
+ return 0;
+
+ case FTRACE_UPDATE_MODIFY_CALL_REGS:
+ case FTRACE_UPDATE_MODIFY_CALL:
+ case FTRACE_UPDATE_MAKE_CALL:
+ /* converting nop to call */
+ return finish_update_call(rec, ftrace_addr);
+
+ case FTRACE_UPDATE_MAKE_NOP:
+ /* converting a call to a nop */
+ return finish_update_nop(rec);
}
- if (waited)
- nmi_wait_count++;
+ return 0;
}
-static int
-do_ftrace_mod_code(unsigned long ip, void *new_code)
+static void do_sync_core(void *data)
+{
+ sync_core();
+}
+
+static void run_sync(void)
{
- mod_code_ip = (void *)ip;
- mod_code_newcode = new_code;
+ int enable_irqs = irqs_disabled();
+
+ /* We may be called with interrupts disabled (e.g. on bootup). */
+ if (enable_irqs)
+ local_irq_enable();
+ on_each_cpu(do_sync_core, NULL, 1);
+ if (enable_irqs)
+ local_irq_disable();
+}
- /* The buffers need to be visible before we let NMIs write them */
- smp_wmb();
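+/*
+ * Overview of the breakpoint-based live patching below, with a
+ * run_sync() (sync_core() on every CPU) between each phase:
+ *
+ *  1) add an int3 breakpoint on the first byte of every mcount site
+ *  2) update the remaining MCOUNT_INSN_SIZE - 1 bytes behind it
+ *  3) replace the breakpoint with the final first byte
+ *
+ * A CPU that executes a site mid-update traps into
+ * ftrace_int3_handler() and simply skips over the instruction.
+ */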
+void ftrace_replace_code(int enable)
+{
+ struct ftrace_rec_iter *iter;
+ struct dyn_ftrace *rec;
+ const char *report = "adding breakpoints";
+ int count = 0;
+ int ret;
- mod_code_write = 1;
+ for_ftrace_rec_iter(iter) {
+ rec = ftrace_rec_iter_record(iter);
- /* Make sure write bit is visible before we wait on NMIs */
- smp_mb();
+ ret = add_breakpoints(rec, enable);
+ if (ret)
+ goto remove_breakpoints;
+ count++;
+ }
- wait_for_nmi();
+ run_sync();
- /* Make sure all running NMIs have finished before we write the code */
- smp_mb();
+ report = "updating code";
- ftrace_mod_code();
+ for_ftrace_rec_iter(iter) {
+ rec = ftrace_rec_iter_record(iter);
- /* Make sure the write happens before clearing the bit */
- smp_wmb();
+ ret = add_update(rec, enable);
+ if (ret)
+ goto remove_breakpoints;
+ }
+
+ run_sync();
+
+ report = "removing breakpoints";
- mod_code_write = 0;
+ for_ftrace_rec_iter(iter) {
+ rec = ftrace_rec_iter_record(iter);
+
+ ret = finish_update(rec, enable);
+ if (ret)
+ goto remove_breakpoints;
+ }
- /* make sure NMIs see the cleared bit */
- smp_mb();
+ run_sync();
- wait_for_nmi();
+ return;
- return mod_code_status;
+ remove_breakpoints:
+ ftrace_bug(ret, rec ? rec->ip : 0);
+ printk(KERN_WARNING "Failed on %s (%d):\n", report, count);
+ for_ftrace_rec_iter(iter) {
+ rec = ftrace_rec_iter_record(iter);
+ remove_breakpoint(rec);
+ }
}
+static int
+ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+ unsigned const char *new_code)
+{
+ int ret;
+
+ ret = add_break(ip, old_code);
+ if (ret)
+ goto out;
+
+ run_sync();
+ ret = add_update_code(ip, new_code);
+ if (ret)
+ goto fail_update;
+ run_sync();
-static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
+ ret = ftrace_write(ip, new_code, 1);
+ if (ret) {
+ ret = -EPERM;
+ goto out;
+ }
+ run_sync();
+ out:
+ return ret;
-unsigned char *ftrace_nop_replace(void)
+ fail_update:
+ probe_kernel_write((void *)ip, &old_code[0], 1);
+ goto out;
+}
+
+void arch_ftrace_update_code(int command)
{
- return ftrace_nop;
+ /* See comment above by declaration of modifying_ftrace_code */
+ atomic_inc(&modifying_ftrace_code);
+
+ ftrace_modify_all_code(command);
+
+ atomic_dec(&modifying_ftrace_code);
}
-int
-ftrace_modify_code(unsigned long ip, unsigned char *old_code,
- unsigned char *new_code)
+int __init ftrace_dyn_arch_init(void *data)
{
- unsigned char replaced[MCOUNT_INSN_SIZE];
+ /* The return code is returned via data */
+ *(unsigned long *)data = 0;
- /*
- * Note: Due to modules and __init, code can
- * disappear and change, we need to protect against faulting
- * as well as code changing. We do this by using the
- * probe_kernel_* functions.
- *
- * No real locking needed, this code is run through
- * kstop_machine, or before SMP starts.
- */
+ return 0;
+}
+#endif
- /* read the text we want to modify */
- if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern void ftrace_graph_call(void);
+
+static int ftrace_mod_jmp(unsigned long ip,
+ int old_offset, int new_offset)
+{
+ unsigned char code[MCOUNT_INSN_SIZE];
+
+ if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
return -EFAULT;
- /* Make sure it is what we expect it to be */
- if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+ if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
return -EINVAL;
- /* replace the text with the new text */
- if (do_ftrace_mod_code(ip, new_code))
- return -EPERM;
+ *(int *)(&code[1]) = new_offset;
- sync_core();
+ if (do_ftrace_mod_code(ip, &code))
+ return -EPERM;
return 0;
}
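+
+/*
+ * Sketch of the site this toggles (bytes illustrative): the
+ * ftrace_graph_call location holds a 5-byte "jmp rel32" (opcode 0xe9),
+ * and only the rel32 is rewritten, switching the target between
+ * ftrace_stub (graph tracing off) and ftrace_graph_caller (on).
+ */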
-int ftrace_update_ftrace_func(ftrace_func_t func)
+int ftrace_enable_ftrace_graph_caller(void)
{
- unsigned long ip = (unsigned long)(&ftrace_call);
- unsigned char old[MCOUNT_INSN_SIZE], *new;
- int ret;
+ unsigned long ip = (unsigned long)(&ftrace_graph_call);
+ int old_offset, new_offset;
- memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
- new = ftrace_call_replace(ip, (unsigned long)func);
- ret = ftrace_modify_code(ip, old, new);
+ old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
+ new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
- return ret;
+ return ftrace_mod_jmp(ip, old_offset, new_offset);
}
-int __init ftrace_dyn_arch_init(void *data)
+int ftrace_disable_ftrace_graph_caller(void)
{
- extern const unsigned char ftrace_test_p6nop[];
- extern const unsigned char ftrace_test_nop5[];
- extern const unsigned char ftrace_test_jmp[];
- int faulted = 0;
+ unsigned long ip = (unsigned long)(&ftrace_graph_call);
+ int old_offset, new_offset;
+
+ old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
+ new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
+
+ return ftrace_mod_jmp(ip, old_offset, new_offset);
+}
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+/*
+ * Hook the return address and push it on the stack of return
+ * addresses in the current thread info.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ unsigned long frame_pointer)
+{
+ unsigned long old;
+ int faulted;
+ struct ftrace_graph_ent trace;
+ unsigned long return_hooker = (unsigned long)
+ &return_to_handler;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
/*
- * There is no good nop for all x86 archs.
- * We will default to using the P6_NOP5, but first we
- * will test to make sure that the nop will actually
- * work on this CPU. If it faults, we will then
- * go to a lesser efficient 5 byte nop. If that fails
- * we then just use a jmp as our nop. This isn't the most
- * efficient nop, but we can not use a multi part nop
- * since we would then risk being preempted in the middle
- * of that nop, and if we enabled tracing then, it might
- * cause a system crash.
- *
- * TODO: check the cpuid to determine the best nop.
+ * Protect against fault, even if it shouldn't
+ * happen. This tool is too intrusive to
+ * ignore such a protection.
*/
- asm volatile (
- "ftrace_test_jmp:"
- "jmp ftrace_test_p6nop\n"
- "nop\n"
- "nop\n"
- "nop\n" /* 2 byte jmp + 3 bytes */
- "ftrace_test_p6nop:"
- P6_NOP5
- "jmp 1f\n"
- "ftrace_test_nop5:"
- ".byte 0x66,0x66,0x66,0x66,0x90\n"
- "1:"
+ asm volatile(
+ "1: " _ASM_MOV " (%[parent]), %[old]\n"
+ "2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
+ " movl $0, %[faulted]\n"
+ "3:\n"
+
".section .fixup, \"ax\"\n"
- "2: movl $1, %0\n"
- " jmp ftrace_test_nop5\n"
- "3: movl $2, %0\n"
- " jmp 1b\n"
+ "4: movl $1, %[faulted]\n"
+ " jmp 3b\n"
".previous\n"
- _ASM_EXTABLE(ftrace_test_p6nop, 2b)
- _ASM_EXTABLE(ftrace_test_nop5, 3b)
- : "=r"(faulted) : "0" (faulted));
-
- switch (faulted) {
- case 0:
- pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
- memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
- break;
- case 1:
- pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
- memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
- break;
- case 2:
- pr_info("ftrace: converting mcount calls to jmp . + 5\n");
- memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
- break;
+
+ _ASM_EXTABLE(1b, 4b)
+ _ASM_EXTABLE(2b, 4b)
+
+ : [old] "=&r" (old), [faulted] "=r" (faulted)
+ : [parent] "r" (parent), [return_hooker] "r" (return_hooker)
+ : "memory"
+ );
+
+ if (unlikely(faulted)) {
+ ftrace_graph_stop();
+ WARN_ON(1);
+ return;
}
- /* The return code is retured via data */
- *(unsigned long *)data = 0;
+ trace.func = self_addr;
+ trace.depth = current->curr_ret_stack + 1;
- return 0;
+ /* Only trace if the calling function expects to */
+ if (!ftrace_graph_entry(&trace)) {
+ *parent = old;
+ return;
+ }
+
+ if (ftrace_push_return_trace(old, self_addr, &trace.depth,
+ frame_pointer) == -EBUSY) {
+ *parent = old;
+ return;
+ }
}
-#endif
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */