kmemcheck: add the kmemcheck core
Vegard Nossum [Thu, 3 Apr 2008 22:51:41 +0000 (00:51 +0200)]
General description: kmemcheck is a patch to the linux kernel that
detects use of uninitialized memory. It does this by trapping every
read and write to memory that was allocated dynamically (e.g. using
kmalloc()). If a memory address is read that has not previously been
written to, a message is printed to the kernel log.

Thanks to Andi Kleen for the set_memory_4k() solution.

Andrew Morton suggested documenting the shadow member of struct page.

Signed-off-by: Vegard Nossum <vegardno@ifi.uio.no>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

[export kmemcheck_mark_initialized]
[build fix for setup_max_cpus]
Signed-off-by: Ingo Molnar <mingo@elte.hu>

[rebased for mainline inclusion]
Signed-off-by: Vegard Nossum <vegardno@ifi.uio.no>

19 files changed:
arch/x86/Makefile
arch/x86/include/asm/kmemcheck.h [new file with mode: 0644]
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/pgtable_types.h
arch/x86/mm/Makefile
arch/x86/mm/kmemcheck/Makefile [new file with mode: 0644]
arch/x86/mm/kmemcheck/error.c [new file with mode: 0644]
arch/x86/mm/kmemcheck/error.h [new file with mode: 0644]
arch/x86/mm/kmemcheck/kmemcheck.c [new file with mode: 0644]
arch/x86/mm/kmemcheck/opcode.c [new file with mode: 0644]
arch/x86/mm/kmemcheck/opcode.h [new file with mode: 0644]
arch/x86/mm/kmemcheck/pte.c [new file with mode: 0644]
arch/x86/mm/kmemcheck/pte.h [new file with mode: 0644]
arch/x86/mm/kmemcheck/shadow.c [new file with mode: 0644]
arch/x86/mm/kmemcheck/shadow.h [new file with mode: 0644]
include/linux/kmemcheck.h [new file with mode: 0644]
include/linux/mm_types.h
init/main.c
kernel/sysctl.c

index edbd0ca..1b68659 100644 (file)
@@ -81,6 +81,11 @@ ifdef CONFIG_CC_STACKPROTECTOR
         endif
 endif
 
+# Don't unroll struct assignments with kmemcheck enabled: when
+# CONFIG_KMEMCHECK=y, add -fno-builtin-memcpy (through cc-option).
+ifeq ($(CONFIG_KMEMCHECK),y)
+       KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy)
+endif
+
 # Stackpointer is addressed different for 32 bit and 64 bit x86
 sp-$(CONFIG_X86_32) := esp
 sp-$(CONFIG_X86_64) := rsp
diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h
new file mode 100644 (file)
index 0000000..ed01518
--- /dev/null
@@ -0,0 +1,42 @@
+#ifndef ASM_X86_KMEMCHECK_H
+#define ASM_X86_KMEMCHECK_H
+
+#include <linux/types.h>
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_KMEMCHECK
+/* True while this CPU has pages shown that still have to be hidden again. */
+bool kmemcheck_active(struct pt_regs *regs);
+
+/* Called from the #PF handler and from the #DB handler, respectively. */
+void kmemcheck_show(struct pt_regs *regs);
+void kmemcheck_hide(struct pt_regs *regs);
+
+/* Fault/trap hooks; each returns true when kmemcheck handled the event. */
+bool kmemcheck_fault(struct pt_regs *regs,
+       unsigned long address, unsigned long error_code);
+bool kmemcheck_trap(struct pt_regs *regs);
+#else
+/* kmemcheck is compiled out: the hooks do nothing and return false. */
+static inline bool kmemcheck_active(struct pt_regs *regs)
+{
+       return false;
+}
+
+static inline void kmemcheck_show(struct pt_regs *regs)
+{
+}
+
+static inline void kmemcheck_hide(struct pt_regs *regs)
+{
+}
+
+static inline bool kmemcheck_fault(struct pt_regs *regs,
+       unsigned long address, unsigned long error_code)
+{
+       return false;
+}
+
+static inline bool kmemcheck_trap(struct pt_regs *regs)
+{
+       return false;
+}
+#endif /* CONFIG_KMEMCHECK */
+
+#endif
index 18ef7eb..c5a0807 100644 (file)
@@ -317,6 +317,15 @@ static inline int pte_present(pte_t a)
        return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
 }
 
+/* Non-zero if the PTE carries _PAGE_HIDDEN; always 0 without kmemcheck. */
+static inline int pte_hidden(pte_t x)
+{
+#ifndef CONFIG_KMEMCHECK
+       return 0;
+#else
+       return pte_flags(x) & _PAGE_HIDDEN;
+#endif
+}
+
 static inline int pmd_present(pmd_t pmd)
 {
        return pmd_flags(pmd) & _PAGE_PRESENT;
index 4d258ad..9b5c921 100644 (file)
@@ -18,7 +18,7 @@
 #define _PAGE_BIT_GLOBAL       8       /* Global TLB entry PPro+ */
 #define _PAGE_BIT_UNUSED1      9       /* available for programmer */
 #define _PAGE_BIT_IOMAP                10      /* flag used to indicate IO mapping */
-#define _PAGE_BIT_UNUSED3      11
+#define _PAGE_BIT_HIDDEN       11      /* hidden by kmemcheck */
 #define _PAGE_BIT_PAT_LARGE    12      /* On 2MB or 1GB pages */
 #define _PAGE_BIT_SPECIAL      _PAGE_BIT_UNUSED1
 #define _PAGE_BIT_CPA_TEST     _PAGE_BIT_UNUSED1
@@ -41,7 +41,7 @@
 #define _PAGE_GLOBAL   (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
 #define _PAGE_UNUSED1  (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
 #define _PAGE_IOMAP    (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
-#define _PAGE_UNUSED3  (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3)
+#define _PAGE_HIDDEN   (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN)
 #define _PAGE_PAT      (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
 #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
 #define _PAGE_SPECIAL  (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
index fdd30d0..eefdeee 100644 (file)
@@ -10,6 +10,8 @@ obj-$(CONFIG_X86_PTDUMP)      += dump_pagetables.o
 
 obj-$(CONFIG_HIGHMEM)          += highmem_32.o
 
+obj-$(CONFIG_KMEMCHECK)                += kmemcheck/
+
 obj-$(CONFIG_MMIOTRACE)                += mmiotrace.o
 mmiotrace-y                    := kmmio.o pf_in.o mmio-mod.o
 obj-$(CONFIG_MMIOTRACE_TEST)   += testmmiotrace.o
diff --git a/arch/x86/mm/kmemcheck/Makefile b/arch/x86/mm/kmemcheck/Makefile
new file mode 100644 (file)
index 0000000..4666b7a
--- /dev/null
@@ -0,0 +1 @@
+obj-y := error.o kmemcheck.o opcode.o pte.o shadow.o
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
new file mode 100644 (file)
index 0000000..5ec9f5a
--- /dev/null
@@ -0,0 +1,229 @@
+#include <linux/interrupt.h>
+#include <linux/kdebug.h>
+#include <linux/kmemcheck.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/stacktrace.h>
+#include <linux/string.h>
+
+#include "error.h"
+#include "shadow.h"
+
+/* What kind of report a queued struct kmemcheck_error describes. */
+enum kmemcheck_error_type {
+       /* Queued by kmemcheck_error_save() */
+       KMEMCHECK_ERROR_INVALID_ACCESS,
+       /* Queued by kmemcheck_error_save_bug() */
+       KMEMCHECK_ERROR_BUG,
+};
+
+/* Number of bytes of shadow/memory context saved with each report. */
+#define SHADOW_COPY_SIZE (1 << CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT)
+
+/* One queued error report: filled in at fault time, printed later. */
+struct kmemcheck_error {
+       enum kmemcheck_error_type type;
+
+       union {
+               /* KMEMCHECK_ERROR_INVALID_ACCESS */
+               struct {
+                       /* Shadow state reported for the accessed memory */
+                       enum kmemcheck_shadow state;
+                       /* Address and size of the erroneous read */
+                       unsigned long   address;
+                       unsigned int    size;
+               };
+       };
+
+       /* Registers and stack trace captured when the report was saved */
+       struct pt_regs          regs;
+       struct stack_trace      trace;
+       unsigned long           trace_entries[32];
+
+       /*
+        * SHADOW_COPY_SIZE bytes of shadow state and of memory contents
+        * around the address. We compress each shadow state to a char.
+        */
+       unsigned char           shadow_copy[SHADOW_COPY_SIZE];
+       unsigned char           memory_copy[SHADOW_COPY_SIZE];
+};
+
+/*
+ * Create a ring queue of errors to output. We can't call printk() directly
+ * from the kmemcheck traps, since this may call the console drivers and
+ * result in a recursive fault.
+ */
+static struct kmemcheck_error error_fifo[CONFIG_KMEMCHECK_QUEUE_SIZE];
+/* Number of queued entries that have not been read yet */
+static unsigned int error_count;
+/* Index of the next entry to read */
+static unsigned int error_rd;
+/* Index of the next entry to write */
+static unsigned int error_wr;
+/* Number of reports dropped because the queue was full */
+static unsigned int error_missed_count;
+
+/*
+ * Reserve the next free slot of the error queue. When the queue is full,
+ * count the report as missed and return NULL.
+ */
+static struct kmemcheck_error *error_next_wr(void)
+{
+       struct kmemcheck_error *slot = NULL;
+
+       if (error_count != ARRAY_SIZE(error_fifo)) {
+               slot = &error_fifo[error_wr];
+
+               /* Advance the write index, wrapping at the end. */
+               error_wr++;
+               if (error_wr == ARRAY_SIZE(error_fifo))
+                       error_wr = 0;
+
+               error_count++;
+       } else {
+               error_missed_count++;
+       }
+
+       return slot;
+}
+
+/*
+ * Take the oldest queued report off the error queue, or return NULL when
+ * the queue is empty.
+ */
+static struct kmemcheck_error *error_next_rd(void)
+{
+       struct kmemcheck_error *slot = NULL;
+
+       if (error_count != 0) {
+               slot = &error_fifo[error_rd];
+
+               /* Advance the read index, wrapping at the end. */
+               error_rd++;
+               if (error_rd == ARRAY_SIZE(error_fifo))
+                       error_rd = 0;
+
+               error_count--;
+       }
+
+       return slot;
+}
+
+/* Tasklet whose handler, do_wakeup(), prints the queued reports. */
+static void do_wakeup(unsigned long);
+static DECLARE_TASKLET(kmemcheck_tasklet, &do_wakeup, 0);
+
+/*
+ * Save the context of an error report.
+ *
+ * @state:   shadow state found for the accessed memory
+ * @address: address of the access
+ * @size:    size of the access in bytes
+ * @regs:    registers at the time of the fault
+ *
+ * The report is stored in error_fifo and the tasklet is scheduled to print
+ * it later. Nothing is saved when the fault comes from the same instruction
+ * pointer as the previous call, or when the queue is full.
+ */
+void kmemcheck_error_save(enum kmemcheck_shadow state,
+       unsigned long address, unsigned int size, struct pt_regs *regs)
+{
+       static unsigned long prev_ip;
+
+       struct kmemcheck_error *e;
+       void *shadow_copy;
+       void *memory_copy;
+
+       /* Don't report several adjacent errors from the same EIP. */
+       if (regs->ip == prev_ip)
+               return;
+       prev_ip = regs->ip;
+
+       e = error_next_wr();
+       if (!e)
+               return;
+
+       e->type = KMEMCHECK_ERROR_INVALID_ACCESS;
+
+       e->state = state;
+       e->address = address;
+       e->size = size;
+
+       /* Save regs */
+       memcpy(&e->regs, regs, sizeof(*regs));
+
+       /* Save stack trace */
+       e->trace.nr_entries = 0;
+       e->trace.entries = e->trace_entries;
+       e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
+       e->trace.skip = 0;
+       save_stack_trace_bp(&e->trace, regs->bp);
+
+       /* Round address down to a multiple of SHADOW_COPY_SIZE */
+       shadow_copy = kmemcheck_shadow_lookup(address
+               & ~(SHADOW_COPY_SIZE - 1));
+       BUG_ON(!shadow_copy);
+
+       memcpy(e->shadow_copy, shadow_copy, SHADOW_COPY_SIZE);
+
+       /* Make the page present while its contents are copied, then hide it */
+       kmemcheck_show_addr(address);
+       memory_copy = (void *) (address & ~(SHADOW_COPY_SIZE - 1));
+       memcpy(e->memory_copy, memory_copy, SHADOW_COPY_SIZE);
+       kmemcheck_hide_addr(address);
+
+       tasklet_hi_schedule_first(&kmemcheck_tasklet);
+}
+
+/*
+ * Save the context of a kmemcheck bug.
+ *
+ * Queues a KMEMCHECK_ERROR_BUG report holding @regs and the current stack
+ * trace, then schedules the tasklet that prints it. Nothing is saved when
+ * the queue is full.
+ */
+void kmemcheck_error_save_bug(struct pt_regs *regs)
+{
+       struct kmemcheck_error *e;
+
+       e = error_next_wr();
+       if (!e)
+               return;
+
+       e->type = KMEMCHECK_ERROR_BUG;
+
+       memcpy(&e->regs, regs, sizeof(*regs));
+
+       e->trace.nr_entries = 0;
+       e->trace.entries = e->trace_entries;
+       e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
+       e->trace.skip = 1;
+       save_stack_trace(&e->trace);
+
+       tasklet_hi_schedule_first(&kmemcheck_tasklet);
+}
+
+/*
+ * Print the oldest queued report, if any, and remove it from the queue.
+ *
+ * For an invalid access this prints the access size and shadow state, a hex
+ * dump of the saved memory, one letter per byte for the saved shadow state,
+ * and a '^' marker under the offending offset. The saved registers and
+ * stack trace are printed for every report type.
+ */
+void kmemcheck_error_recall(void)
+{
+       /* Human-readable names of the shadow states */
+       static const char *desc[] = {
+               [KMEMCHECK_SHADOW_UNALLOCATED]          = "unallocated",
+               [KMEMCHECK_SHADOW_UNINITIALIZED]        = "uninitialized",
+               [KMEMCHECK_SHADOW_INITIALIZED]          = "initialized",
+               [KMEMCHECK_SHADOW_FREED]                = "freed",
+       };
+
+       /* One-letter codes used in the per-byte shadow dump */
+       static const char short_desc[] = {
+               [KMEMCHECK_SHADOW_UNALLOCATED]          = 'a',
+               [KMEMCHECK_SHADOW_UNINITIALIZED]        = 'u',
+               [KMEMCHECK_SHADOW_INITIALIZED]          = 'i',
+               [KMEMCHECK_SHADOW_FREED]                = 'f',
+       };
+
+       struct kmemcheck_error *e;
+       unsigned int i;
+
+       e = error_next_rd();
+       if (!e)
+               return;
+
+       switch (e->type) {
+       case KMEMCHECK_ERROR_INVALID_ACCESS:
+               printk(KERN_ERR  "WARNING: kmemcheck: Caught %d-bit read "
+                       "from %s memory (%p)\n",
+                       8 * e->size, e->state < ARRAY_SIZE(desc) ?
+                               desc[e->state] : "(invalid shadow state)",
+                       (void *) e->address);
+
+               /* Memory contents: two hex digits per byte */
+               printk(KERN_INFO);
+               for (i = 0; i < SHADOW_COPY_SIZE; ++i)
+                       printk("%02x", e->memory_copy[i]);
+               printk("\n");
+
+               /* Shadow state: a space and one letter per byte */
+               printk(KERN_INFO);
+               for (i = 0; i < SHADOW_COPY_SIZE; ++i) {
+                       if (e->shadow_copy[i] < ARRAY_SIZE(short_desc))
+                               printk(" %c", short_desc[e->shadow_copy[i]]);
+                       else
+                               printk(" ?");
+               }
+               printk("\n");
+               /* Marker padded to the column of the byte at e->address */
+               printk(KERN_INFO "%*c\n", 2 + 2
+                       * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^');
+               break;
+       case KMEMCHECK_ERROR_BUG:
+               printk(KERN_EMERG "ERROR: kmemcheck: Fatal error\n");
+               break;
+       }
+
+       __show_regs(&e->regs, 1);
+       print_stack_trace(&e->trace, 0);
+}
+
+/*
+ * Tasklet handler: print every queued report, then print and reset the
+ * number of reports that were dropped because the queue was full.
+ * The tasklet data argument is unused.
+ */
+static void do_wakeup(unsigned long data)
+{
+       while (error_count > 0)
+               kmemcheck_error_recall();
+
+       if (error_missed_count > 0) {
+               /* error_missed_count is unsigned, hence %u rather than %d. */
+               printk(KERN_WARNING "kmemcheck: Lost %u error reports because "
+                       "the queue was too small\n", error_missed_count);
+               error_missed_count = 0;
+       }
+}
diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h
new file mode 100644 (file)
index 0000000..0efc2e8
--- /dev/null
@@ -0,0 +1,15 @@
+#ifndef ARCH__X86__MM__KMEMCHECK__ERROR_H
+#define ARCH__X86__MM__KMEMCHECK__ERROR_H
+
+#include <linux/ptrace.h>
+
+#include "shadow.h"
+
+/* Queue a report about an invalid access of @size bytes at @address. */
+void kmemcheck_error_save(enum kmemcheck_shadow state,
+       unsigned long address, unsigned int size, struct pt_regs *regs);
+
+/* Queue a report about an internal kmemcheck problem. */
+void kmemcheck_error_save_bug(struct pt_regs *regs);
+
+/* Print the oldest queued report, if any. */
+void kmemcheck_error_recall(void);
+
+#endif
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
new file mode 100644 (file)
index 0000000..9de7d8f
--- /dev/null
@@ -0,0 +1,650 @@
+/**
+ * kmemcheck - a heavyweight memory checker for the linux kernel
+ * Copyright (C) 2007, 2008  Vegard Nossum <vegardno@ifi.uio.no>
+ * (With a lot of help from Ingo Molnar and Pekka Enberg.)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2) as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kallsyms.h>
+#include <linux/kernel.h>
+#include <linux/kmemcheck.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/page-flags.h>
+#include <linux/percpu.h>
+#include <linux/ptrace.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/cacheflush.h>
+#include <asm/kmemcheck.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+#include "error.h"
+#include "opcode.h"
+#include "pte.h"
+#include "shadow.h"
+
+/*
+ * Boot-time default for kmemcheck_enabled, chosen by the Kconfig options
+ * below: 0 = disabled, 1 = enabled, 2 = one-shot (the checking code sets
+ * kmemcheck_enabled to 0 after saving its first report).
+ */
+#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
+#  define KMEMCHECK_ENABLED 0
+#endif
+
+#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
+#  define KMEMCHECK_ENABLED 1
+#endif
+
+#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
+#  define KMEMCHECK_ENABLED 2
+#endif
+
+int kmemcheck_enabled = KMEMCHECK_ENABLED;
+
+/*
+ * Early initcall: announce kmemcheck and, on SMP kernels, lower
+ * setup_max_cpus to 1. Always returns 0.
+ */
+int __init kmemcheck_init(void)
+{
+       printk(KERN_INFO "kmemcheck: \"Bugs, beware!\"\n");
+
+#ifdef CONFIG_SMP
+       /*
+        * Limit SMP to use a single CPU. We rely on the fact that this code
+        * runs before SMP is set up.
+        */
+       if (setup_max_cpus > 1) {
+               printk(KERN_INFO
+                       "kmemcheck: Limiting number of CPUs to 1.\n");
+               setup_max_cpus = 1;
+       }
+#endif
+
+       return 0;
+}
+
+early_initcall(kmemcheck_init);
+
+/*
+ * kmemcheck_enabled is defined once, earlier in this file, with the default
+ * selected through KMEMCHECK_ENABLED. It must not be defined again here:
+ * a second definition with an initializer is a redefinition error.
+ */
+
+/*
+ * We need to parse the kmemcheck= option before any memory is allocated.
+ *
+ * Stores the parsed integer in kmemcheck_enabled and returns 0. Returns
+ * -EINVAL when the option has no value or the value does not start with a
+ * decimal integer, instead of silently accepting it.
+ */
+static int __init param_kmemcheck(char *str)
+{
+       if (!str)
+               return -EINVAL;
+
+       /* sscanf() returns the number of converted fields; we need one. */
+       if (sscanf(str, "%d", &kmemcheck_enabled) != 1)
+               return -EINVAL;
+
+       return 0;
+}
+
+early_param("kmemcheck", param_kmemcheck);
+
+/*
+ * Set _PAGE_PRESENT in the PTE that kmemcheck_pte_lookup() returns for
+ * @address and flush that address from the TLB.
+ *
+ * Returns 1 if a PTE was found and updated, 0 otherwise.
+ */
+int kmemcheck_show_addr(unsigned long address)
+{
+       pte_t *pte;
+
+       pte = kmemcheck_pte_lookup(address);
+       if (!pte)
+               return 0;
+
+       set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
+       __flush_tlb_one(address);
+       return 1;
+}
+
+/*
+ * Clear _PAGE_PRESENT in the PTE that kmemcheck_pte_lookup() returns for
+ * @address and flush that address from the TLB.
+ *
+ * Returns 1 if a PTE was found and updated, 0 otherwise.
+ */
+int kmemcheck_hide_addr(unsigned long address)
+{
+       pte_t *pte;
+
+       pte = kmemcheck_pte_lookup(address);
+       if (!pte)
+               return 0;
+
+       set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
+       __flush_tlb_one(address);
+       return 1;
+}
+
+/* Per-CPU state kept between a kmemcheck page fault and the debug trap. */
+struct kmemcheck_context {
+       /* Set while kmemcheck_access() runs, to detect recursive faults */
+       bool busy;
+       /* Incremented by kmemcheck_show(), decremented by kmemcheck_hide() */
+       int balance;
+
+       /*
+        * There can be at most two memory operands to an instruction, but
+        * each address can cross a page boundary -- so we may need up to
+        * four addresses that must be hidden/revealed for each fault.
+        */
+       unsigned long addr[4];
+       unsigned long n_addrs;
+       /* regs->flags as saved by kmemcheck_show() before it changes TF/IF */
+       unsigned long flags;
+
+       /* Data size of the instruction that caused a fault. */
+       unsigned int size;
+};
+
+static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);
+
+/*
+ * Tell whether this CPU has a positive show/hide balance, i.e. pages were
+ * shown and have not been hidden again yet. @regs is not used.
+ */
+bool kmemcheck_active(struct pt_regs *regs)
+{
+       return __get_cpu_var(kmemcheck_context).balance > 0;
+}
+
+/*
+ * Remember an address that needs to be shown/hidden for the current fault.
+ * It is a BUG to record more addresses than the per-CPU array can hold.
+ */
+static void kmemcheck_save_addr(unsigned long addr)
+{
+       struct kmemcheck_context *ctx = &__get_cpu_var(kmemcheck_context);
+
+       BUG_ON(ctx->n_addrs >= ARRAY_SIZE(ctx->addr));
+
+       ctx->addr[ctx->n_addrs] = addr;
+       ctx->n_addrs++;
+}
+
+/*
+ * Show every address saved for this CPU. Returns how many of them
+ * kmemcheck_show_addr() actually changed.
+ */
+static unsigned int kmemcheck_show_all(void)
+{
+       struct kmemcheck_context *ctx = &__get_cpu_var(kmemcheck_context);
+       unsigned int shown = 0;
+       unsigned int idx = 0;
+
+       while (idx < ctx->n_addrs) {
+               shown += kmemcheck_show_addr(ctx->addr[idx]);
+               ++idx;
+       }
+
+       return shown;
+}
+
+/*
+ * Hide every address saved for this CPU. Returns how many of them
+ * kmemcheck_hide_addr() actually changed.
+ */
+static unsigned int kmemcheck_hide_all(void)
+{
+       struct kmemcheck_context *ctx = &__get_cpu_var(kmemcheck_context);
+       unsigned int hidden = 0;
+       unsigned int idx = 0;
+
+       while (idx < ctx->n_addrs) {
+               hidden += kmemcheck_hide_addr(ctx->addr[idx]);
+               ++idx;
+       }
+
+       return hidden;
+}
+
+/*
+ * Called from the #PF handler.
+ *
+ * Shows all addresses saved for the current fault and, if any of them was
+ * shown, sets TF and clears IF in @regs so that the faulting instruction is
+ * single-stepped with interrupts off. Must be called with IRQs disabled.
+ */
+void kmemcheck_show(struct pt_regs *regs)
+{
+       struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
+
+       BUG_ON(!irqs_disabled());
+
+       /* A previous show was never balanced by a hide: report and reset. */
+       if (unlikely(data->balance != 0)) {
+               kmemcheck_show_all();
+               kmemcheck_error_save_bug(regs);
+               data->balance = 0;
+               return;
+       }
+
+       /*
+        * None of the addresses actually belonged to kmemcheck. Note that
+        * this is not an error.
+        */
+       if (kmemcheck_show_all() == 0)
+               return;
+
+       ++data->balance;
+
+       /*
+        * The IF needs to be cleared as well, so that the faulting
+        * instruction can run "uninterrupted". Otherwise, we might take
+        * an interrupt and start executing that before we've had a chance
+        * to hide the page again.
+        *
+        * NOTE: In the rare case of multiple faults, we must not override
+        * the original flags:
+        */
+       if (!(regs->flags & X86_EFLAGS_TF))
+               data->flags = regs->flags;
+
+       regs->flags |= X86_EFLAGS_TF;
+       regs->flags &= ~X86_EFLAGS_IF;
+}
+
+/*
+ * Called from the #DB handler.
+ *
+ * Undoes kmemcheck_show(): hides the saved addresses again (or leaves them
+ * shown if kmemcheck has been disabled in the meantime), forgets them, and
+ * restores the TF/IF bits of @regs from the flags saved by kmemcheck_show().
+ * Must be called with IRQs disabled.
+ */
+void kmemcheck_hide(struct pt_regs *regs)
+{
+       struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
+       int n;
+
+       BUG_ON(!irqs_disabled());
+
+       /* Nothing was shown on this CPU. */
+       if (data->balance == 0)
+               return;
+
+       /* Unexpected balance: report a bug, reset state, restore flags. */
+       if (unlikely(data->balance != 1)) {
+               kmemcheck_show_all();
+               kmemcheck_error_save_bug(regs);
+               data->n_addrs = 0;
+               data->balance = 0;
+
+               if (!(data->flags & X86_EFLAGS_TF))
+                       regs->flags &= ~X86_EFLAGS_TF;
+               if (data->flags & X86_EFLAGS_IF)
+                       regs->flags |= X86_EFLAGS_IF;
+               return;
+       }
+
+       if (kmemcheck_enabled)
+               n = kmemcheck_hide_all();
+       else
+               n = kmemcheck_show_all();
+
+       /* No address was changed: keep the balance and flags as they are. */
+       if (n == 0)
+               return;
+
+       --data->balance;
+
+       data->n_addrs = 0;
+
+       /* Only clear TF / set IF if that matches the originally saved flags */
+       if (!(data->flags & X86_EFLAGS_TF))
+               regs->flags &= ~X86_EFLAGS_TF;
+       if (data->flags & X86_EFLAGS_IF)
+               regs->flags |= X86_EFLAGS_IF;
+}
+
+/*
+ * For each of the @n pages starting at @p, set _PAGE_PRESENT and clear
+ * _PAGE_HIDDEN in its PTE, then flush the page's address from the TLB.
+ * It is a BUG if a page has no PTE or is not mapped at the 4K level.
+ */
+void kmemcheck_show_pages(struct page *p, unsigned int n)
+{
+       unsigned int i;
+
+       for (i = 0; i < n; ++i) {
+               unsigned long address;
+               pte_t *pte;
+               unsigned int level;
+
+               address = (unsigned long) page_address(&p[i]);
+               pte = lookup_address(address, &level);
+               BUG_ON(!pte);
+               BUG_ON(level != PG_LEVEL_4K);
+
+               set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
+               set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
+               __flush_tlb_one(address);
+       }
+}
+
+/* Returns true if kmemcheck_pte_lookup() finds a PTE for the page. */
+bool kmemcheck_page_is_tracked(struct page *p)
+{
+       unsigned long address = (unsigned long) page_address(p);
+
+       /* This will also check the "hidden" flag of the PTE. */
+       return kmemcheck_pte_lookup(address) != NULL;
+}
+
+/*
+ * For each of the @n pages starting at @p, clear _PAGE_PRESENT and set
+ * _PAGE_HIDDEN in its PTE, then flush the page's address from the TLB.
+ * It is a BUG if a page has no PTE or is not mapped at the 4K level.
+ */
+void kmemcheck_hide_pages(struct page *p, unsigned int n)
+{
+       unsigned int i;
+
+       for (i = 0; i < n; ++i) {
+               unsigned long address;
+               pte_t *pte;
+               unsigned int level;
+
+               address = (unsigned long) page_address(&p[i]);
+               pte = lookup_address(address, &level);
+               BUG_ON(!pte);
+               BUG_ON(level != PG_LEVEL_4K);
+
+               set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
+               set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
+               __flush_tlb_one(address);
+       }
+}
+
+/*
+ * Check a read of @size bytes at @addr. Access may NOT cross page boundary.
+ *
+ * Addresses without shadow memory are ignored. Otherwise the address is
+ * saved for show/hide, and if the shadow test does not report
+ * KMEMCHECK_SHADOW_INITIALIZED a report is queued (when kmemcheck is
+ * enabled) and the shadow is updated so the same bytes are not reported
+ * again.
+ */
+static void kmemcheck_read_strict(struct pt_regs *regs,
+       unsigned long addr, unsigned int size)
+{
+       void *shadow;
+       enum kmemcheck_shadow status;
+
+       shadow = kmemcheck_shadow_lookup(addr);
+       if (!shadow)
+               return;
+
+       kmemcheck_save_addr(addr);
+       status = kmemcheck_shadow_test(shadow, size);
+       if (status == KMEMCHECK_SHADOW_INITIALIZED)
+               return;
+
+       if (kmemcheck_enabled)
+               kmemcheck_error_save(status, addr, size, regs);
+
+       /* One-shot mode: turn checking off after the first report. */
+       if (kmemcheck_enabled == 2)
+               kmemcheck_enabled = 0;
+
+       /* Don't warn about it again. */
+       kmemcheck_shadow_set(shadow, size);
+}
+
+/*
+ * Check a read of @size bytes at @addr. Access may cross page boundary, in
+ * which case each page's part is checked separately.
+ */
+static void kmemcheck_read(struct pt_regs *regs,
+       unsigned long addr, unsigned int size)
+{
+       unsigned long page = addr & PAGE_MASK;
+       /* Address of the last byte accessed (inclusive) */
+       unsigned long next_addr = addr + size - 1;
+       unsigned long next_page = next_addr & PAGE_MASK;
+
+       if (likely(page == next_page)) {
+               kmemcheck_read_strict(regs, addr, size);
+               return;
+       }
+
+       /*
+        * What we do is basically to split the access across the
+        * two pages and handle each part separately. Yes, this means
+        * that we may now see reads that are 3 + 5 bytes, for
+        * example (and if both are uninitialized, there will be two
+        * reports), but it makes the code a lot simpler.
+        *
+        * Since next_addr is inclusive, the part on the second page is
+        * next_addr - next_page + 1 bytes long; without the "+ 1" the
+        * last byte of the access would never be checked.
+        */
+       kmemcheck_read_strict(regs, addr, next_page - addr);
+       kmemcheck_read_strict(regs, next_page, next_addr - next_page + 1);
+}
+
+/*
+ * Handle a write of @size bytes at @addr that does not cross a page
+ * boundary: if the address has shadow memory, save the address for
+ * show/hide and update the shadow with kmemcheck_shadow_set() (presumably
+ * marking the bytes initialized -- see shadow.c). @regs is not used.
+ */
+static void kmemcheck_write_strict(struct pt_regs *regs,
+       unsigned long addr, unsigned int size)
+{
+       void *shadow;
+
+       shadow = kmemcheck_shadow_lookup(addr);
+       if (!shadow)
+               return;
+
+       kmemcheck_save_addr(addr);
+       kmemcheck_shadow_set(shadow, size);
+}
+
+/*
+ * Handle a write of @size bytes at @addr. The access may cross a page
+ * boundary, in which case each page's part is handled separately.
+ */
+static void kmemcheck_write(struct pt_regs *regs,
+       unsigned long addr, unsigned int size)
+{
+       unsigned long page = addr & PAGE_MASK;
+       /* Address of the last byte accessed (inclusive) */
+       unsigned long next_addr = addr + size - 1;
+       unsigned long next_page = next_addr & PAGE_MASK;
+
+       if (likely(page == next_page)) {
+               kmemcheck_write_strict(regs, addr, size);
+               return;
+       }
+
+       /*
+        * See comment in kmemcheck_read(). Since next_addr is inclusive,
+        * the part on the second page is next_addr - next_page + 1 bytes
+        * long; without the "+ 1" the shadow of the last byte written
+        * would never be updated.
+        */
+       kmemcheck_write_strict(regs, addr, next_page - addr);
+       kmemcheck_write_strict(regs, next_page, next_addr - next_page + 1);
+}
+
+/*
+ * Copying is hard. We have two addresses, each of which may be split across
+ * a page (and each page will have different shadow addresses).
+ *
+ * The shadow bytes of the @size source bytes are gathered into a local
+ * buffer (untracked source memory counts as initialized) and then written
+ * to the destination's shadow. Every byte whose shadow was handed to a
+ * tracked destination is marked initialized in the local buffer, so only
+ * bytes copied to untracked memory can still trigger a report at the end.
+ * @size must not exceed 8 bytes.
+ */
+static void kmemcheck_copy(struct pt_regs *regs,
+       unsigned long src_addr, unsigned long dst_addr, unsigned int size)
+{
+       uint8_t shadow[8];
+       enum kmemcheck_shadow status;
+
+       unsigned long page;
+       unsigned long next_addr;
+       unsigned long next_page;
+
+       uint8_t *x;
+       unsigned int i;
+       unsigned int n;
+
+       BUG_ON(size > sizeof(shadow));
+
+       /* Step 1: collect the source's shadow bytes into shadow[]. */
+       page = src_addr & PAGE_MASK;
+       next_addr = src_addr + size - 1;
+       next_page = next_addr & PAGE_MASK;
+
+       if (likely(page == next_page)) {
+               /* Same page */
+               x = kmemcheck_shadow_lookup(src_addr);
+               if (x) {
+                       kmemcheck_save_addr(src_addr);
+                       for (i = 0; i < size; ++i)
+                               shadow[i] = x[i];
+               } else {
+                       for (i = 0; i < size; ++i)
+                               shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
+               }
+       } else {
+               /* n = number of bytes that lie on the first page */
+               n = next_page - src_addr;
+               BUG_ON(n > sizeof(shadow));
+
+               /* First page */
+               x = kmemcheck_shadow_lookup(src_addr);
+               if (x) {
+                       kmemcheck_save_addr(src_addr);
+                       for (i = 0; i < n; ++i)
+                               shadow[i] = x[i];
+               } else {
+                       /* Not tracked */
+                       for (i = 0; i < n; ++i)
+                               shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
+               }
+
+               /* Second page */
+               x = kmemcheck_shadow_lookup(next_page);
+               if (x) {
+                       kmemcheck_save_addr(next_page);
+                       for (i = n; i < size; ++i)
+                               shadow[i] = x[i - n];
+               } else {
+                       /* Not tracked */
+                       for (i = n; i < size; ++i)
+                               shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
+               }
+       }
+
+       /* Step 2: store shadow[] into the destination's shadow memory. */
+       page = dst_addr & PAGE_MASK;
+       next_addr = dst_addr + size - 1;
+       next_page = next_addr & PAGE_MASK;
+
+       if (likely(page == next_page)) {
+               /* Same page */
+               x = kmemcheck_shadow_lookup(dst_addr);
+               if (x) {
+                       kmemcheck_save_addr(dst_addr);
+                       for (i = 0; i < size; ++i) {
+                               x[i] = shadow[i];
+                               shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
+                       }
+               }
+       } else {
+               /* n = number of bytes that lie on the first page */
+               n = next_page - dst_addr;
+               BUG_ON(n > sizeof(shadow));
+
+               /* First page */
+               x = kmemcheck_shadow_lookup(dst_addr);
+               if (x) {
+                       kmemcheck_save_addr(dst_addr);
+                       for (i = 0; i < n; ++i) {
+                               x[i] = shadow[i];
+                               shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
+                       }
+               }
+
+               /* Second page */
+               x = kmemcheck_shadow_lookup(next_page);
+               if (x) {
+                       kmemcheck_save_addr(next_page);
+                       for (i = n; i < size; ++i) {
+                               x[i - n] = shadow[i];
+                               shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
+                       }
+               }
+       }
+
+       /* Step 3: report whatever is still not marked initialized. */
+       status = kmemcheck_shadow_test(shadow, size);
+       if (status == KMEMCHECK_SHADOW_INITIALIZED)
+               return;
+
+       if (kmemcheck_enabled)
+               kmemcheck_error_save(status, src_addr, size, regs);
+
+       /* One-shot mode: turn checking off after the first report. */
+       if (kmemcheck_enabled == 2)
+               kmemcheck_enabled = 0;
+}
+
+/* Direction of the faulting access as reported by the page fault handler. */
+enum kmemcheck_method {
+       KMEMCHECK_READ,
+       KMEMCHECK_WRITE,
+};
+
+/*
+ * Decode the instruction at regs->ip and update/check the shadow memory for
+ * the access it performs.
+ *
+ * @fallback_address and @fallback_method come from the page fault and are
+ * used unless the opcode is one of the special cases handled below. A fault
+ * taken while this function is already running on the CPU (data->busy) is
+ * reported as a kmemcheck bug.
+ */
+static void kmemcheck_access(struct pt_regs *regs,
+       unsigned long fallback_address, enum kmemcheck_method fallback_method)
+{
+       const uint8_t *insn;
+       const uint8_t *insn_primary;
+       unsigned int size;
+
+       struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
+
+       /* Recursive fault -- ouch. */
+       if (data->busy) {
+               kmemcheck_show_addr(fallback_address);
+               kmemcheck_error_save_bug(regs);
+               return;
+       }
+
+       data->busy = true;
+
+       insn = (const uint8_t *) regs->ip;
+       insn_primary = kmemcheck_opcode_get_primary(insn);
+
+       /* size = data size of the access, as decoded from the instruction */
+       kmemcheck_opcode_decode(insn, &size);
+
+       switch (insn_primary[0]) {
+#ifdef CONFIG_KMEMCHECK_BITOPS_OK
+               /* AND, OR, XOR */
+               /*
+                * Unfortunately, these instructions have to be excluded from
+                * our regular checking since they access only some (and not
+                * all) bits. This clears out "bogus" bitfield-access warnings.
+                */
+       case 0x80:
+       case 0x81:
+       case 0x82:
+       case 0x83:
+               /* Bits 3-5 of the following byte select the operation */
+               switch ((insn_primary[1] >> 3) & 7) {
+                       /* OR */
+               case 1:
+                       /* AND */
+               case 4:
+                       /* XOR */
+               case 6:
+                       kmemcheck_write(regs, fallback_address, size);
+                       goto out;
+
+                       /* ADD */
+               case 0:
+                       /* ADC */
+               case 2:
+                       /* SBB */
+               case 3:
+                       /* SUB */
+               case 5:
+                       /* CMP */
+               case 7:
+                       break;
+               }
+               break;
+#endif
+
+               /* MOVS, MOVSB, MOVSW, MOVSD */
+       case 0xa4:
+       case 0xa5:
+               /*
+                * These instructions are special because they take two
+                * addresses, but we only get one page fault.
+                */
+               kmemcheck_copy(regs, regs->si, regs->di, size);
+               goto out;
+
+               /* CMPS, CMPSB, CMPSW, CMPSD */
+       case 0xa6:
+       case 0xa7:
+               kmemcheck_read(regs, regs->si, size);
+               kmemcheck_read(regs, regs->di, size);
+               goto out;
+       }
+
+       /*
+        * If the opcode isn't special in any way, we use the data from the
+        * page fault handler to determine the address and type of memory
+        * access.
+        */
+       switch (fallback_method) {
+       case KMEMCHECK_READ:
+               kmemcheck_read(regs, fallback_address, size);
+               goto out;
+       case KMEMCHECK_WRITE:
+               kmemcheck_write(regs, fallback_address, size);
+               goto out;
+       }
+
+out:
+       data->busy = false;
+}
+
+/*
+ * Page fault hook.
+ *
+ * Returns false, leaving the fault to the caller, unless the fault comes
+ * from kernel code and @address is mapped by a 4K PTE that kmemcheck has
+ * marked hidden. Otherwise the access is checked, the page is shown so the
+ * faulting instruction can be re-run, and true is returned.
+ */
+bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
+       unsigned long error_code)
+{
+       pte_t *pte;
+       unsigned int level;
+
+       /*
+        * XXX: Is it safe to assume that memory accesses from virtual 86
+        * mode or non-kernel code segments will _never_ access kernel
+        * memory (e.g. tracked pages)? For now, we need this to avoid
+        * invoking kmemcheck for PnP BIOS calls.
+        */
+       if (regs->flags & X86_VM_MASK)
+               return false;
+       if (regs->cs != __KERNEL_CS)
+               return false;
+
+       pte = lookup_address(address, &level);
+       if (!pte)
+               return false;
+       if (level != PG_LEVEL_4K)
+               return false;
+       if (!pte_hidden(*pte))
+               return false;
+
+       /* Bit 1 of the error code distinguishes writes from reads. */
+       if (error_code & 2)
+               kmemcheck_access(regs, address, KMEMCHECK_WRITE);
+       else
+               kmemcheck_access(regs, address, KMEMCHECK_READ);
+
+       kmemcheck_show(regs);
+       return true;
+}
+
+/*
+ * Debug trap hook: if kmemcheck has pages shown on this CPU, hide them
+ * again and return true; otherwise return false without doing anything.
+ */
+bool kmemcheck_trap(struct pt_regs *regs)
+{
+       bool active = kmemcheck_active(regs);
+
+       /* We're done. */
+       if (active)
+               kmemcheck_hide(regs);
+
+       return active;
+}
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c
new file mode 100644 (file)
index 0000000..a4100b6
--- /dev/null
@@ -0,0 +1,101 @@
+#include <linux/types.h>
+
+#include "opcode.h"
+
+static bool opcode_is_prefix(uint8_t b)
+{
+       switch (b) {
+       /* Group 1 */
+       case 0xf0: case 0xf2: case 0xf3:
+       /* Group 2 (0x2e and 0x3e need listing only once) */
+       case 0x2e: case 0x36: case 0x3e: case 0x26: case 0x64: case 0x65:
+       /* Groups 3 and 4 */
+       case 0x66: case 0x67:
+               return true;
+       }
+       return false;
+}
+
+/*
+ * Return true if b is a REX prefix byte (0x40-0x4f).
+ *
+ * REX prefixes exist only in 64-bit code. On 32-bit the same byte values
+ * are ordinary one-byte opcodes, so nothing may be treated as a REX
+ * prefix there; otherwise kmemcheck_opcode_get_primary() would step over
+ * a real opcode.
+ */
+#ifdef CONFIG_X86_64
+static bool opcode_is_rex_prefix(uint8_t b)
+{
+       return (b & 0xf0) == 0x40;
+}
+#else
+static bool opcode_is_rex_prefix(uint8_t b)
+{
+       return false;
+}
+#endif
+
+/* Bit 3 of the REX prefix byte; kmemcheck_opcode_decode() tests it. */
+#define REX_W (1 << 3)
+
+/*
+ * This is a VERY crude opcode decoder. We only need to find the size of the
+ * load/store that caused our #PF and this should work for all the opcodes
+ * that we care about. Moreover, the ones who invented this instruction set
+ * should be shot.
+ */
+void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size)
+{
+       /* Width used for non-byte operands; 4 unless overridden below. */
+       int wide_size = 4;
+
+       /* Consume the prefixes; 0x66 narrows the operand to 2 bytes. */
+       while (opcode_is_prefix(*op)) {
+               if (*op == 0x66)
+                       wide_size = 2;
+               ++op;
+       }
+
+#ifdef CONFIG_X86_64
+       /* REX prefix */
+       if (opcode_is_rex_prefix(*op)) {
+               const uint8_t rex = *op++;
+
+               if (rex & REX_W) {
+                       /* With REX.W the size is 8, bar the cases below. */
+                       if (*op == 0x63) {
+                               *size = 4;
+                               return;
+                       }
+
+                       if (*op == 0x0f) {
+                               ++op;
+
+                               if (*op == 0xb6 || *op == 0xbe) {
+                                       *size = 1;
+                                       return;
+                               }
+                               if (*op == 0xb7 || *op == 0xbf) {
+                                       *size = 2;
+                                       return;
+                               }
+                       }
+
+                       *size = 8;
+                       return;
+               }
+       }
+#endif
+
+       /* escape opcode */
+       if (*op == 0x0f) {
+               ++op;
+
+               /*
+                * 0xb7/0xbf (move with zero-extend and sign-extend,
+                * respectively) read 2 bytes. 0xb6/0xbe have bit 0
+                * clear, so the test below already yields 1 for them.
+                */
+               if (*op == 0xb7 || *op == 0xbf)
+                       wide_size = 2;
+       }
+
+       /* Bit 0 of the opcode separates byte from wider operands. */
+       if (*op & 1)
+               *size = wide_size;
+       else
+               *size = 1;
+}
+
+const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op)
+{
+       /* Step over every prefix byte, then over at most one REX byte. */
+       for (; opcode_is_prefix(*op); ++op)
+               ;
+       if (opcode_is_rex_prefix(*op))
+               return op + 1;
+       return op;
+}
diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h
new file mode 100644 (file)
index 0000000..6956aad
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef ARCH__X86__MM__KMEMCHECK__OPCODE_H
+#define ARCH__X86__MM__KMEMCHECK__OPCODE_H
+
+#include <linux/types.h>
+
+void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size);
+const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op);
+
+#endif
diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c
new file mode 100644 (file)
index 0000000..4ead26e
--- /dev/null
@@ -0,0 +1,22 @@
+#include <linux/mm.h>
+
+#include <asm/pgtable.h>
+
+#include "pte.h"
+
+pte_t *kmemcheck_pte_lookup(unsigned long address)
+{
+       unsigned int pg_level;
+       pte_t *entry = lookup_address(address, &pg_level);
+
+       /*
+        * Hand back the entry only if one exists, it maps a 4k page
+        * and pte_hidden() holds for it; everything else is reported
+        * as NULL.
+        */
+       if (entry && pg_level == PG_LEVEL_4K && pte_hidden(*entry))
+               return entry;
+
+       return NULL;
+}
+
diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h
new file mode 100644 (file)
index 0000000..9f59664
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef ARCH__X86__MM__KMEMCHECK__PTE_H
+#define ARCH__X86__MM__KMEMCHECK__PTE_H
+
+#include <linux/mm.h>
+
+#include <asm/pgtable.h>
+
+pte_t *kmemcheck_pte_lookup(unsigned long address);
+
+#endif
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
new file mode 100644 (file)
index 0000000..5544d36
--- /dev/null
@@ -0,0 +1,153 @@
+#include <linux/kmemcheck.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+#include "pte.h"
+#include "shadow.h"
+
+/*
+ * Return the shadow address for the given address. Returns NULL if the
+ * address is not tracked.
+ *
+ * We need to be extremely careful not to follow any invalid pointers,
+ * because this function can be called for *any* possible address.
+ */
+void *kmemcheck_shadow_lookup(unsigned long address)
+{
+       struct page *pg;
+
+       /*
+        * Both checks have to pass before the struct page is looked at;
+        * kmemcheck_pte_lookup() is only consulted for valid addresses.
+        */
+       if (!virt_addr_valid(address) || !kmemcheck_pte_lookup(address))
+               return NULL;
+
+       pg = virt_to_page(address);
+       if (!pg->shadow)
+               return NULL;
+       /* Same offset within the shadow block as within the page. */
+       return pg->shadow + (address & (PAGE_SIZE - 1));
+}
+
+/*
+ * Set the shadow bytes for the n bytes starting at address to status.
+ *
+ * @address: first byte of the range
+ * @n:       length of the range in bytes; 0 is a no-op
+ * @status:  value stored into every shadow byte of the range
+ *
+ * The shadow pointer is looked up separately for every page, so the
+ * range is processed in per-page pieces. Pages for which
+ * kmemcheck_shadow_lookup() returns NULL are skipped.
+ */
+static void mark_shadow(void *address, unsigned int n,
+       enum kmemcheck_shadow status)
+{
+       unsigned long addr = (unsigned long) address;
+
+       /*
+        * Driving the loop by the remaining length makes n == 0 fall
+        * straight through. Deriving the last byte as addr + n - 1
+        * instead would, for a page-aligned addr and n == 0, select the
+        * previous page, mark a full page and underflow n.
+        */
+       while (n > 0) {
+               /* Bytes from addr up to the end of its page. */
+               unsigned int chunk = PAGE_SIZE - (addr & (PAGE_SIZE - 1));
+               void *shadow;
+
+               /* Never go past the end of the requested range. */
+               if (chunk > n)
+                       chunk = n;
+
+               shadow = kmemcheck_shadow_lookup(addr);
+               if (shadow)
+                       memset(shadow, status, chunk);
+
+               addr += chunk;
+               n -= chunk;
+       }
+}
+
+/*
+ * Fill the shadow memory of the given address such that the n bytes at
+ * that address are marked as being unallocated.
+ */
+void kmemcheck_mark_unallocated(void *address, unsigned int n)
+{
+       mark_shadow(address, n, KMEMCHECK_SHADOW_UNALLOCATED);
+}
+
+/*
+ * Fill the shadow memory of the given address such that the n bytes at
+ * that address are marked as being uninitialized.
+ */
+void kmemcheck_mark_uninitialized(void *address, unsigned int n)
+{
+       mark_shadow(address, n, KMEMCHECK_SHADOW_UNINITIALIZED);
+}
+
+/*
+ * Fill the shadow memory of the given address such that the memory at that
+ * address is marked as being initialized.
+ *
+ * @address: start of the range to mark
+ * @n:       number of bytes to mark
+ *
+ * Exported to GPL modules via EXPORT_SYMBOL_GPL() below.
+ */
+void kmemcheck_mark_initialized(void *address, unsigned int n)
+{
+       mark_shadow(address, n, KMEMCHECK_SHADOW_INITIALIZED);
+}
+EXPORT_SYMBOL_GPL(kmemcheck_mark_initialized);
+
+/*
+ * Fill the shadow memory of the given address such that the n bytes at
+ * that address are marked as being freed.
+ */
+void kmemcheck_mark_freed(void *address, unsigned int n)
+{
+       mark_shadow(address, n, KMEMCHECK_SHADOW_FREED);
+}
+
+void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n)
+{
+       struct page *const end = p + n;
+
+       /* One PAGE_SIZE range per struct page in p[0] .. p[n - 1]. */
+       for (; p != end; ++p)
+               kmemcheck_mark_unallocated(page_address(p), PAGE_SIZE);
+}
+
+void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n)
+{
+       struct page *const end = p + n;
+
+       /* One PAGE_SIZE range per struct page in p[0] .. p[n - 1]. */
+       for (; p != end; ++p)
+               kmemcheck_mark_uninitialized(page_address(p), PAGE_SIZE);
+}
+
+enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
+{
+       const uint8_t *status = shadow;
+       const uint8_t *const end = status + size;
+       const uint8_t *cur;
+
+#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
+       /*
+        * One initialized byte anywhere in the range is enough; gcc
+        * frequently generates code to access neighboring bytes.
+        */
+       for (cur = status; cur != end; ++cur) {
+               if (*cur == KMEMCHECK_SHADOW_INITIALIZED)
+                       return KMEMCHECK_SHADOW_INITIALIZED;
+       }
+#else
+       /* Report the first byte that is anything but initialized. */
+       for (cur = status; cur != end; ++cur) {
+               if (*cur != KMEMCHECK_SHADOW_INITIALIZED)
+                       return *cur;
+       }
+#endif
+
+       /* Nothing decided above: the first byte's status is the answer. */
+       return status[0];
+}
+
+void kmemcheck_shadow_set(void *shadow, unsigned int size)
+{
+       uint8_t *cur = shadow;
+
+       /* Flag each of the size shadow bytes as initialized. */
+       while (size--)
+               *cur++ = KMEMCHECK_SHADOW_INITIALIZED;
+}
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h
new file mode 100644 (file)
index 0000000..af46d9a
--- /dev/null
@@ -0,0 +1,16 @@
+#ifndef ARCH__X86__MM__KMEMCHECK__SHADOW_H
+#define ARCH__X86__MM__KMEMCHECK__SHADOW_H
+
+/*
+ * Per-byte status values kept in the shadow memory. shadow.c stores them
+ * with memset() and reads them back through uint8_t, so every value has
+ * to fit in a single byte.
+ */
+enum kmemcheck_shadow {
+       /* Stored by kmemcheck_mark_unallocated() */
+       KMEMCHECK_SHADOW_UNALLOCATED,
+       /* Stored by kmemcheck_mark_uninitialized() */
+       KMEMCHECK_SHADOW_UNINITIALIZED,
+       /* Stored by kmemcheck_mark_initialized() and kmemcheck_shadow_set() */
+       KMEMCHECK_SHADOW_INITIALIZED,
+       /* Stored by kmemcheck_mark_freed() */
+       KMEMCHECK_SHADOW_FREED,
+};
+
+void *kmemcheck_shadow_lookup(unsigned long address);
+
+enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size);
+void kmemcheck_shadow_set(void *shadow, unsigned int size);
+
+#endif
diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
new file mode 100644 (file)
index 0000000..39480c9
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef LINUX_KMEMCHECK_H
+#define LINUX_KMEMCHECK_H
+
+#include <linux/mm_types.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_KMEMCHECK
+extern int kmemcheck_enabled;
+
+int kmemcheck_show_addr(unsigned long address);
+int kmemcheck_hide_addr(unsigned long address);
+#else
+#define kmemcheck_enabled 0
+
+#endif /* CONFIG_KMEMCHECK */
+
+#endif /* LINUX_KMEMCHECK_H */
index 0e80e26..0042090 100644 (file)
@@ -98,6 +98,14 @@ struct page {
 #ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
        unsigned long debug_flags;      /* Use atomic bitops on this */
 #endif
+
+#ifdef CONFIG_KMEMCHECK
+       /*
+        * kmemcheck wants to track the status of each byte in a page; this
+        * is a pointer to such a status block. NULL if not tracked.
+        */
+       void *shadow;
+#endif
 };
 
 /*
index 5616661..e3c335e 100644 (file)
@@ -65,6 +65,7 @@
 #include <linux/idr.h>
 #include <linux/ftrace.h>
 #include <linux/async.h>
+#include <linux/kmemcheck.h>
 #include <linux/kmemtrace.h>
 #include <trace/boot.h>
 
index ce664f9..9ef80bb 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/security.h>
 #include <linux/ctype.h>
 #include <linux/utsname.h>
+#include <linux/kmemcheck.h>
 #include <linux/smp_lock.h>
 #include <linux/fs.h>
 #include <linux/init.h>
@@ -959,6 +960,17 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = &proc_dointvec,
        },
 #endif
+#ifdef CONFIG_KMEMCHECK
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "kmemcheck",
+               .data           = &kmemcheck_enabled,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+#endif
+
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt