hung_task: fix false positive during vfork
[linux-2.6.git] / kernel / sysctl.c
index 99a510c..11d65b5 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/slab.h>
 #include <linux/sysctl.h>
 #include <linux/signal.h>
+#include <linux/printk.h>
 #include <linux/proc_fs.h>
 #include <linux/security.h>
 #include <linux/ctype.h>
@@ -55,6 +56,7 @@
 #include <linux/kprobes.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/oom.h>
+#include <linux/kmod.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -116,6 +118,7 @@ static int neg_one = -1;
 static int zero;
 static int __maybe_unused one = 1;
 static int __maybe_unused two = 2;
+static int __maybe_unused three = 3;
 static unsigned long one_ul = 1;
 static int one_hundred = 100;
 #ifdef CONFIG_PRINTK
@@ -161,8 +164,6 @@ extern int no_unaligned_warning;
 extern int unaligned_dump_stack;
 #endif
 
-extern struct ratelimit_state printk_ratelimit_state;
-
 #ifdef CONFIG_PROC_SYSCTL
 static int proc_do_cad_pid(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos);
@@ -170,8 +171,14 @@ static int proc_taint(struct ctl_table *table, int write,
                               void __user *buffer, size_t *lenp, loff_t *ppos);
 #endif
 
+#ifdef CONFIG_PRINTK
+static int proc_dmesg_restrict(struct ctl_table *table, int write,
+                               void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
+
 #ifdef CONFIG_MAGIC_SYSRQ
-static int __sysrq_enabled; /* Note: sysrq code ises it's own private copy */
+/* Note: sysrq code uses its own private copy */
+static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
 
 static int sysrq_sysctl_handler(ctl_table *table, int write,
                                void __user *buffer, size_t *lenp,
@@ -194,9 +201,9 @@ static int sysrq_sysctl_handler(ctl_table *table, int write,
 static struct ctl_table root_table[];
 static struct ctl_table_root sysctl_table_root;
 static struct ctl_table_header root_table_header = {
-       .count = 1,
+       {{.count = 1,
        .ctl_table = root_table,
-       .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
+       .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
        .root = &sysctl_table_root,
        .set = &sysctl_table_root.default_set,
 };
@@ -247,10 +254,6 @@ static struct ctl_table root_table[] = {
                .mode           = 0555,
                .child          = dev_table,
        },
-/*
- * NOTE: do not add new entries to this table unless you have read
- * Documentation/sysctl/ctl_unnumbered.txt
- */
        { }
 };
 
@@ -261,8 +264,6 @@ static int min_wakeup_granularity_ns;                       /* 0 usecs */
 static int max_wakeup_granularity_ns = NSEC_PER_SEC;   /* 1 second */
 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
-static int min_sched_shares_ratelimit = 100000; /* 100 usec */
-static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */
 #endif
 
 #ifdef CONFIG_COMPACTION
@@ -307,15 +308,6 @@ static struct ctl_table kern_table[] = {
                .extra2         = &max_wakeup_granularity_ns,
        },
        {
-               .procname       = "sched_shares_ratelimit",
-               .data           = &sysctl_sched_shares_ratelimit,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = sched_proc_update_handler,
-               .extra1         = &min_sched_shares_ratelimit,
-               .extra2         = &max_sched_shares_ratelimit,
-       },
-       {
                .procname       = "sched_tunable_scaling",
                .data           = &sysctl_sched_tunable_scaling,
                .maxlen         = sizeof(enum sched_tunable_scaling),
@@ -325,14 +317,6 @@ static struct ctl_table kern_table[] = {
                .extra2         = &max_sched_tunable_scaling,
        },
        {
-               .procname       = "sched_shares_thresh",
-               .data           = &sysctl_sched_shares_thresh,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &zero,
-       },
-       {
                .procname       = "sched_migration_cost",
                .data           = &sysctl_sched_migration_cost,
                .maxlen         = sizeof(unsigned int),
@@ -354,6 +338,13 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = proc_dointvec,
        },
        {
+               .procname       = "sched_shares_window",
+               .data           = &sysctl_sched_shares_window,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
                .procname       = "timer_migration",
                .data           = &sysctl_timer_migration,
                .maxlen         = sizeof(unsigned int),
@@ -377,13 +368,17 @@ static struct ctl_table kern_table[] = {
                .mode           = 0644,
                .proc_handler   = sched_rt_handler,
        },
+#ifdef CONFIG_SCHED_AUTOGROUP
        {
-               .procname       = "sched_compat_yield",
-               .data           = &sysctl_sched_compat_yield,
+               .procname       = "sched_autogroup_enabled",
+               .data           = &sysctl_sched_autogroup_enabled,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &one,
        },
+#endif
 #ifdef CONFIG_PROVE_LOCKING
        {
                .procname       = "prove_locking",
@@ -622,6 +617,11 @@ static struct ctl_table kern_table[] = {
                .child          = random_table,
        },
        {
+               .procname       = "usermodehelper",
+               .mode           = 0555,
+               .child          = usermodehelper_table,
+       },
+       {
                .procname       = "overflowuid",
                .data           = &overflowuid,
                .maxlen         = sizeof(int),
@@ -704,6 +704,24 @@ static struct ctl_table kern_table[] = {
                .extra1         = &zero,
                .extra2         = &ten_thousand,
        },
+       {
+               .procname       = "dmesg_restrict",
+               .data           = &dmesg_restrict,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dmesg_restrict, /* writes require CAP_SYS_ADMIN */
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
+       {
+               .procname       = "kptr_restrict",
+               .data           = &kptr_restrict,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dmesg_restrict,
+               .extra1         = &zero,
+               .extra2         = &two,
+       },
 #endif
        {
                .procname       = "ngroups_max",
@@ -718,14 +736,16 @@ static struct ctl_table kern_table[] = {
                .data           = &watchdog_enabled,
                .maxlen         = sizeof (int),
                .mode           = 0644,
-               .proc_handler   = proc_dowatchdog_enabled,
+               .proc_handler   = proc_dowatchdog,
+               .extra1         = &zero,
+               .extra2         = &one,
        },
        {
                .procname       = "watchdog_thresh",
-               .data           = &softlockup_thresh,
+               .data           = &watchdog_thresh,
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = proc_dowatchdog_thresh,
+               .proc_handler   = proc_dowatchdog,
                .extra1         = &neg_one,
                .extra2         = &sixty,
        },
@@ -738,21 +758,23 @@ static struct ctl_table kern_table[] = {
                .extra1         = &zero,
                .extra2         = &one,
        },
-#endif
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
        {
-               .procname       = "unknown_nmi_panic",
-               .data           = &unknown_nmi_panic,
+               .procname       = "nmi_watchdog",
+               .data           = &watchdog_enabled,
                .maxlen         = sizeof (int),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dowatchdog,
+               .extra1         = &zero,
+               .extra2         = &one,
        },
+#endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
        {
-               .procname       = "nmi_watchdog",
-               .data           = &nmi_watchdog_enabled,
+               .procname       = "unknown_nmi_panic",
+               .data           = &unknown_nmi_panic,
                .maxlen         = sizeof (int),
                .mode           = 0644,
-               .proc_handler   = proc_nmi_enabled,
+               .proc_handler   = proc_dointvec,
        },
 #endif
 #if defined(CONFIG_X86)
@@ -916,6 +938,12 @@ static struct ctl_table kern_table[] = {
        },
 #endif
 #ifdef CONFIG_PERF_EVENTS
+       /*
+        * User-space scripts rely on the existence of this file
+        * as a feature check for perf_events being enabled.
+        *
+        * So it's an ABI, do not remove!
+        */
        {
                .procname       = "perf_event_paranoid",
                .data           = &sysctl_perf_event_paranoid,
@@ -935,7 +963,7 @@ static struct ctl_table kern_table[] = {
                .data           = &sysctl_perf_event_sample_rate,
                .maxlen         = sizeof(sysctl_perf_event_sample_rate),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = perf_proc_update_handler,
        },
 #endif
 #ifdef CONFIG_KMEMCHECK
@@ -956,10 +984,6 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = proc_dointvec,
        },
 #endif
-/*
- * NOTE: do not add new entries to this table unless you have read
- * Documentation/sysctl/ctl_unnumbered.txt
- */
        { }
 };
 
@@ -969,14 +993,18 @@ static struct ctl_table vm_table[] = {
                .data           = &sysctl_overcommit_memory,
                .maxlen         = sizeof(sysctl_overcommit_memory),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &two,
        },
        {
                .procname       = "panic_on_oom",
                .data           = &sysctl_panic_on_oom,
                .maxlen         = sizeof(sysctl_panic_on_oom),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &two,
        },
        {
                .procname       = "oom_kill_allocating_task",
@@ -1004,7 +1032,8 @@ static struct ctl_table vm_table[] = {
                .data           = &page_cluster,
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
        },
        {
                .procname       = "dirty_background_ratio",
@@ -1052,7 +1081,8 @@ static struct ctl_table vm_table[] = {
                .data           = &dirty_expire_interval,
                .maxlen         = sizeof(dirty_expire_interval),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
        },
        {
                .procname       = "nr_pdflush_threads",
@@ -1128,6 +1158,8 @@ static struct ctl_table vm_table[] = {
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = drop_caches_sysctl_handler,
+               .extra1         = &one,
+               .extra2         = &three,
        },
 #ifdef CONFIG_COMPACTION
        {
@@ -1320,11 +1352,6 @@ static struct ctl_table vm_table[] = {
                .extra2         = &one,
        },
 #endif
-
-/*
- * NOTE: do not add new entries to this table unless you have read
- * Documentation/sysctl/ctl_unnumbered.txt
- */
        { }
 };
 
@@ -1377,7 +1404,7 @@ static struct ctl_table fs_table[] = {
                .data           = &dentry_stat,
                .maxlen         = 6*sizeof(int),
                .mode           = 0444,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_nr_dentry,
        },
        {
                .procname       = "overflowuid",
@@ -1480,16 +1507,12 @@ static struct ctl_table fs_table[] = {
                .proc_handler   = &pipe_proc_fn,
                .extra1         = &pipe_min_size,
        },
-/*
- * NOTE: do not add new entries to this table unless you have read
- * Documentation/sysctl/ctl_unnumbered.txt
- */
        { }
 };
 
 static struct ctl_table debug_table[] = {
 #if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \
-    defined(CONFIG_S390)
+    defined(CONFIG_S390) || defined(CONFIG_TILE)
        {
                .procname       = "exception-trace",
                .data           = &show_unhandled_signals,
@@ -1571,7 +1594,7 @@ void sysctl_head_put(struct ctl_table_header *head)
 {
        spin_lock(&sysctl_lock);
        if (!--head->count)
-               kfree(head);
+               kfree_rcu(head, rcu);
        spin_unlock(&sysctl_lock);
 }
 
@@ -1685,13 +1708,8 @@ static int test_perm(int mode, int op)
 
 int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
 {
-       int error;
        int mode;
 
-       error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC));
-       if (error)
-               return error;
-
        if (root->permissions)
                mode = root->permissions(root, current->nsproxy, table);
        else
@@ -1948,10 +1966,10 @@ void unregister_sysctl_table(struct ctl_table_header * header)
        start_unregistering(header);
        if (!--header->parent->count) {
                WARN_ON(1);
-               kfree(header->parent);
+               kfree_rcu(header->parent, rcu);
        }
        if (!--header->count)
-               kfree(header);
+               kfree_rcu(header, rcu);
        spin_unlock(&sysctl_lock);
 }
 
@@ -2392,6 +2410,17 @@ static int proc_taint(struct ctl_table *table, int write,
        return err;
 }
 
+#ifdef CONFIG_PRINTK
+static int proc_dmesg_restrict(struct ctl_table *table, int write,
+                               void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       if (write && !capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+}
+#endif
+
 struct do_proc_dointvec_minmax_conv_param {
        int *min;
        int *max;
@@ -2893,7 +2922,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
        }
 }
 
-#else /* CONFIG_PROC_FS */
+#else /* CONFIG_PROC_SYSCTL */
 
 int proc_dostring(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -2945,7 +2974,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
 }
 
 
-#endif /* CONFIG_PROC_FS */
+#endif /* CONFIG_PROC_SYSCTL */
 
 /*
  * No sense putting this after each symbol definition, twice,