tracing: Add tracepoints for hotplug
[linux-2.6.git] / kernel / sysctl.c
index 30acc6c..fd15163 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/slab.h>
 #include <linux/sysctl.h>
 #include <linux/signal.h>
+#include <linux/printk.h>
 #include <linux/proc_fs.h>
 #include <linux/security.h>
 #include <linux/ctype.h>
 #include <linux/highuid.h>
 #include <linux/writeback.h>
 #include <linux/ratelimit.h>
+#include <linux/compaction.h>
 #include <linux/hugetlb.h>
 #include <linux/initrd.h>
 #include <linux/key.h>
 #include <linux/times.h>
 #include <linux/limits.h>
 #include <linux/dcache.h>
+#include <linux/dnotify.h>
 #include <linux/syscalls.h>
 #include <linux/vmstat.h>
 #include <linux/nfs_fs.h>
 #include <linux/acpi.h>
 #include <linux/reboot.h>
 #include <linux/ftrace.h>
-#include <linux/slow-work.h>
 #include <linux/perf_event.h>
 #include <linux/kprobes.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/oom.h>
+#include <linux/kmod.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 #include <scsi/sg.h>
 #endif
 
+#ifdef CONFIG_LOCKUP_DETECTOR
+#include <linux/nmi.h>
+#endif
+
 
 #if defined(CONFIG_SYSCTL)
 
 /* External variables not in a header file. */
 extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
-extern int sysctl_panic_on_oom;
-extern int sysctl_oom_kill_allocating_task;
-extern int sysctl_oom_dump_tasks;
 extern int max_threads;
 extern int core_uses_pid;
 extern int suid_dumpable;
@@ -90,6 +96,7 @@ extern char core_pattern[];
 extern unsigned int core_pipe_limit;
 extern int pid_max;
 extern int min_free_kbytes;
+extern int min_free_order_shift;
 extern int pid_max_min, pid_max_max;
 extern int sysctl_drop_caches;
 extern int percpu_pagelist_fraction;
@@ -104,7 +111,7 @@ extern int blk_iopoll_enabled;
 #endif
 
 /* Constants used for minimum and  maximum */
-#ifdef CONFIG_DETECT_SOFTLOCKUP
+#ifdef CONFIG_LOCKUP_DETECTOR
 static int sixty = 60;
 static int neg_one = -1;
 #endif
@@ -112,6 +119,7 @@ static int neg_one = -1;
 static int zero;
 static int __maybe_unused one = 1;
 static int __maybe_unused two = 2;
+static int __maybe_unused three = 3;
 static unsigned long one_ul = 1;
 static int one_hundred = 100;
 #ifdef CONFIG_PRINTK
@@ -128,6 +136,9 @@ static int min_percpu_pagelist_fract = 8;
 
 static int ngroups_max = NGROUPS_MAX;
 
+#ifdef CONFIG_INOTIFY_USER
+#include <linux/inotify.h>
+#endif
 #ifdef CONFIG_SPARC
 #include <asm/system.h>
 #endif
@@ -154,8 +165,6 @@ extern int no_unaligned_warning;
 extern int unaligned_dump_stack;
 #endif
 
-extern struct ratelimit_state printk_ratelimit_state;
-
 #ifdef CONFIG_PROC_SYSCTL
 static int proc_do_cad_pid(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos);
@@ -163,8 +172,14 @@ static int proc_taint(struct ctl_table *table, int write,
                               void __user *buffer, size_t *lenp, loff_t *ppos);
 #endif
 
+#ifdef CONFIG_PRINTK
+static int proc_dmesg_restrict(struct ctl_table *table, int write,
+                               void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
+
 #ifdef CONFIG_MAGIC_SYSRQ
-static int __sysrq_enabled; /* Note: sysrq code ises it's own private copy */
+/* Note: sysrq code uses it's own private copy */
+static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
 
 static int sysrq_sysctl_handler(ctl_table *table, int write,
                                void __user *buffer, size_t *lenp,
@@ -187,9 +202,9 @@ static int sysrq_sysctl_handler(ctl_table *table, int write,
 static struct ctl_table root_table[];
 static struct ctl_table_root sysctl_table_root;
 static struct ctl_table_header root_table_header = {
-       .count = 1,
+       {{.count = 1,
        .ctl_table = root_table,
-       .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
+       .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
        .root = &sysctl_table_root,
        .set = &sysctl_table_root.default_set,
 };
@@ -204,9 +219,6 @@ static struct ctl_table fs_table[];
 static struct ctl_table debug_table[];
 static struct ctl_table dev_table[];
 extern struct ctl_table random_table[];
-#ifdef CONFIG_INOTIFY_USER
-extern struct ctl_table inotify_table[];
-#endif
 #ifdef CONFIG_EPOLL
 extern struct ctl_table epoll_table[];
 #endif
@@ -243,10 +255,6 @@ static struct ctl_table root_table[] = {
                .mode           = 0555,
                .child          = dev_table,
        },
-/*
- * NOTE: do not add new entries to this table unless you have read
- * Documentation/sysctl/ctl_unnumbered.txt
- */
        { }
 };
 
@@ -257,8 +265,11 @@ static int min_wakeup_granularity_ns;                      /* 0 usecs */
 static int max_wakeup_granularity_ns = NSEC_PER_SEC;   /* 1 second */
 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
-static int min_sched_shares_ratelimit = 100000; /* 100 usec */
-static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */
+#endif
+
+#ifdef CONFIG_COMPACTION
+static int min_extfrag_threshold;
+static int max_extfrag_threshold = 1000;
 #endif
 
 static struct ctl_table kern_table[] = {
@@ -298,15 +309,6 @@ static struct ctl_table kern_table[] = {
                .extra2         = &max_wakeup_granularity_ns,
        },
        {
-               .procname       = "sched_shares_ratelimit",
-               .data           = &sysctl_sched_shares_ratelimit,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = sched_proc_update_handler,
-               .extra1         = &min_sched_shares_ratelimit,
-               .extra2         = &max_sched_shares_ratelimit,
-       },
-       {
                .procname       = "sched_tunable_scaling",
                .data           = &sysctl_sched_tunable_scaling,
                .maxlen         = sizeof(enum sched_tunable_scaling),
@@ -316,14 +318,6 @@ static struct ctl_table kern_table[] = {
                .extra2         = &max_sched_tunable_scaling,
        },
        {
-               .procname       = "sched_shares_thresh",
-               .data           = &sysctl_sched_shares_thresh,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &zero,
-       },
-       {
                .procname       = "sched_migration_cost",
                .data           = &sysctl_sched_migration_cost,
                .maxlen         = sizeof(unsigned int),
@@ -345,6 +339,13 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = proc_dointvec,
        },
        {
+               .procname       = "sched_shares_window",
+               .data           = &sysctl_sched_shares_window,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
                .procname       = "timer_migration",
                .data           = &sysctl_timer_migration,
                .maxlen         = sizeof(unsigned int),
@@ -368,13 +369,17 @@ static struct ctl_table kern_table[] = {
                .mode           = 0644,
                .proc_handler   = sched_rt_handler,
        },
+#ifdef CONFIG_SCHED_AUTOGROUP
        {
-               .procname       = "sched_compat_yield",
-               .data           = &sysctl_sched_compat_yield,
+               .procname       = "sched_autogroup_enabled",
+               .data           = &sysctl_sched_autogroup_enabled,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &one,
        },
+#endif
 #ifdef CONFIG_PROVE_LOCKING
        {
                .procname       = "prove_locking",
@@ -555,7 +560,7 @@ static struct ctl_table kern_table[] = {
                .extra2         = &one,
        },
 #endif
-#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
+#ifdef CONFIG_HOTPLUG
        {
                .procname       = "hotplug",
                .data           = &uevent_helper,
@@ -613,6 +618,11 @@ static struct ctl_table kern_table[] = {
                .child          = random_table,
        },
        {
+               .procname       = "usermodehelper",
+               .mode           = 0555,
+               .child          = usermodehelper_table,
+       },
+       {
                .procname       = "overflowuid",
                .data           = &overflowuid,
                .maxlen         = sizeof(int),
@@ -695,6 +705,24 @@ static struct ctl_table kern_table[] = {
                .extra1         = &zero,
                .extra2         = &ten_thousand,
        },
+       {
+               .procname       = "dmesg_restrict",
+               .data           = &dmesg_restrict,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
+       {
+               .procname       = "kptr_restrict",
+               .data           = &kptr_restrict,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dmesg_restrict,
+               .extra1         = &zero,
+               .extra2         = &two,
+       },
 #endif
        {
                .procname       = "ngroups_max",
@@ -703,20 +731,51 @@ static struct ctl_table kern_table[] = {
                .mode           = 0444,
                .proc_handler   = proc_dointvec,
        },
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
+#if defined(CONFIG_LOCKUP_DETECTOR)
        {
-               .procname       = "unknown_nmi_panic",
-               .data           = &unknown_nmi_panic,
+               .procname       = "watchdog",
+               .data           = &watchdog_enabled,
                .maxlen         = sizeof (int),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dowatchdog,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
+       {
+               .procname       = "watchdog_thresh",
+               .data           = &watchdog_thresh,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dowatchdog,
+               .extra1         = &neg_one,
+               .extra2         = &sixty,
+       },
+       {
+               .procname       = "softlockup_panic",
+               .data           = &softlockup_panic,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &one,
        },
        {
                .procname       = "nmi_watchdog",
-               .data           = &nmi_watchdog_enabled,
+               .data           = &watchdog_enabled,
                .maxlen         = sizeof (int),
                .mode           = 0644,
-               .proc_handler   = proc_nmi_enabled,
+               .proc_handler   = proc_dowatchdog,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
+#endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
+       {
+               .procname       = "unknown_nmi_panic",
+               .data           = &unknown_nmi_panic,
+               .maxlen         = sizeof (int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
        },
 #endif
 #if defined(CONFIG_X86)
@@ -806,26 +865,6 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = proc_dointvec,
        },
 #endif
-#ifdef CONFIG_DETECT_SOFTLOCKUP
-       {
-               .procname       = "softlockup_panic",
-               .data           = &softlockup_panic,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &zero,
-               .extra2         = &one,
-       },
-       {
-               .procname       = "softlockup_thresh",
-               .data           = &softlockup_thresh,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dosoftlockup_thresh,
-               .extra1         = &neg_one,
-               .extra2         = &sixty,
-       },
-#endif
 #ifdef CONFIG_DETECT_HUNG_TASK
        {
                .procname       = "hung_task_panic",
@@ -899,14 +938,13 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = proc_dointvec,
        },
 #endif
-#ifdef CONFIG_SLOW_WORK
-       {
-               .procname       = "slow-work",
-               .mode           = 0555,
-               .child          = slow_work_sysctls,
-       },
-#endif
 #ifdef CONFIG_PERF_EVENTS
+       /*
+        * User-space scripts rely on the existence of this file
+        * as a feature check for perf_events being enabled.
+        *
+        * So it's an ABI, do not remove!
+        */
        {
                .procname       = "perf_event_paranoid",
                .data           = &sysctl_perf_event_paranoid,
@@ -926,7 +964,7 @@ static struct ctl_table kern_table[] = {
                .data           = &sysctl_perf_event_sample_rate,
                .maxlen         = sizeof(sysctl_perf_event_sample_rate),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = perf_proc_update_handler,
        },
 #endif
 #ifdef CONFIG_KMEMCHECK
@@ -947,10 +985,6 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = proc_dointvec,
        },
 #endif
-/*
- * NOTE: do not add new entries to this table unless you have read
- * Documentation/sysctl/ctl_unnumbered.txt
- */
        { }
 };
 
@@ -960,14 +994,18 @@ static struct ctl_table vm_table[] = {
                .data           = &sysctl_overcommit_memory,
                .maxlen         = sizeof(sysctl_overcommit_memory),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &two,
        },
        {
                .procname       = "panic_on_oom",
                .data           = &sysctl_panic_on_oom,
                .maxlen         = sizeof(sysctl_panic_on_oom),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &two,
        },
        {
                .procname       = "oom_kill_allocating_task",
@@ -995,7 +1033,8 @@ static struct ctl_table vm_table[] = {
                .data           = &page_cluster,
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
        },
        {
                .procname       = "dirty_background_ratio",
@@ -1043,7 +1082,8 @@ static struct ctl_table vm_table[] = {
                .data           = &dirty_expire_interval,
                .maxlen         = sizeof(dirty_expire_interval),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
        },
        {
                .procname       = "nr_pdflush_threads",
@@ -1119,7 +1159,28 @@ static struct ctl_table vm_table[] = {
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = drop_caches_sysctl_handler,
+               .extra1         = &one,
+               .extra2         = &three,
+       },
+#ifdef CONFIG_COMPACTION
+       {
+               .procname       = "compact_memory",
+               .data           = &sysctl_compact_memory,
+               .maxlen         = sizeof(int),
+               .mode           = 0200,
+               .proc_handler   = sysctl_compaction_handler,
+       },
+       {
+               .procname       = "extfrag_threshold",
+               .data           = &sysctl_extfrag_threshold,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = sysctl_extfrag_handler,
+               .extra1         = &min_extfrag_threshold,
+               .extra2         = &max_extfrag_threshold,
        },
+
+#endif /* CONFIG_COMPACTION */
        {
                .procname       = "min_free_kbytes",
                .data           = &min_free_kbytes,
@@ -1129,6 +1190,13 @@ static struct ctl_table vm_table[] = {
                .extra1         = &zero,
        },
        {
+               .procname       = "min_free_order_shift",
+               .data           = &min_free_order_shift,
+               .maxlen         = sizeof(min_free_order_shift),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec
+       },
+       {
                .procname       = "percpu_pagelist_fraction",
                .data           = &percpu_pagelist_fraction,
                .maxlen         = sizeof(percpu_pagelist_fraction),
@@ -1292,11 +1360,6 @@ static struct ctl_table vm_table[] = {
                .extra2         = &one,
        },
 #endif
-
-/*
- * NOTE: do not add new entries to this table unless you have read
- * Documentation/sysctl/ctl_unnumbered.txt
- */
        { }
 };
 
@@ -1312,28 +1375,28 @@ static struct ctl_table fs_table[] = {
                .data           = &inodes_stat,
                .maxlen         = 2*sizeof(int),
                .mode           = 0444,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_nr_inodes,
        },
        {
                .procname       = "inode-state",
                .data           = &inodes_stat,
                .maxlen         = 7*sizeof(int),
                .mode           = 0444,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_nr_inodes,
        },
        {
                .procname       = "file-nr",
                .data           = &files_stat,
-               .maxlen         = 3*sizeof(int),
+               .maxlen         = sizeof(files_stat),
                .mode           = 0444,
                .proc_handler   = proc_nr_files,
        },
        {
                .procname       = "file-max",
                .data           = &files_stat.max_files,
-               .maxlen         = sizeof(int),
+               .maxlen         = sizeof(files_stat.max_files),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_doulongvec_minmax,
        },
        {
                .procname       = "nr_open",
@@ -1349,7 +1412,7 @@ static struct ctl_table fs_table[] = {
                .data           = &dentry_stat,
                .maxlen         = 6*sizeof(int),
                .mode           = 0444,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_nr_dentry,
        },
        {
                .procname       = "overflowuid",
@@ -1444,16 +1507,20 @@ static struct ctl_table fs_table[] = {
                .child          = binfmt_misc_table,
        },
 #endif
-/*
- * NOTE: do not add new entries to this table unless you have read
- * Documentation/sysctl/ctl_unnumbered.txt
- */
+       {
+               .procname       = "pipe-max-size",
+               .data           = &pipe_max_size,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &pipe_proc_fn,
+               .extra1         = &pipe_min_size,
+       },
        { }
 };
 
 static struct ctl_table debug_table[] = {
 #if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \
-    defined(CONFIG_S390)
+    defined(CONFIG_S390) || defined(CONFIG_TILE)
        {
                .procname       = "exception-trace",
                .data           = &show_unhandled_signals,
@@ -1535,7 +1602,7 @@ void sysctl_head_put(struct ctl_table_header *head)
 {
        spin_lock(&sysctl_lock);
        if (!--head->count)
-               kfree(head);
+               kfree_rcu(head, rcu);
        spin_unlock(&sysctl_lock);
 }
 
@@ -1649,13 +1716,8 @@ static int test_perm(int mode, int op)
 
 int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
 {
-       int error;
        int mode;
 
-       error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC));
-       if (error)
-               return error;
-
        if (root->permissions)
                mode = root->permissions(root, current->nsproxy, table);
        else
@@ -1677,10 +1739,7 @@ static __init int sysctl_init(void)
 {
        sysctl_set_parent(NULL, root_table);
 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
-       {
-               int err;
-               err = sysctl_check_table(current->nsproxy, root_table);
-       }
+       sysctl_check_table(current->nsproxy, root_table);
 #endif
        return 0;
 }
@@ -1915,10 +1974,10 @@ void unregister_sysctl_table(struct ctl_table_header * header)
        start_unregistering(header);
        if (!--header->parent->count) {
                WARN_ON(1);
-               kfree(header->parent);
+               kfree_rcu(header->parent, rcu);
        }
        if (!--header->count)
-               kfree(header);
+               kfree_rcu(header, rcu);
        spin_unlock(&sysctl_lock);
 }
 
@@ -2253,6 +2312,8 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
                if (write) {
                        left -= proc_skip_spaces(&kbuf);
 
+                       if (!left)
+                               break;
                        err = proc_get_long(&kbuf, &left, &lval, &neg,
                                             proc_wspace_sep,
                                             sizeof(proc_wspace_sep), NULL);
@@ -2279,7 +2340,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
 
        if (!write && !first && left && !err)
                err = proc_put_char(&buffer, &left, '\n');
-       if (write && !err)
+       if (write && !err && left)
                left -= proc_skip_spaces(&kbuf);
 free:
        if (write) {
@@ -2357,6 +2418,17 @@ static int proc_taint(struct ctl_table *table, int write,
        return err;
 }
 
+#ifdef CONFIG_PRINTK
+static int proc_dmesg_restrict(struct ctl_table *table, int write,
+                               void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       if (write && !capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+}
+#endif
+
 struct do_proc_dointvec_minmax_conv_param {
        int *min;
        int *max;
@@ -2450,7 +2522,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
                kbuf[left] = 0;
        }
 
-       for (; left && vleft--; i++, min++, max++, first=0) {
+       for (; left && vleft--; i++, first = 0) {
                unsigned long val;
 
                if (write) {
@@ -2858,7 +2930,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
        }
 }
 
-#else /* CONFIG_PROC_FS */
+#else /* CONFIG_PROC_SYSCTL */
 
 int proc_dostring(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -2910,7 +2982,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
 }
 
 
-#endif /* CONFIG_PROC_FS */
+#endif /* CONFIG_PROC_SYSCTL */
 
 /*
  * No sense putting this after each symbol definition, twice,