perf_counter: provide an mlock threshold
Peter Zijlstra [Tue, 5 May 2009 15:50:24 +0000 (17:50 +0200)]
Provide a threshold to relax the mlock accounting, increasing usability.

Each counter gets perf_counter_mlock_kb kilobytes (default: 128) of mmap data pages for free, i.e. not charged against the RLIMIT_MEMLOCK limit.

[ Impact: allow more mmap buffering ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090505155437.112113632@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

include/linux/perf_counter.h
kernel/perf_counter.c
kernel/sysctl.c

index 0fcbf34..00081d8 100644 (file)
@@ -358,6 +358,7 @@ struct file;
 struct perf_mmap_data {
        struct rcu_head                 rcu_head;
        int                             nr_pages;       /* nr of data pages  */
+       int                             nr_locked;      /* nr pages mlocked  */
 
        atomic_t                        poll;           /* POLL_ for wakeups */
        atomic_t                        head;           /* write position    */
@@ -575,6 +576,7 @@ struct perf_callchain_entry {
 extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
 
 extern int sysctl_perf_counter_priv;
+extern int sysctl_perf_counter_mlock;
 
 extern void perf_counter_init(void);
 
index 6e6834e..2d13427 100644 (file)
@@ -44,6 +44,7 @@ static atomic_t nr_munmap_tracking __read_mostly;
 static atomic_t nr_comm_tracking __read_mostly;
 
 int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
+int sysctl_perf_counter_mlock __read_mostly = 128; /* 'free' kb per counter */
 
 /*
  * Lock for (sysadmin-configurable) counter reservations:
@@ -1461,7 +1462,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 
        if (atomic_dec_and_mutex_lock(&counter->mmap_count,
                                      &counter->mmap_mutex)) {
-               vma->vm_mm->locked_vm -= counter->data->nr_pages + 1;
+               vma->vm_mm->locked_vm -= counter->data->nr_locked;
                perf_mmap_data_free(counter);
                mutex_unlock(&counter->mmap_mutex);
        }
@@ -1480,6 +1481,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
        unsigned long nr_pages;
        unsigned long locked, lock_limit;
        int ret = 0;
+       long extra;
 
        if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
                return -EINVAL;
@@ -1507,8 +1509,12 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
                goto unlock;
        }
 
-       locked = vma->vm_mm->locked_vm;
-       locked += nr_pages + 1;
+       extra = nr_pages /* + 1 only account the data pages */;
+       extra -= sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
+       if (extra < 0)
+               extra = 0;
+
+       locked = vma->vm_mm->locked_vm + extra;
 
        lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
        lock_limit >>= PAGE_SHIFT;
@@ -1524,7 +1530,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
                goto unlock;
 
        atomic_set(&counter->mmap_count, 1);
-       vma->vm_mm->locked_vm += nr_pages + 1;
+       vma->vm_mm->locked_vm += extra;
+       counter->data->nr_locked = extra;
 unlock:
        mutex_unlock(&counter->mmap_mutex);
 
index 8203d70..3b05c2b 100644 (file)
@@ -920,6 +920,14 @@ static struct ctl_table kern_table[] = {
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "perf_counter_mlock_kb",
+               .data           = &sysctl_perf_counter_mlock,
+               .maxlen         = sizeof(sysctl_perf_counter_mlock),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
 #endif
 /*
  * NOTE: do not add new entries to this table unless you have read