/*
 *	linux/mm/mlock.c
 *
 *  (C) Copyright 1995 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 */

#include <linux/capability.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/mempolicy.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include <linux/module.h>

/*
 * Return 1 if the caller is allowed to lock memory: either it has
 * CAP_IPC_LOCK or its RLIMIT_MEMLOCK soft limit is non-zero.
 */
int can_do_mlock(void)
{
        if (capable(CAP_IPC_LOCK))
                return 1;
        if (current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur != 0)
                return 1;
        return 0;
}
EXPORT_SYMBOL(can_do_mlock);
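
/*
 * Illustrative userspace counterpart (a sketch, not kernel code): an
 * unprivileged process can guess whether mlock() stands a chance by
 * inspecting the same rlimit this helper checks.
 *
 *	#include <sys/resource.h>
 *
 *	static int may_mlock(void)
 *	{
 *		struct rlimit rl;
 *
 *		if (getrlimit(RLIMIT_MEMLOCK, &rl))
 *			return 0;
 *		return rl.rlim_cur != 0;
 *	}
 *
 * A task holding CAP_IPC_LOCK may lock memory even with rlim_cur == 0.
 */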

/*
 * Apply @newflags to the range [start, end) of @vma: try to merge with
 * adjacent VMAs first, otherwise split @vma so the flags change covers
 * exactly the requested range.  Caller must hold mmap_sem for writing.
 */
static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
        unsigned long start, unsigned long end, unsigned int newflags)
{
        struct mm_struct * mm = vma->vm_mm;
        pgoff_t pgoff;
        int pages;
        int ret = 0;

        if (newflags == vma->vm_flags) {
                *prev = vma;
                goto out;
        }

        pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
        *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
                          vma->vm_file, pgoff, vma_policy(vma));
        if (*prev) {
                vma = *prev;
                goto success;
        }

        *prev = vma;

        if (start != vma->vm_start) {
                ret = split_vma(mm, vma, start, 1);
                if (ret)
                        goto out;
        }

        if (end != vma->vm_end) {
                ret = split_vma(mm, vma, end, 0);
                if (ret)
                        goto out;
        }

success:
        /*
         * vm_flags is protected by the mmap_sem held in write mode.
         * It's okay if try_to_unmap_one unmaps a page just after we
         * set VM_LOCKED, make_pages_present below will bring it back.
         */
        vma->vm_flags = newflags;

        /*
         * Keep track of the amount of locked VM.  Note that pages is
         * negated when locking, so the subtraction below grows locked_vm.
         */
        pages = (end - start) >> PAGE_SHIFT;
        if (newflags & VM_LOCKED) {
                pages = -pages;
                if (!(newflags & VM_IO))
                        ret = make_pages_present(start, end);
        }

        mm->locked_vm -= pages;
out:
        return ret;
}
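
/*
 * Worked example (illustrative): mlocking the middle of a four-page VMA
 * [0x8000, 0xc000) with start = 0x9000 and end = 0xb000.  Assuming no
 * neighbour has matching flags, vma_merge() fails, split_vma() runs
 * twice, and three VMAs remain: [0x8000, 0x9000), [0x9000, 0xb000) and
 * [0xb000, 0xc000).  Only the middle one gets VM_LOCKED.
 */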

/*
 * Set or clear VM_LOCKED on every VMA covering [start, start + len),
 * fixing up each piece in turn.  Returns -ENOMEM if the range crosses
 * an unmapped hole; VMAs processed before the hole are not rolled back.
 */
static int do_mlock(unsigned long start, size_t len, int on)
{
        unsigned long nstart, end, tmp;
        struct vm_area_struct * vma, * prev;
        int error;

        len = PAGE_ALIGN(len);
        end = start + len;
        if (end < start)
                return -EINVAL;
        if (end == start)
                return 0;
        vma = find_vma_prev(current->mm, start, &prev);
        if (!vma || vma->vm_start > start)
                return -ENOMEM;

        if (start > vma->vm_start)
                prev = vma;

        for (nstart = start ; ; ) {
                unsigned int newflags;

                /* Here we know that vma->vm_start <= nstart < vma->vm_end. */

                newflags = vma->vm_flags | VM_LOCKED;
                if (!on)
                        newflags &= ~VM_LOCKED;

                tmp = vma->vm_end;
                if (tmp > end)
                        tmp = end;
                error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
                if (error)
                        break;
                nstart = tmp;
                if (nstart < prev->vm_end)
                        nstart = prev->vm_end;
                if (nstart >= end)
                        break;

                vma = prev->vm_next;
                if (!vma || vma->vm_start != nstart) {
                        error = -ENOMEM;
                        break;
                }
        }
        return error;
}
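
/*
 * Worked example (illustrative): with VMAs [0x1000, 0x3000) and
 * [0x4000, 0x6000), do_mlock(0x1000, 0x4000, 1) locks the first VMA,
 * then sees vma->vm_start (0x4000) != nstart (0x3000) at the hole and
 * returns -ENOMEM, leaving the first VMA locked.
 */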

/*
 * mlock(2): lock the pages covering [start, start + len) into memory.
 * The request is checked against RLIMIT_MEMLOCK unless the caller has
 * CAP_IPC_LOCK.
 */
asmlinkage long sys_mlock(unsigned long start, size_t len)
{
        unsigned long locked;
        unsigned long lock_limit;
        int error = -ENOMEM;

        if (!can_do_mlock())
                return -EPERM;

        down_write(&current->mm->mmap_sem);
        len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
        start &= PAGE_MASK;

        locked = len >> PAGE_SHIFT;
        locked += current->mm->locked_vm;

        lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
        lock_limit >>= PAGE_SHIFT;

        /* check against resource limits */
        if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
                error = do_mlock(start, len, 1);
        up_write(&current->mm->mmap_sem);
        return error;
}
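
/*
 * Example (userspace, illustrative sketch): pinning a buffer so it is
 * never paged out.  The kernel rounds the range outward to page
 * boundaries; with 4KB pages, mlock((void *)0x1234, 0x2000) locks
 * [0x1000, 0x4000), i.e. three pages rather than two.
 *
 *	#include <sys/mman.h>
 *	#include <string.h>
 *
 *	static int pin_buffer(void *buf, size_t len)
 *	{
 *		if (mlock(buf, len))
 *			return -1;
 *		memset(buf, 0, len);
 *		return munlock(buf, len);
 *	}
 */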

/*
 * munlock(2): clear VM_LOCKED on the pages covering [start, start + len).
 * No capability or rlimit check is needed to unlock memory.
 */
asmlinkage long sys_munlock(unsigned long start, size_t len)
{
        int ret;

        down_write(&current->mm->mmap_sem);
        len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
        start &= PAGE_MASK;
        ret = do_mlock(start, len, 0);
        up_write(&current->mm->mmap_sem);
        return ret;
}

/*
 * Apply mlockall() semantics: MCL_FUTURE sets VM_LOCKED in the mm's
 * def_flags so future mappings start out locked; MCL_CURRENT (or
 * flags == 0, from munlockall()) walks every existing VMA as well.
 */
static int do_mlockall(int flags)
{
        struct vm_area_struct * vma, * prev = NULL;
        unsigned int def_flags = 0;

        if (flags & MCL_FUTURE)
                def_flags = VM_LOCKED;
        current->mm->def_flags = def_flags;
        if (flags == MCL_FUTURE)
                goto out;

        for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
                unsigned int newflags;

                newflags = vma->vm_flags | VM_LOCKED;
                if (!(flags & MCL_CURRENT))
                        newflags &= ~VM_LOCKED;

                /* Ignore errors */
                mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
        }
out:
        return 0;
}

/*
 * mlockall(2): lock all current and/or future mappings of the process,
 * subject to RLIMIT_MEMLOCK unless the caller has CAP_IPC_LOCK.
 */
asmlinkage long sys_mlockall(int flags)
{
        unsigned long lock_limit;
        int ret = -EINVAL;

        if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE)))
                goto out;

        ret = -EPERM;
        if (!can_do_mlock())
                goto out;

        down_write(&current->mm->mmap_sem);

        lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
        lock_limit >>= PAGE_SHIFT;

        ret = -ENOMEM;
        if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
            capable(CAP_IPC_LOCK))
                ret = do_mlockall(flags);
        up_write(&current->mm->mmap_sem);
out:
        return ret;
}
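
/*
 * Example (userspace, illustrative sketch): real-time programs commonly
 * lock everything up front so that later page faults cannot stall them.
 *
 *	#include <sys/mman.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		if (mlockall(MCL_CURRENT | MCL_FUTURE)) {
 *			perror("mlockall");
 *			return 1;
 *		}
 *		run_realtime_loop();	(hypothetical workload)
 *		munlockall();
 *		return 0;
 *	}
 */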

/*
 * munlockall(2): unlock everything; do_mlockall(0) also clears the
 * MCL_FUTURE default in mm->def_flags.
 */
asmlinkage long sys_munlockall(void)
{
        int ret;

        down_write(&current->mm->mmap_sem);
        ret = do_mlockall(0);
        up_write(&current->mm->mmap_sem);
        return ret;
}

/*
 * Objects with a different lifetime than processes (SHM_LOCK and
 * SHM_HUGETLB shm segments) get accounted against the user_struct
 * instead.
 */
static DEFINE_SPINLOCK(shmlock_user_lock);

/*
 * Charge @size bytes of locked shm to @user.  Returns 1 and takes a
 * uid reference on success; user_shm_unlock() drops it again.
 */
int user_shm_lock(size_t size, struct user_struct *user)
{
        unsigned long lock_limit, locked;
        int allowed = 0;

        locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
        if (lock_limit == RLIM_INFINITY)
                allowed = 1;
        lock_limit >>= PAGE_SHIFT;
        spin_lock(&shmlock_user_lock);
        if (!allowed &&
            locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
                goto out;
        get_uid(user);
        user->locked_shm += locked;
        allowed = 1;
out:
        spin_unlock(&shmlock_user_lock);
        return allowed;
}

void user_shm_unlock(size_t size, struct user_struct *user)
{
        spin_lock(&shmlock_user_lock);
        user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        spin_unlock(&shmlock_user_lock);
        free_uid(user);
}
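
/*
 * Example (userspace, illustrative sketch): SHM_LOCK pins a SysV shm
 * segment through the accounting above rather than through the owning
 * process's locked_vm.
 *
 *	#include <sys/ipc.h>
 *	#include <sys/shm.h>
 *
 *	int id = shmget(IPC_PRIVATE, 65536, IPC_CREAT | 0600);
 *	if (id >= 0)
 *		shmctl(id, SHM_LOCK, NULL);	(undone with SHM_UNLOCK)
 */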