8241264941a2aa4ddae64122260506c277f83152
[linux-2.6.git] / ipc / shm.c
1 /*
2  * linux/ipc/shm.c
3  * Copyright (C) 1992, 1993 Krishna Balasubramanian
4  *       Many improvements/fixes by Bruno Haible.
5  * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
6  * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
7  *
8  * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
9  * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
10  * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
11  * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
12  * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
13  * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
14  * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
15  *
16  * support for audit of ipc object properties and permission changes
17  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
18  *
19  * namespaces support
20  * OpenVZ, SWsoft Inc.
21  * Pavel Emelianov <xemul@openvz.org>
22  */
23
24 #include <linux/slab.h>
25 #include <linux/mm.h>
26 #include <linux/hugetlb.h>
27 #include <linux/shm.h>
28 #include <linux/init.h>
29 #include <linux/file.h>
30 #include <linux/mman.h>
31 #include <linux/shmem_fs.h>
32 #include <linux/security.h>
33 #include <linux/syscalls.h>
34 #include <linux/audit.h>
35 #include <linux/capability.h>
36 #include <linux/ptrace.h>
37 #include <linux/seq_file.h>
38 #include <linux/mutex.h>
39 #include <linux/nsproxy.h>
40 #include <linux/mount.h>
41
42 #include <asm/uaccess.h>
43
44 #include "util.h"
45
/*
 * Per-attach state hung off the private_data of the wrapper file that
 * do_shmat() installs in the mapping.
 */
struct shm_file_data {
	int id;				/* ipc id of the attached segment */
	struct ipc_namespace *ns;	/* namespace holding the segment (refcounted) */
	struct file *file;		/* backing shmem/hugetlbfs file */
	const struct vm_operations_struct *vm_ops; /* backing file's vm_ops, saved by shm_mmap() */
};
52
/* Access the shm_file_data stashed in a file's private_data. */
#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))

static const struct file_operations shm_file_operations;
static struct vm_operations_struct shm_vm_ops;

/* id set for the initial ipc namespace; other namespaces kmalloc theirs. */
static struct ipc_ids init_shm_ids;

#define shm_ids(ns)	(*((ns)->ids[IPC_SHM_IDS]))

/* Drop the per-segment lock taken by shm_lock()/shm_lock_check(). */
#define shm_unlock(shp)			\
	ipc_unlock(&(shp)->shm_perm)
/* Combine slot index and sequence number into the user-visible shmid. */
#define shm_buildid(ns, id, seq)	\
	ipc_buildid(&shm_ids(ns), id, seq)

static int newseg(struct ipc_namespace *, struct ipc_params *);
static void shm_open(struct vm_area_struct *vma);
static void shm_close(struct vm_area_struct *vma);
static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
#endif
74
75 static void __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
76 {
77         ns->ids[IPC_SHM_IDS] = ids;
78         ns->shm_ctlmax = SHMMAX;
79         ns->shm_ctlall = SHMALL;
80         ns->shm_ctlmni = SHMMNI;
81         ns->shm_tot = 0;
82         ipc_init_ids(ids);
83 }
84
85 static void do_shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *shp)
86 {
87         if (shp->shm_nattch){
88                 shp->shm_perm.mode |= SHM_DEST;
89                 /* Do not find it any more */
90                 shp->shm_perm.key = IPC_PRIVATE;
91                 shm_unlock(shp);
92         } else
93                 shm_destroy(ns, shp);
94 }
95
96 int shm_init_ns(struct ipc_namespace *ns)
97 {
98         struct ipc_ids *ids;
99
100         ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL);
101         if (ids == NULL)
102                 return -ENOMEM;
103
104         __shm_init_ns(ns, ids);
105         return 0;
106 }
107
/*
 * Tear down the shm state of a dying ipc namespace: remove (or mark
 * SHM_DEST) every remaining segment, then free the id set installed by
 * shm_init_ns().
 */
void shm_exit_ns(struct ipc_namespace *ns)
{
	struct shmid_kernel *shp;
	int next_id;
	int total, in_use;

	mutex_lock(&shm_ids(ns).mutex);

	in_use = shm_ids(ns).in_use;

	/*
	 * The idr may have holes, so scan ids upward until every in-use
	 * entry has been visited.
	 */
	for (total = 0, next_id = 0; total < in_use; next_id++) {
		shp = idr_find(&shm_ids(ns).ipcs_idr, next_id);
		if (shp == NULL)
			continue;
		ipc_lock_by_ptr(&shp->shm_perm);
		/* Destroys now or marks SHM_DEST; unlocks shp either way. */
		do_shm_rmid(ns, shp);
		total++;
	}
	mutex_unlock(&shm_ids(ns).mutex);

	kfree(ns->ids[IPC_SHM_IDS]);
	ns->ids[IPC_SHM_IDS] = NULL;
}
131
/*
 * Boot-time initialization: set up shm for the initial ipc namespace
 * and register the /proc/sysvipc/shm listing.
 */
void __init shm_init (void)
{
	__shm_init_ns(&init_ipc_ns, &init_shm_ids);
	ipc_init_proc_interface("sysvipc/shm",
				"       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime\n",
				IPC_SHM_IDS, sysvipc_shm_proc_show);
}
139
140 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
141 {
142         return (struct shmid_kernel *) ipc_lock(&shm_ids(ns), id);
143 }
144
145 static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
146                                                 int id)
147 {
148         return (struct shmid_kernel *) ipc_lock_check(&shm_ids(ns), id);
149 }
150
/* Remove @s from the namespace's id set; caller holds the segment lock. */
static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
{
	ipc_rmid(&shm_ids(ns), &s->shm_perm);
}
155
/*
 * Insert @shp into the namespace's id set, honouring the shmmni limit.
 * Returns the new slot index, or a failure value from ipc_addid().
 */
static inline int shm_addid(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
	return ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
}
160
161
162
163 /* This is called by fork, once for every shm attach. */
164 static void shm_open(struct vm_area_struct *vma)
165 {
166         struct file *file = vma->vm_file;
167         struct shm_file_data *sfd = shm_file_data(file);
168         struct shmid_kernel *shp;
169
170         shp = shm_lock(sfd->ns, sfd->id);
171         BUG_ON(IS_ERR(shp));
172         shp->shm_atim = get_seconds();
173         shp->shm_lprid = task_tgid_vnr(current);
174         shp->shm_nattch++;
175         shm_unlock(shp);
176 }
177
/*
 * shm_destroy - free the struct shmid_kernel
 *
 * @ns: namespace the segment belongs to
 * @shp: struct to free
 *
 * It has to be called with shp and shm_ids.mutex locked,
 * but returns with shp unlocked and freed.
 */
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
	/* Give back the pages charged against the system-wide shm limit. */
	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
	shm_rmid(ns, shp);
	shm_unlock(shp);
	/* Undo any SHM_LOCK accounting before dropping the backing file. */
	if (!is_file_hugepages(shp->shm_file))
		shmem_lock(shp->shm_file, 0, shp->mlock_user);
	else
		user_shm_unlock(shp->shm_file->f_path.dentry->d_inode->i_size,
						shp->mlock_user);
	fput (shp->shm_file);
	security_shm_free(shp);
	ipc_rcu_putref(shp);
}
200
/*
 * remove the attach descriptor vma.
 * free memory for segment if it is marked destroyed.
 * The descriptor has already been removed from the current->mm->mmap list
 * and will later be kfree()d.
 */
static void shm_close(struct vm_area_struct *vma)
{
	struct file * file = vma->vm_file;
	struct shm_file_data *sfd = shm_file_data(file);
	struct shmid_kernel *shp;
	struct ipc_namespace *ns = sfd->ns;

	/* shm_ids.mutex must be held in case shm_destroy() is needed. */
	mutex_lock(&shm_ids(ns).mutex);
	/* remove from the list of attaches of the shm segment */
	shp = shm_lock(ns, sfd->id);
	BUG_ON(IS_ERR(shp));
	shp->shm_lprid = task_tgid_vnr(current);
	shp->shm_dtim = get_seconds();
	shp->shm_nattch--;
	/* Last detach of a segment marked by IPC_RMID: destroy it now. */
	if(shp->shm_nattch == 0 &&
	   shp->shm_perm.mode & SHM_DEST)
		shm_destroy(ns, shp);
	else
		shm_unlock(shp);
	mutex_unlock(&shm_ids(ns).mutex);
}
228
229 static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
230 {
231         struct file *file = vma->vm_file;
232         struct shm_file_data *sfd = shm_file_data(file);
233
234         return sfd->vm_ops->fault(vma, vmf);
235 }
236
237 #ifdef CONFIG_NUMA
238 static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
239 {
240         struct file *file = vma->vm_file;
241         struct shm_file_data *sfd = shm_file_data(file);
242         int err = 0;
243         if (sfd->vm_ops->set_policy)
244                 err = sfd->vm_ops->set_policy(vma, new);
245         return err;
246 }
247
248 static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
249                                         unsigned long addr)
250 {
251         struct file *file = vma->vm_file;
252         struct shm_file_data *sfd = shm_file_data(file);
253         struct mempolicy *pol = NULL;
254
255         if (sfd->vm_ops->get_policy)
256                 pol = sfd->vm_ops->get_policy(vma, addr);
257         else if (vma->vm_policy)
258                 pol = vma->vm_policy;
259         else
260                 pol = current->mempolicy;
261         return pol;
262 }
263 #endif
264
/*
 * mmap handler for the wrapper file created by do_shmat(): map the
 * backing file, then interpose shm_vm_ops so attach/detach accounting
 * sees every subsequent open/close of the vma.
 */
static int shm_mmap(struct file * file, struct vm_area_struct * vma)
{
	struct shm_file_data *sfd = shm_file_data(file);
	int ret;

	ret = sfd->file->f_op->mmap(sfd->file, vma);
	if (ret != 0)
		return ret;
	/* Save the backing file's vm_ops so shm_fault() can delegate. */
	sfd->vm_ops = vma->vm_ops;
#ifdef CONFIG_MMU
	BUG_ON(!sfd->vm_ops->fault);
#endif
	vma->vm_ops = &shm_vm_ops;
	/* Count this new mapping as an attach. */
	shm_open(vma);

	return ret;
}
282
283 static int shm_release(struct inode *ino, struct file *file)
284 {
285         struct shm_file_data *sfd = shm_file_data(file);
286
287         put_ipc_ns(sfd->ns);
288         shm_file_data(file) = NULL;
289         kfree(sfd);
290         return 0;
291 }
292
293 static int shm_fsync(struct file *file, struct dentry *dentry, int datasync)
294 {
295         int (*fsync) (struct file *, struct dentry *, int datasync);
296         struct shm_file_data *sfd = shm_file_data(file);
297         int ret = -EINVAL;
298
299         fsync = sfd->file->f_op->fsync;
300         if (fsync)
301                 ret = fsync(sfd->file, sfd->file->f_path.dentry, datasync);
302         return ret;
303 }
304
305 static unsigned long shm_get_unmapped_area(struct file *file,
306         unsigned long addr, unsigned long len, unsigned long pgoff,
307         unsigned long flags)
308 {
309         struct shm_file_data *sfd = shm_file_data(file);
310         return get_unmapped_area(sfd->file, addr, len, pgoff, flags);
311 }
312
313 int is_file_shm_hugepages(struct file *file)
314 {
315         int ret = 0;
316
317         if (file->f_op == &shm_file_operations) {
318                 struct shm_file_data *sfd;
319                 sfd = shm_file_data(file);
320                 ret = is_file_hugepages(sfd->file);
321         }
322         return ret;
323 }
324
/* Operations for the wrapper file installed on each attach by do_shmat(). */
static const struct file_operations shm_file_operations = {
	.mmap		= shm_mmap,
	.fsync		= shm_fsync,
	.release	= shm_release,
	.get_unmapped_area	= shm_get_unmapped_area,
};
331
/* vm_ops interposed on every shm mapping for attach/detach accounting. */
static struct vm_operations_struct shm_vm_ops = {
	.open	= shm_open,	/* callback for a new vm-area open */
	.close	= shm_close,	/* callback for when the vm-area is released */
	.fault	= shm_fault,	/* delegates to the backing file's fault */
#if defined(CONFIG_NUMA)
	.set_policy = shm_set_policy,
	.get_policy = shm_get_policy,
#endif
};
341
342 static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
343 {
344         key_t key = params->key;
345         int shmflg = params->flg;
346         size_t size = params->u.size;
347         int error;
348         struct shmid_kernel *shp;
349         int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
350         struct file * file;
351         char name[13];
352         int id;
353
354         if (size < SHMMIN || size > ns->shm_ctlmax)
355                 return -EINVAL;
356
357         if (ns->shm_tot + numpages > ns->shm_ctlall)
358                 return -ENOSPC;
359
360         shp = ipc_rcu_alloc(sizeof(*shp));
361         if (!shp)
362                 return -ENOMEM;
363
364         shp->shm_perm.key = key;
365         shp->shm_perm.mode = (shmflg & S_IRWXUGO);
366         shp->mlock_user = NULL;
367
368         shp->shm_perm.security = NULL;
369         error = security_shm_alloc(shp);
370         if (error) {
371                 ipc_rcu_putref(shp);
372                 return error;
373         }
374
375         sprintf (name, "SYSV%08x", key);
376         if (shmflg & SHM_HUGETLB) {
377                 /* hugetlb_file_setup takes care of mlock user accounting */
378                 file = hugetlb_file_setup(name, size);
379                 shp->mlock_user = current->user;
380         } else {
381                 int acctflag = VM_ACCOUNT;
382                 /*
383                  * Do not allow no accounting for OVERCOMMIT_NEVER, even
384                  * if it's asked for.
385                  */
386                 if  ((shmflg & SHM_NORESERVE) &&
387                                 sysctl_overcommit_memory != OVERCOMMIT_NEVER)
388                         acctflag = 0;
389                 file = shmem_file_setup(name, size, acctflag);
390         }
391         error = PTR_ERR(file);
392         if (IS_ERR(file))
393                 goto no_file;
394
395         error = -ENOSPC;
396         id = shm_addid(ns, shp);
397         if(id == -1) 
398                 goto no_id;
399
400         shp->shm_cprid = task_tgid_vnr(current);
401         shp->shm_lprid = 0;
402         shp->shm_atim = shp->shm_dtim = 0;
403         shp->shm_ctim = get_seconds();
404         shp->shm_segsz = size;
405         shp->shm_nattch = 0;
406         shp->shm_perm.id = shm_buildid(ns, id, shp->shm_perm.seq);
407         shp->shm_file = file;
408         /*
409          * shmid gets reported as "inode#" in /proc/pid/maps.
410          * proc-ps tools use this. Changing this will break them.
411          */
412         file->f_dentry->d_inode->i_ino = shp->shm_perm.id;
413
414         ns->shm_tot += numpages;
415         error = shp->shm_perm.id;
416         shm_unlock(shp);
417         return error;
418
419 no_id:
420         fput(file);
421 no_file:
422         security_shm_free(shp);
423         ipc_rcu_putref(shp);
424         return error;
425 }
426
/* ipc_ops.associate hook: run the LSM check for attaching to @shp. */
static inline int shm_security(void *shp, int shmflg)
{
	struct shmid_kernel *seg = shp;

	return security_shm_associate(seg, shmflg);
}
431
432 static inline int shm_more_checks(void *shp, struct ipc_params *params)
433 {
434         if (((struct shmid_kernel *)shp)->shm_segsz < params->u.size)
435                 return -EINVAL;
436
437         return 0;
438 }
439
440 asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
441 {
442         struct ipc_namespace *ns;
443         struct ipc_ops shm_ops;
444         struct ipc_params shm_params;
445
446         ns = current->nsproxy->ipc_ns;
447
448         shm_ops.getnew = newseg;
449         shm_ops.associate = shm_security;
450         shm_ops.more_checks = shm_more_checks;
451
452         shm_params.key = key;
453         shm_params.flg = shmflg;
454         shm_params.u.size = size;
455
456         return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
457 }
458
459 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
460 {
461         switch(version) {
462         case IPC_64:
463                 return copy_to_user(buf, in, sizeof(*in));
464         case IPC_OLD:
465             {
466                 struct shmid_ds out;
467
468                 ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
469                 out.shm_segsz   = in->shm_segsz;
470                 out.shm_atime   = in->shm_atime;
471                 out.shm_dtime   = in->shm_dtime;
472                 out.shm_ctime   = in->shm_ctime;
473                 out.shm_cpid    = in->shm_cpid;
474                 out.shm_lpid    = in->shm_lpid;
475                 out.shm_nattch  = in->shm_nattch;
476
477                 return copy_to_user(buf, &out, sizeof(out));
478             }
479         default:
480                 return -EINVAL;
481         }
482 }
483
/* Attributes extracted from the user's buffer for IPC_SET. */
struct shm_setbuf {
	uid_t	uid;
	gid_t	gid;
	mode_t	mode;
};
489
490 static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __user *buf, int version)
491 {
492         switch(version) {
493         case IPC_64:
494             {
495                 struct shmid64_ds tbuf;
496
497                 if (copy_from_user(&tbuf, buf, sizeof(tbuf)))
498                         return -EFAULT;
499
500                 out->uid        = tbuf.shm_perm.uid;
501                 out->gid        = tbuf.shm_perm.gid;
502                 out->mode       = tbuf.shm_perm.mode;
503
504                 return 0;
505             }
506         case IPC_OLD:
507             {
508                 struct shmid_ds tbuf_old;
509
510                 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
511                         return -EFAULT;
512
513                 out->uid        = tbuf_old.shm_perm.uid;
514                 out->gid        = tbuf_old.shm_perm.gid;
515                 out->mode       = tbuf_old.shm_perm.mode;
516
517                 return 0;
518             }
519         default:
520                 return -EINVAL;
521         }
522 }
523
524 static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
525 {
526         switch(version) {
527         case IPC_64:
528                 return copy_to_user(buf, in, sizeof(*in));
529         case IPC_OLD:
530             {
531                 struct shminfo out;
532
533                 if(in->shmmax > INT_MAX)
534                         out.shmmax = INT_MAX;
535                 else
536                         out.shmmax = (int)in->shmmax;
537
538                 out.shmmin      = in->shmmin;
539                 out.shmmni      = in->shmmni;
540                 out.shmseg      = in->shmseg;
541                 out.shmall      = in->shmall; 
542
543                 return copy_to_user(buf, &out, sizeof(out));
544             }
545         default:
546                 return -EINVAL;
547         }
548 }
549
/*
 * shm_get_stat - total up memory usage of all shm segments in @ns
 * @ns:  ipc namespace to scan
 * @rss: out: resident pages summed over all segments
 * @swp: out: swapped-out pages (hugetlb segments contribute none)
 *
 * Caller must hold shm_ids.mutex (see the comment in the loop).
 */
static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
		unsigned long *swp)
{
	int next_id;
	int total, in_use;

	*rss = 0;
	*swp = 0;

	in_use = shm_ids(ns).in_use;

	/* The idr may have holes: scan upward until all entries are seen. */
	for (total = 0, next_id = 0; total < in_use; next_id++) {
		struct shmid_kernel *shp;
		struct inode *inode;

		/*
		 * idr_find() is called via shm_get(), so with shm_ids.mutex
		 * locked. Since ipc_addid() is also called with
		 * shm_ids.mutex down, there is no need to add read barriers
		 * here to guarantee the writes in ipc_addid() are seen in
		 * order here (for Alpha).
		 * However idr_find() itself does not necessarily require
		 * ipc_ids.mutex down. So if idr_find() is used by other
		 * places without ipc_ids.mutex down, then it needs read
		 * memory barriers as ipc_lock() does.
		 */

		shp = idr_find(&shm_ids(ns).ipcs_idr, next_id);
		if (shp == NULL)
			continue;

		inode = shp->shm_file->f_path.dentry->d_inode;

		if (is_file_hugepages(shp->shm_file)) {
			struct address_space *mapping = inode->i_mapping;
			*rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages;
		} else {
			struct shmem_inode_info *info = SHMEM_I(inode);
			spin_lock(&info->lock);
			*rss += inode->i_mapping->nrpages;
			*swp += info->swapped;
			spin_unlock(&info->lock);
		}

		total++;
	}
}
597
/*
 * shmctl(2): segment control operations.
 *
 * Handles the info commands (IPC_INFO/SHM_INFO), the stat commands
 * (SHM_STAT/IPC_STAT), memory locking (SHM_LOCK/SHM_UNLOCK), removal
 * (IPC_RMID) and attribute updates (IPC_SET).
 */
asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
{
	struct shm_setbuf setbuf;
	struct shmid_kernel *shp;
	int err, version;
	struct ipc_namespace *ns;

	if (cmd < 0 || shmid < 0) {
		err = -EINVAL;
		goto out;
	}

	/* Strip the IPC_64 flag from cmd; version selects the user ABI. */
	version = ipc_parse_version(&cmd);
	ns = current->nsproxy->ipc_ns;

	switch (cmd) { /* replace with proc interface ? */
	case IPC_INFO:
	{
		/* Report the namespace-wide limits; returns the highest id. */
		struct shminfo64 shminfo;

		err = security_shm_shmctl(NULL, cmd);
		if (err)
			return err;

		memset(&shminfo,0,sizeof(shminfo));
		shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
		shminfo.shmmax = ns->shm_ctlmax;
		shminfo.shmall = ns->shm_ctlall;

		shminfo.shmmin = SHMMIN;
		if(copy_shminfo_to_user (buf, &shminfo, version))
			return -EFAULT;
		/* reading a integer is always atomic */
		err = ipc_get_maxid(&shm_ids(ns));
		if(err<0)
			err = 0;
		goto out;
	}
	case SHM_INFO:
	{
		/* Report current usage: ids in use, total/rss/swap pages. */
		struct shm_info shm_info;

		err = security_shm_shmctl(NULL, cmd);
		if (err)
			return err;

		memset(&shm_info,0,sizeof(shm_info));
		mutex_lock(&shm_ids(ns).mutex);
		shm_info.used_ids = shm_ids(ns).in_use;
		shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
		shm_info.shm_tot = ns->shm_tot;
		shm_info.swap_attempts = 0;
		shm_info.swap_successes = 0;
		err = ipc_get_maxid(&shm_ids(ns));
		mutex_unlock(&shm_ids(ns).mutex);
		if(copy_to_user (buf, &shm_info, sizeof(shm_info))) {
			err = -EFAULT;
			goto out;
		}

		err = err < 0 ? 0 : err;
		goto out;
	}
	case SHM_STAT:
	case IPC_STAT:
	{
		struct shmid64_ds tbuf;
		int result;

		if (!buf) {
			err = -EFAULT;
			goto out;
		}

		/*
		 * SHM_STAT takes an index and reports the full id;
		 * IPC_STAT takes an id and checks its sequence number.
		 */
		if (cmd == SHM_STAT) {
			shp = shm_lock(ns, shmid);
			if (IS_ERR(shp)) {
				err = PTR_ERR(shp);
				goto out;
			}
			result = shp->shm_perm.id;
		} else {
			shp = shm_lock_check(ns, shmid);
			if (IS_ERR(shp)) {
				err = PTR_ERR(shp);
				goto out;
			}
			result = 0;
		}
		err=-EACCES;
		if (ipcperms (&shp->shm_perm, S_IRUGO))
			goto out_unlock;
		err = security_shm_shmctl(shp, cmd);
		if (err)
			goto out_unlock;
		/* Snapshot under the lock; copy to userspace after unlock. */
		memset(&tbuf, 0, sizeof(tbuf));
		kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
		tbuf.shm_segsz	= shp->shm_segsz;
		tbuf.shm_atime	= shp->shm_atim;
		tbuf.shm_dtime	= shp->shm_dtim;
		tbuf.shm_ctime	= shp->shm_ctim;
		tbuf.shm_cpid	= shp->shm_cprid;
		tbuf.shm_lpid	= shp->shm_lprid;
		tbuf.shm_nattch	= shp->shm_nattch;
		shm_unlock(shp);
		if(copy_shmid_to_user (buf, &tbuf, version))
			err = -EFAULT;
		else
			err = result;
		goto out;
	}
	case SHM_LOCK:
	case SHM_UNLOCK:
	{
		shp = shm_lock_check(ns, shmid);
		if (IS_ERR(shp)) {
			err = PTR_ERR(shp);
			goto out;
		}

		err = audit_ipc_obj(&(shp->shm_perm));
		if (err)
			goto out_unlock;

		/* Without CAP_IPC_LOCK: owner/creator only, within rlimit. */
		if (!capable(CAP_IPC_LOCK)) {
			err = -EPERM;
			if (current->euid != shp->shm_perm.uid &&
			    current->euid != shp->shm_perm.cuid)
				goto out_unlock;
			if (cmd == SHM_LOCK &&
			    !current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
				goto out_unlock;
		}

		err = security_shm_shmctl(shp, cmd);
		if (err)
			goto out_unlock;

		/* hugetlb segments are always resident: nothing to (un)lock. */
		if(cmd==SHM_LOCK) {
			struct user_struct * user = current->user;
			if (!is_file_hugepages(shp->shm_file)) {
				err = shmem_lock(shp->shm_file, 1, user);
				if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){
					shp->shm_perm.mode |= SHM_LOCKED;
					shp->mlock_user = user;
				}
			}
		} else if (!is_file_hugepages(shp->shm_file)) {
			shmem_lock(shp->shm_file, 0, shp->mlock_user);
			shp->shm_perm.mode &= ~SHM_LOCKED;
			shp->mlock_user = NULL;
		}
		shm_unlock(shp);
		goto out;
	}
	case IPC_RMID:
	{
		/*
		 *	We cannot simply remove the file. The SVID states
		 *	that the block remains until the last person
		 *	detaches from it, then is deleted. A shmat() on
		 *	an RMID segment is legal in older Linux and if
		 *	we change it apps break...
		 *
		 *	Instead we set a destroyed flag, and then blow
		 *	the name away when the usage hits zero.
		 */
		mutex_lock(&shm_ids(ns).mutex);
		shp = shm_lock_check(ns, shmid);
		if (IS_ERR(shp)) {
			err = PTR_ERR(shp);
			goto out_up;
		}

		err = audit_ipc_obj(&(shp->shm_perm));
		if (err)
			goto out_unlock_up;

		if (current->euid != shp->shm_perm.uid &&
		    current->euid != shp->shm_perm.cuid && 
		    !capable(CAP_SYS_ADMIN)) {
			err=-EPERM;
			goto out_unlock_up;
		}

		err = security_shm_shmctl(shp, cmd);
		if (err)
			goto out_unlock_up;

		/* Destroys now or marks SHM_DEST; unlocks shp either way. */
		do_shm_rmid(ns, shp);
		mutex_unlock(&shm_ids(ns).mutex);
		goto out;
	}

	case IPC_SET:
	{
		if (!buf) {
			err = -EFAULT;
			goto out;
		}

		if (copy_shmid_from_user (&setbuf, buf, version)) {
			err = -EFAULT;
			goto out;
		}
		mutex_lock(&shm_ids(ns).mutex);
		shp = shm_lock_check(ns, shmid);
		if (IS_ERR(shp)) {
			err = PTR_ERR(shp);
			goto out_up;
		}
		err = audit_ipc_obj(&(shp->shm_perm));
		if (err)
			goto out_unlock_up;
		err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode);
		if (err)
			goto out_unlock_up;
		err=-EPERM;
		if (current->euid != shp->shm_perm.uid &&
		    current->euid != shp->shm_perm.cuid && 
		    !capable(CAP_SYS_ADMIN)) {
			goto out_unlock_up;
		}

		err = security_shm_shmctl(shp, cmd);
		if (err)
			goto out_unlock_up;

		/* Only owner/group and the permission bits may change. */
		shp->shm_perm.uid = setbuf.uid;
		shp->shm_perm.gid = setbuf.gid;
		shp->shm_perm.mode = (shp->shm_perm.mode & ~S_IRWXUGO)
			| (setbuf.mode & S_IRWXUGO);
		shp->shm_ctim = get_seconds();
		break;
	}

	default:
		err = -EINVAL;
		goto out;
	}

	err = 0;
out_unlock_up:
	shm_unlock(shp);
out_up:
	mutex_unlock(&shm_ids(ns).mutex);
	goto out;
out_unlock:
	shm_unlock(shp);
out:
	return err;
}
850
851 /*
852  * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
853  *
854  * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
855  * "raddr" thing points to kernel space, and there has to be a wrapper around
856  * this.
857  */
long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
{
	struct shmid_kernel *shp;
	unsigned long addr;
	unsigned long size;
	struct file * file;
	int    err;
	unsigned long flags;
	unsigned long prot;
	int acc_mode;
	unsigned long user_addr;
	struct ipc_namespace *ns;
	struct shm_file_data *sfd;
	struct path path;
	mode_t f_mode;

	err = -EINVAL;
	if (shmid < 0)
		goto out;
	else if ((addr = (ulong)shmaddr)) {
		/*
		 * Caller supplied an attach address: it must be SHMLBA
		 * aligned, or SHM_RND must be set so we can round it down.
		 */
		if (addr & (SHMLBA-1)) {
			if (shmflg & SHM_RND)
				addr &= ~(SHMLBA-1);	   /* round down */
			else
#ifndef __ARCH_FORCE_SHMLBA
				/*
				 * Arches that don't force SHMLBA alignment
				 * still insist on page alignment.
				 */
				if (addr & ~PAGE_MASK)
#endif
					goto out;
		}
		flags = MAP_SHARED | MAP_FIXED;
	} else {
		/* SHM_REMAP is meaningless without an explicit address. */
		if ((shmflg & SHM_REMAP))
			goto out;

		flags = MAP_SHARED;
	}

	/*
	 * Translate the SHM_* flags into mmap protection bits, the ipcperms
	 * access mode, and the f_mode for the per-attach struct file.
	 */
	if (shmflg & SHM_RDONLY) {
		prot = PROT_READ;
		acc_mode = S_IRUGO;
		f_mode = FMODE_READ;
	} else {
		prot = PROT_READ | PROT_WRITE;
		acc_mode = S_IRUGO | S_IWUGO;
		f_mode = FMODE_READ | FMODE_WRITE;
	}
	if (shmflg & SHM_EXEC) {
		prot |= PROT_EXEC;
		acc_mode |= S_IXUGO;
	}

	/*
	 * We cannot rely on the fs check since SYSV IPC does have an
	 * additional creator id...
	 */
	ns = current->nsproxy->ipc_ns;
	shp = shm_lock_check(ns, shmid);
	if (IS_ERR(shp)) {
		err = PTR_ERR(shp);
		goto out;
	}

	err = -EACCES;
	if (ipcperms(&shp->shm_perm, acc_mode))
		goto out_unlock;

	err = security_shm_shmat(shp, shmaddr, shmflg);
	if (err)
		goto out_unlock;

	/*
	 * Pin the backing dentry and raise shm_nattch while we still hold
	 * the segment lock, then drop the lock for the (sleeping) work
	 * below.  The raised nattch keeps the segment from being destroyed
	 * underneath us: destruction only happens when nattch drops to 0
	 * (see the out_nattch path below).
	 */
	path.dentry = dget(shp->shm_file->f_path.dentry);
	path.mnt    = shp->shm_file->f_path.mnt;
	shp->shm_nattch++;
	size = i_size_read(path.dentry->d_inode);
	shm_unlock(shp);

	err = -ENOMEM;
	sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
	if (!sfd)
		goto out_put_dentry;

	err = -ENOMEM;

	/*
	 * Every attach gets its own struct file wrapping the segment's
	 * shm_file, so per-mapping state (id, namespace, vm_ops) can live
	 * in sfd; shm_file_data(file) retrieves it via private_data.
	 */
	file = alloc_file(path.mnt, path.dentry, f_mode, &shm_file_operations);
	if (!file)
		goto out_free;

	file->private_data = sfd;
	file->f_mapping = shp->shm_file->f_mapping;
	sfd->id = shp->shm_perm.id;
	sfd->ns = get_ipc_ns(ns);
	sfd->file = shp->shm_file;
	sfd->vm_ops = NULL;

	down_write(&current->mm->mmap_sem);
	if (addr && !(shmflg & SHM_REMAP)) {
		/* Without SHM_REMAP, refuse to clobber existing mappings. */
		err = -EINVAL;
		if (find_vma_intersection(current->mm, addr, addr + size))
			goto invalid;
		/*
		 * If shm segment goes below stack, make sure there is some
		 * space left for the stack to grow (at least 4 pages).
		 */
		if (addr < current->mm->start_stack &&
		    addr > current->mm->start_stack - size - PAGE_SIZE * 5)
			goto invalid;
	}

	user_addr = do_mmap (file, addr, size, prot, flags, 0);
	*raddr = user_addr;
	err = 0;
	if (IS_ERR_VALUE(user_addr))
		err = (long)user_addr;
invalid:
	up_write(&current->mm->mmap_sem);

	fput(file);

out_nattch:
	/*
	 * Drop the temporary attach count taken above.  NOTE(review): on a
	 * successful mmap the mapping's own attach accounting is presumably
	 * handled by the vma open/close ops on shm_file_operations — this
	 * increment only covered the unlocked window in this function.  If
	 * we were the last attacher and the segment is marked SHM_DEST
	 * (IPC_RMID while attached), destroy it now.
	 */
	mutex_lock(&shm_ids(ns).mutex);
	shp = shm_lock(ns, shmid);
	BUG_ON(IS_ERR(shp));
	shp->shm_nattch--;
	if(shp->shm_nattch == 0 &&
	   shp->shm_perm.mode & SHM_DEST)
		shm_destroy(ns, shp);
	else
		shm_unlock(shp);
	mutex_unlock(&shm_ids(ns).mutex);

out:
	return err;

out_unlock:
	shm_unlock(shp);
	goto out;

out_free:
	kfree(sfd);
out_put_dentry:
	dput(path.dentry);
	goto out_nattch;
}
1001
1002 asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg)
1003 {
1004         unsigned long ret;
1005         long err;
1006
1007         err = do_shmat(shmid, shmaddr, shmflg, &ret);
1008         if (err)
1009                 return err;
1010         force_successful_syscall_return();
1011         return (long)ret;
1012 }
1013
1014 /*
1015  * detach and kill segment if marked destroyed.
1016  * The work is done in shm_close.
1017  */
/*
 * Detach the shm segment mapped at shmaddr.  Returns 0 if at least one
 * matching vma was found and unmapped, -EINVAL otherwise.  Actual segment
 * teardown (for SHM_DEST segments) happens via the vma close op, shm_close.
 */
asmlinkage long sys_shmdt(char __user *shmaddr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *next;
	unsigned long addr = (unsigned long)shmaddr;
	loff_t size = 0;
	int retval = -EINVAL;

	/* The attach address handed back by shmat() was page aligned. */
	if (addr & ~PAGE_MASK)
		return retval;

	down_write(&mm->mmap_sem);

	/*
	 * This function tries to be smart and unmap shm segments that
	 * were modified by partial mlock or munmap calls:
	 * - It first determines the size of the shm segment that should be
	 *   unmapped: It searches for a vma that is backed by shm and that
	 *   started at address shmaddr. It records its size and then unmaps
	 *   it.
	 * - Then it unmaps all shm vmas that started at shmaddr and that
	 *   are within the initially determined size.
	 * Errors from do_munmap are ignored: the function only fails if
	 * it's called with invalid parameters or if it's called to unmap
	 * a part of a vma. Both calls in this function are for full vmas,
	 * the parameters are directly copied from the vma itself and always
	 * valid - therefore do_munmap cannot fail. (famous last words?)
	 */
	/*
	 * If it had been mremap()'d, the starting address would not
	 * match the usual checks anyway. So assume all vma's are
	 * above the starting address given.
	 */
	vma = find_vma(mm, addr);

	while (vma) {
		next = vma->vm_next;

		/*
		 * Check if the starting address would match, i.e. it's
		 * a fragment created by mprotect() and/or munmap(), or it
		 * otherwise starts at this address with no hassles.
		 * The pgoff test verifies the fragment's file offset is
		 * consistent with a mapping that originally began at addr.
		 */
		if ((vma->vm_ops == &shm_vm_ops) &&
			(vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {


			size = vma->vm_file->f_path.dentry->d_inode->i_size;
			do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
			/*
			 * We discovered the size of the shm segment, so
			 * break out of here and fall through to the next
			 * loop that uses the size information to stop
			 * searching for matching vma's.
			 */
			retval = 0;
			vma = next;
			break;
		}
		vma = next;
	}

	/*
	 * We need look no further than the maximum address a fragment
	 * could possibly have landed at. Also cast things to loff_t to
	 * prevent overflows and make comparisons vs. equal-width types.
	 */
	size = PAGE_ALIGN(size);
	while (vma && (loff_t)(vma->vm_end - addr) <= size) {
		next = vma->vm_next;

		/* finding a matching vma now does not alter retval */
		if ((vma->vm_ops == &shm_vm_ops) &&
			(vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff)

			do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
		vma = next;
	}

	up_write(&mm->mmap_sem);
	return retval;
}
1100
1101 #ifdef CONFIG_PROC_FS
1102 static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
1103 {
1104         struct shmid_kernel *shp = it;
1105         char *format;
1106
1107 #define SMALL_STRING "%10d %10d  %4o %10u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
1108 #define BIG_STRING   "%10d %10d  %4o %21u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
1109
1110         if (sizeof(size_t) <= sizeof(int))
1111                 format = SMALL_STRING;
1112         else
1113                 format = BIG_STRING;
1114         return seq_printf(s, format,
1115                           shp->shm_perm.key,
1116                           shp->shm_perm.id,
1117                           shp->shm_perm.mode,
1118                           shp->shm_segsz,
1119                           shp->shm_cprid,
1120                           shp->shm_lprid,
1121                           shp->shm_nattch,
1122                           shp->shm_perm.uid,
1123                           shp->shm_perm.gid,
1124                           shp->shm_perm.cuid,
1125                           shp->shm_perm.cgid,
1126                           shp->shm_atim,
1127                           shp->shm_dtim,
1128                           shp->shm_ctim);
1129 }
1130 #endif