ipc/util: Helpers for making the sysvipc operations pid namespace aware
[linux-2.6-block.git] / ipc / shm.c
CommitLineData
b2441318 1// SPDX-License-Identifier: GPL-2.0
1da177e4
LT
2/*
3 * linux/ipc/shm.c
4 * Copyright (C) 1992, 1993 Krishna Balasubramanian
5 * Many improvements/fixes by Bruno Haible.
6 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
7 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
8 *
9 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
10 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
11 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
12 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
13 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
14 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
15 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
16 *
073115d6
SG
17 * support for audit of ipc object properties and permission changes
18 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
4e982311
KK
19 *
20 * namespaces support
21 * OpenVZ, SWsoft Inc.
22 * Pavel Emelianov <xemul@openvz.org>
c2c737a0
DB
23 *
24 * Better ipc lock (kern_ipc_perm.lock) handling
25 * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
1da177e4
LT
26 */
27
1da177e4
LT
28#include <linux/slab.h>
29#include <linux/mm.h>
30#include <linux/hugetlb.h>
31#include <linux/shm.h>
32#include <linux/init.h>
33#include <linux/file.h>
34#include <linux/mman.h>
1da177e4
LT
35#include <linux/shmem_fs.h>
36#include <linux/security.h>
37#include <linux/syscalls.h>
38#include <linux/audit.h>
c59ede7b 39#include <linux/capability.h>
7d87e14c 40#include <linux/ptrace.h>
19b4946c 41#include <linux/seq_file.h>
3e148c79 42#include <linux/rwsem.h>
4e982311 43#include <linux/nsproxy.h>
bc56bba8 44#include <linux/mount.h>
ae5e1b22 45#include <linux/ipc_namespace.h>
7d87e14c 46
7153e402 47#include <linux/uaccess.h>
1da177e4
LT
48
49#include "util.h"
50
a2e102cd
EB
51struct shmid_kernel /* private to the kernel */
52{
53 struct kern_ipc_perm shm_perm;
54 struct file *shm_file;
55 unsigned long shm_nattch;
56 unsigned long shm_segsz;
57 time64_t shm_atim;
58 time64_t shm_dtim;
59 time64_t shm_ctim;
60 pid_t shm_cprid;
61 pid_t shm_lprid;
62 struct user_struct *mlock_user;
63
64 /* The task created the shm object. NULL if the task is dead. */
65 struct task_struct *shm_creator;
66 struct list_head shm_clist; /* list by creator */
67} __randomize_layout;
68
/* Flags kept in the upper byte of shm_perm.mode */
#define SHM_DEST	01000	/* segment will be destroyed on last detach */
#define SHM_LOCKED	02000	/* segment will not be swapped */
72
bc56bba8
EB
/*
 * Per-attach state hung off the wrapper file's private_data: which
 * segment (id within ns) it refers to, the underlying file, and the
 * underlying file's vm_ops saved by shm_mmap().
 */
struct shm_file_data {
	int					id;
	struct ipc_namespace			*ns;
	struct file				*file;
	const struct vm_operations_struct	*vm_ops;
};
79
/* Lvalue view of file->private_data typed as struct shm_file_data *. */
#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))
81
9a32144e 82static const struct file_operations shm_file_operations;
f0f37e2f 83static const struct vm_operations_struct shm_vm_ops;
1da177e4 84
/* The shm id table of an ipc namespace. */
#define shm_ids(ns)	((ns)->ids[IPC_SHM_IDS])
1da177e4 86
4e982311
KK
/* Release a segment through its embedded kern_ipc_perm. */
#define shm_unlock(shp)	ipc_unlock(&(shp)->shm_perm)
1da177e4 89
7748dbfa 90static int newseg(struct ipc_namespace *, struct ipc_params *);
bc56bba8
EB
91static void shm_open(struct vm_area_struct *vma);
92static void shm_close(struct vm_area_struct *vma);
239521f3 93static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
1da177e4 94#ifdef CONFIG_PROC_FS
19b4946c 95static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
1da177e4
LT
96#endif
97
0cfb6aee 98int shm_init_ns(struct ipc_namespace *ns)
4e982311 99{
4e982311
KK
100 ns->shm_ctlmax = SHMMAX;
101 ns->shm_ctlall = SHMALL;
102 ns->shm_ctlmni = SHMMNI;
b34a6b1d 103 ns->shm_rmid_forced = 0;
4e982311 104 ns->shm_tot = 0;
0cfb6aee 105 return ipc_init_ids(&shm_ids(ns));
4e982311
KK
106}
107
f4566f04 108/*
d9a605e4
DB
109 * Called with shm_ids.rwsem (writer) and the shp structure locked.
110 * Only shm_ids.rwsem remains locked on exit.
f4566f04 111 */
01b8b07a 112static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
4e982311 113{
01b8b07a 114 struct shmid_kernel *shp;
63980c80 115
01b8b07a
PP
116 shp = container_of(ipcp, struct shmid_kernel, shm_perm);
117
239521f3 118 if (shp->shm_nattch) {
4e982311
KK
119 shp->shm_perm.mode |= SHM_DEST;
120 /* Do not find it any more */
0cfb6aee 121 ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
4e982311
KK
122 shm_unlock(shp);
123 } else
124 shm_destroy(ns, shp);
125}
126
#ifdef CONFIG_IPC_NS
/*
 * Tear down the shm state of an ipc namespace: run do_shm_rmid() over
 * every segment via free_ipcs(), then destroy the idr and the key
 * hash table.
 */
void shm_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
	idr_destroy(&shm_ids(ns).ipcs_idr);
	rhashtable_destroy(&shm_ids(ns).key_ht);
}
#endif
1da177e4 135
140d0b21 136static int __init ipc_ns_init(void)
1da177e4 137{
0cfb6aee
GK
138 const int err = shm_init_ns(&init_ipc_ns);
139 WARN(err, "ipc: sysv shm_init_ns failed: %d\n", err);
140 return err;
140d0b21
LT
141}
142
143pure_initcall(ipc_ns_init);
144
239521f3 145void __init shm_init(void)
140d0b21 146{
19b4946c 147 ipc_init_proc_interface("sysvipc/shm",
b7952180
HD
148#if BITS_PER_LONG <= 32
149 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n",
150#else
151 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n",
152#endif
4e982311 153 IPC_SHM_IDS, sysvipc_shm_proc_show);
1da177e4
LT
154}
155
8b8d52ac
DB
156static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
157{
55b7ae50 158 struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
8b8d52ac
DB
159
160 if (IS_ERR(ipcp))
161 return ERR_CAST(ipcp);
162
163 return container_of(ipcp, struct shmid_kernel, shm_perm);
164}
165
166static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
167{
168 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);
169
170 if (IS_ERR(ipcp))
171 return ERR_CAST(ipcp);
172
173 return container_of(ipcp, struct shmid_kernel, shm_perm);
174}
175
3e148c79 176/*
d9a605e4 177 * shm_lock_(check_) routines are called in the paths where the rwsem
00c2bf85 178 * is not necessarily held.
3e148c79 179 */
023a5355 180static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
1da177e4 181{
03f02c76
ND
182 struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
183
c5c8975b 184 /*
1ac0b6de
KS
185 * Callers of shm_lock() must validate the status of the returned ipc
186 * object pointer (as returned by ipc_lock()), and error out as
187 * appropriate.
c5c8975b 188 */
1ac0b6de
KS
189 if (IS_ERR(ipcp))
190 return (void *)ipcp;
03f02c76 191 return container_of(ipcp, struct shmid_kernel, shm_perm);
023a5355
ND
192}
193
4c677e2e
VK
194static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
195{
196 rcu_read_lock();
cf9d5d78 197 ipc_lock_object(&ipcp->shm_perm);
4c677e2e
VK
198}
199
53dad6d3
DB
200static void shm_rcu_free(struct rcu_head *head)
201{
dba4cdd3
MS
202 struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
203 rcu);
204 struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
205 shm_perm);
7191adff 206 security_shm_free(&shp->shm_perm);
42e618f7 207 kvfree(shp);
53dad6d3
DB
208}
209
7ca7e564 210static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
1da177e4 211{
ab602f79 212 list_del(&s->shm_clist);
7ca7e564 213 ipc_rmid(&shm_ids(ns), &s->shm_perm);
1da177e4
LT
214}
215
1da177e4 216
1ac0b6de 217static int __shm_open(struct vm_area_struct *vma)
4e982311 218{
bc56bba8
EB
219 struct file *file = vma->vm_file;
220 struct shm_file_data *sfd = shm_file_data(file);
1da177e4
LT
221 struct shmid_kernel *shp;
222
bc56bba8 223 shp = shm_lock(sfd->ns, sfd->id);
1ac0b6de
KS
224
225 if (IS_ERR(shp))
226 return PTR_ERR(shp);
227
7ff2819e 228 shp->shm_atim = ktime_get_real_seconds();
b488893a 229 shp->shm_lprid = task_tgid_vnr(current);
1da177e4
LT
230 shp->shm_nattch++;
231 shm_unlock(shp);
1ac0b6de
KS
232 return 0;
233}
234
/* This is called by fork, once for every shm attach. */
static void shm_open(struct vm_area_struct *vma)
{
	/*
	 * A failure means we raced in the idr lookup or with
	 * shm_destroy().  Either way, the ID is busted.
	 */
	WARN_ON_ONCE(__shm_open(vma));
}
245
1da177e4
LT
246/*
247 * shm_destroy - free the struct shmid_kernel
248 *
f4566f04 249 * @ns: namespace
1da177e4
LT
250 * @shp: struct to free
251 *
d9a605e4 252 * It has to be called with shp and shm_ids.rwsem (writer) locked,
1da177e4
LT
253 * but returns with shp unlocked and freed.
254 */
4e982311 255static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
1da177e4 256{
a399b29d
GT
257 struct file *shm_file;
258
259 shm_file = shp->shm_file;
260 shp->shm_file = NULL;
4e982311 261 ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
7ca7e564 262 shm_rmid(ns, shp);
1da177e4 263 shm_unlock(shp);
a399b29d
GT
264 if (!is_file_hugepages(shm_file))
265 shmem_lock(shm_file, 0, shp->mlock_user);
353d5c30 266 else if (shp->mlock_user)
07a46ed2
DH
267 user_shm_unlock(i_size_read(file_inode(shm_file)),
268 shp->mlock_user);
a399b29d 269 fput(shm_file);
dba4cdd3 270 ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
1da177e4
LT
271}
272
b34a6b1d
VK
273/*
274 * shm_may_destroy - identifies whether shm segment should be destroyed now
275 *
276 * Returns true if and only if there are no active users of the segment and
277 * one of the following is true:
278 *
279 * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
280 *
281 * 2) sysctl kernel.shm_rmid_forced is set to 1.
282 */
283static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
284{
285 return (shp->shm_nattch == 0) &&
286 (ns->shm_rmid_forced ||
287 (shp->shm_perm.mode & SHM_DEST));
288}
289
1da177e4 290/*
bc56bba8 291 * remove the attach descriptor vma.
1da177e4
LT
292 * free memory for segment if it is marked destroyed.
293 * The descriptor has already been removed from the current->mm->mmap list
294 * and will later be kfree()d.
295 */
bc56bba8 296static void shm_close(struct vm_area_struct *vma)
1da177e4 297{
239521f3 298 struct file *file = vma->vm_file;
bc56bba8 299 struct shm_file_data *sfd = shm_file_data(file);
1da177e4 300 struct shmid_kernel *shp;
bc56bba8 301 struct ipc_namespace *ns = sfd->ns;
4e982311 302
d9a605e4 303 down_write(&shm_ids(ns).rwsem);
1da177e4 304 /* remove from the list of attaches of the shm segment */
00c2bf85 305 shp = shm_lock(ns, sfd->id);
1ac0b6de
KS
306
307 /*
308 * We raced in the idr lookup or with shm_destroy().
309 * Either way, the ID is busted.
310 */
311 if (WARN_ON_ONCE(IS_ERR(shp)))
312 goto done; /* no-op */
313
b488893a 314 shp->shm_lprid = task_tgid_vnr(current);
7ff2819e 315 shp->shm_dtim = ktime_get_real_seconds();
1da177e4 316 shp->shm_nattch--;
b34a6b1d
VK
317 if (shm_may_destroy(ns, shp))
318 shm_destroy(ns, shp);
319 else
320 shm_unlock(shp);
1ac0b6de 321done:
d9a605e4 322 up_write(&shm_ids(ns).rwsem);
b34a6b1d
VK
323}
324
d9a605e4 325/* Called with ns->shm_ids(ns).rwsem locked */
b34a6b1d
VK
326static int shm_try_destroy_orphaned(int id, void *p, void *data)
327{
328 struct ipc_namespace *ns = data;
4c677e2e
VK
329 struct kern_ipc_perm *ipcp = p;
330 struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
b34a6b1d
VK
331
332 /*
333 * We want to destroy segments without users and with already
334 * exit'ed originating process.
4c677e2e 335 *
d9a605e4 336 * As shp->* are changed under rwsem, it's safe to skip shp locking.
b34a6b1d 337 */
4c677e2e 338 if (shp->shm_creator != NULL)
b34a6b1d 339 return 0;
b34a6b1d 340
4c677e2e
VK
341 if (shm_may_destroy(ns, shp)) {
342 shm_lock_by_ptr(shp);
4e982311 343 shm_destroy(ns, shp);
4c677e2e 344 }
b34a6b1d
VK
345 return 0;
346}
347
348void shm_destroy_orphaned(struct ipc_namespace *ns)
349{
d9a605e4 350 down_write(&shm_ids(ns).rwsem);
33a30ed4 351 if (shm_ids(ns).in_use)
4c677e2e 352 idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
d9a605e4 353 up_write(&shm_ids(ns).rwsem);
b34a6b1d
VK
354}
355
83293c0f 356/* Locking assumes this will only be called with task == current */
b34a6b1d
VK
357void exit_shm(struct task_struct *task)
358{
4c677e2e 359 struct ipc_namespace *ns = task->nsproxy->ipc_ns;
ab602f79 360 struct shmid_kernel *shp, *n;
b34a6b1d 361
83293c0f
JM
362 if (list_empty(&task->sysvshm.shm_clist))
363 return;
364
365 /*
366 * If kernel.shm_rmid_forced is not set then only keep track of
367 * which shmids are orphaned, so that a later set of the sysctl
368 * can clean them up.
369 */
370 if (!ns->shm_rmid_forced) {
371 down_read(&shm_ids(ns).rwsem);
372 list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
373 shp->shm_creator = NULL;
374 /*
375 * Only under read lock but we are only called on current
376 * so no entry on the list will be shared.
377 */
378 list_del(&task->sysvshm.shm_clist);
379 up_read(&shm_ids(ns).rwsem);
298507d4 380 return;
83293c0f 381 }
298507d4 382
83293c0f
JM
383 /*
384 * Destroy all already created segments, that were not yet mapped,
385 * and mark any mapped as orphan to cover the sysctl toggling.
386 * Destroy is skipped if shm_may_destroy() returns false.
387 */
d9a605e4 388 down_write(&shm_ids(ns).rwsem);
83293c0f
JM
389 list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
390 shp->shm_creator = NULL;
391
392 if (shm_may_destroy(ns, shp)) {
393 shm_lock_by_ptr(shp);
394 shm_destroy(ns, shp);
395 }
396 }
397
398 /* Remove the list head from any segments still attached. */
ab602f79 399 list_del(&task->sysvshm.shm_clist);
d9a605e4 400 up_write(&shm_ids(ns).rwsem);
1da177e4
LT
401}
402
11bac800 403static int shm_fault(struct vm_fault *vmf)
bc56bba8 404{
11bac800 405 struct file *file = vmf->vma->vm_file;
bc56bba8
EB
406 struct shm_file_data *sfd = shm_file_data(file);
407
11bac800 408 return sfd->vm_ops->fault(vmf);
bc56bba8
EB
409}
410
411#ifdef CONFIG_NUMA
d823e3e7 412static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
bc56bba8
EB
413{
414 struct file *file = vma->vm_file;
415 struct shm_file_data *sfd = shm_file_data(file);
416 int err = 0;
63980c80 417
bc56bba8
EB
418 if (sfd->vm_ops->set_policy)
419 err = sfd->vm_ops->set_policy(vma, new);
420 return err;
421}
422
d823e3e7
AB
423static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
424 unsigned long addr)
bc56bba8
EB
425{
426 struct file *file = vma->vm_file;
427 struct shm_file_data *sfd = shm_file_data(file);
428 struct mempolicy *pol = NULL;
429
430 if (sfd->vm_ops->get_policy)
431 pol = sfd->vm_ops->get_policy(vma, addr);
52cd3b07 432 else if (vma->vm_policy)
bc56bba8 433 pol = vma->vm_policy;
52cd3b07 434
bc56bba8
EB
435 return pol;
436}
437#endif
438
239521f3 439static int shm_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 440{
bc56bba8 441 struct shm_file_data *sfd = shm_file_data(file);
b0e15190
DH
442 int ret;
443
1ac0b6de
KS
444 /*
445 * In case of remap_file_pages() emulation, the file can represent
446 * removed IPC ID: propogate shm_lock() error to caller.
447 */
63980c80 448 ret = __shm_open(vma);
1ac0b6de
KS
449 if (ret)
450 return ret;
451
f74ac015 452 ret = call_mmap(sfd->file, vma);
1ac0b6de
KS
453 if (ret) {
454 shm_close(vma);
bc56bba8 455 return ret;
1ac0b6de 456 }
bc56bba8 457 sfd->vm_ops = vma->vm_ops;
2e92a3ba 458#ifdef CONFIG_MMU
d0edd852 459 WARN_ON(!sfd->vm_ops->fault);
2e92a3ba 460#endif
bc56bba8 461 vma->vm_ops = &shm_vm_ops;
1ac0b6de 462 return 0;
1da177e4
LT
463}
464
4e982311
KK
465static int shm_release(struct inode *ino, struct file *file)
466{
bc56bba8 467 struct shm_file_data *sfd = shm_file_data(file);
4e982311 468
bc56bba8
EB
469 put_ipc_ns(sfd->ns);
470 shm_file_data(file) = NULL;
471 kfree(sfd);
4e982311
KK
472 return 0;
473}
474
02c24a82 475static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
516dffdc 476{
516dffdc 477 struct shm_file_data *sfd = shm_file_data(file);
516dffdc 478
7ea80859
CH
479 if (!sfd->file->f_op->fsync)
480 return -EINVAL;
0f41074a 481 return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
516dffdc
AL
482}
483
7d8a4569
WD
484static long shm_fallocate(struct file *file, int mode, loff_t offset,
485 loff_t len)
486{
487 struct shm_file_data *sfd = shm_file_data(file);
488
489 if (!sfd->file->f_op->fallocate)
490 return -EOPNOTSUPP;
491 return sfd->file->f_op->fallocate(file, mode, offset, len);
492}
493
bc56bba8
EB
494static unsigned long shm_get_unmapped_area(struct file *file,
495 unsigned long addr, unsigned long len, unsigned long pgoff,
496 unsigned long flags)
497{
498 struct shm_file_data *sfd = shm_file_data(file);
63980c80 499
c4caa778
AV
500 return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
501 pgoff, flags);
bc56bba8 502}
bc56bba8 503
9a32144e 504static const struct file_operations shm_file_operations = {
4e982311 505 .mmap = shm_mmap,
516dffdc 506 .fsync = shm_fsync,
4e982311 507 .release = shm_release,
ed5e5894 508 .get_unmapped_area = shm_get_unmapped_area,
6038f373 509 .llseek = noop_llseek,
7d8a4569 510 .fallocate = shm_fallocate,
c4caa778
AV
511};
512
c01d5b30
HD
513/*
514 * shm_file_operations_huge is now identical to shm_file_operations,
515 * but we keep it distinct for the sake of is_file_shm_hugepages().
516 */
c4caa778
AV
517static const struct file_operations shm_file_operations_huge = {
518 .mmap = shm_mmap,
519 .fsync = shm_fsync,
520 .release = shm_release,
bc56bba8 521 .get_unmapped_area = shm_get_unmapped_area,
6038f373 522 .llseek = noop_llseek,
7d8a4569 523 .fallocate = shm_fallocate,
1da177e4
LT
524};
525
2954e440 526bool is_file_shm_hugepages(struct file *file)
c4caa778
AV
527{
528 return file->f_op == &shm_file_operations_huge;
529}
530
f0f37e2f 531static const struct vm_operations_struct shm_vm_ops = {
1da177e4
LT
532 .open = shm_open, /* callback for a new vm-area open */
533 .close = shm_close, /* callback for when the vm-area is released */
54cb8821 534 .fault = shm_fault,
bc56bba8
EB
535#if defined(CONFIG_NUMA)
536 .set_policy = shm_set_policy,
537 .get_policy = shm_get_policy,
1da177e4
LT
538#endif
539};
540
f4566f04
ND
541/**
542 * newseg - Create a new shared memory segment
543 * @ns: namespace
544 * @params: ptr to the structure that contains key, size and shmflg
545 *
d9a605e4 546 * Called with shm_ids.rwsem held as a writer.
f4566f04 547 */
7748dbfa 548static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
1da177e4 549{
7748dbfa
ND
550 key_t key = params->key;
551 int shmflg = params->flg;
552 size_t size = params->u.size;
1da177e4
LT
553 int error;
554 struct shmid_kernel *shp;
d69f3bad 555 size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
239521f3 556 struct file *file;
1da177e4 557 char name[13];
ca16d140 558 vm_flags_t acctflag = 0;
1da177e4 559
4e982311 560 if (size < SHMMIN || size > ns->shm_ctlmax)
1da177e4
LT
561 return -EINVAL;
562
1376327c
MS
563 if (numpages << PAGE_SHIFT < size)
564 return -ENOSPC;
565
09c6eb1f
MS
566 if (ns->shm_tot + numpages < ns->shm_tot ||
567 ns->shm_tot + numpages > ns->shm_ctlall)
1da177e4
LT
568 return -ENOSPC;
569
42e618f7
KC
570 shp = kvmalloc(sizeof(*shp), GFP_KERNEL);
571 if (unlikely(!shp))
1da177e4
LT
572 return -ENOMEM;
573
574 shp->shm_perm.key = key;
b33291c0 575 shp->shm_perm.mode = (shmflg & S_IRWXUGO);
1da177e4
LT
576 shp->mlock_user = NULL;
577
578 shp->shm_perm.security = NULL;
7191adff 579 error = security_shm_alloc(&shp->shm_perm);
1da177e4 580 if (error) {
42e618f7 581 kvfree(shp);
1da177e4
LT
582 return error;
583 }
584
239521f3 585 sprintf(name, "SYSV%08x", key);
1da177e4 586 if (shmflg & SHM_HUGETLB) {
c103a4dc 587 struct hstate *hs;
091d0d55
LZ
588 size_t hugesize;
589
c103a4dc 590 hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
091d0d55
LZ
591 if (!hs) {
592 error = -EINVAL;
593 goto no_file;
594 }
595 hugesize = ALIGN(size, huge_page_size(hs));
af73e4d9 596
5a6fe125
MG
597 /* hugetlb_file_setup applies strict accounting */
598 if (shmflg & SHM_NORESERVE)
599 acctflag = VM_NORESERVE;
af73e4d9 600 file = hugetlb_file_setup(name, hugesize, acctflag,
42d7395f
AK
601 &shp->mlock_user, HUGETLB_SHMFS_INODE,
602 (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
1da177e4 603 } else {
bf8f972d
BP
604 /*
605 * Do not allow no accounting for OVERCOMMIT_NEVER, even
239521f3 606 * if it's asked for.
bf8f972d
BP
607 */
608 if ((shmflg & SHM_NORESERVE) &&
609 sysctl_overcommit_memory != OVERCOMMIT_NEVER)
fc8744ad 610 acctflag = VM_NORESERVE;
e1832f29 611 file = shmem_kernel_file_setup(name, size, acctflag);
1da177e4
LT
612 }
613 error = PTR_ERR(file);
614 if (IS_ERR(file))
615 goto no_file;
616
b488893a 617 shp->shm_cprid = task_tgid_vnr(current);
1da177e4
LT
618 shp->shm_lprid = 0;
619 shp->shm_atim = shp->shm_dtim = 0;
7ff2819e 620 shp->shm_ctim = ktime_get_real_seconds();
1da177e4
LT
621 shp->shm_segsz = size;
622 shp->shm_nattch = 0;
1da177e4 623 shp->shm_file = file;
5774ed01 624 shp->shm_creator = current;
b9a53227 625
39c96a1b 626 /* ipc_addid() locks shp upon success. */
a2642f87
MS
627 error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
628 if (error < 0)
b9a53227 629 goto no_id;
b9a53227 630
ab602f79 631 list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
dbfcd91f 632
30475cc1
BP
633 /*
634 * shmid gets reported as "inode#" in /proc/pid/maps.
635 * proc-ps tools use this. Changing this will break them.
636 */
496ad9aa 637 file_inode(file)->i_ino = shp->shm_perm.id;
551110a9 638
4e982311 639 ns->shm_tot += numpages;
7ca7e564 640 error = shp->shm_perm.id;
dbfcd91f 641
cf9d5d78 642 ipc_unlock_object(&shp->shm_perm);
dbfcd91f 643 rcu_read_unlock();
7ca7e564 644 return error;
1da177e4
LT
645
646no_id:
2195d281 647 if (is_file_hugepages(file) && shp->mlock_user)
353d5c30 648 user_shm_unlock(size, shp->mlock_user);
1da177e4
LT
649 fput(file);
650no_file:
a2642f87 651 call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
1da177e4
LT
652 return error;
653}
654
/*
 * Called with shm_ids.rwsem and ipcp locked.
 *
 * ->associate hook of shmget(): defers to the security module.
 */
static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
{
	return security_shm_associate(ipcp, shmflg);
}
662
f4566f04 663/*
d9a605e4 664 * Called with shm_ids.rwsem and ipcp locked.
f4566f04 665 */
03f02c76
ND
666static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
667 struct ipc_params *params)
7748dbfa 668{
03f02c76
ND
669 struct shmid_kernel *shp;
670
671 shp = container_of(ipcp, struct shmid_kernel, shm_perm);
672 if (shp->shm_segsz < params->u.size)
7748dbfa
ND
673 return -EINVAL;
674
675 return 0;
676}
677
d5460c99 678SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
1da177e4 679{
4e982311 680 struct ipc_namespace *ns;
eb66ec44
MK
681 static const struct ipc_ops shm_ops = {
682 .getnew = newseg,
683 .associate = shm_security,
684 .more_checks = shm_more_checks,
685 };
7748dbfa 686 struct ipc_params shm_params;
4e982311
KK
687
688 ns = current->nsproxy->ipc_ns;
1da177e4 689
7748dbfa
ND
690 shm_params.key = key;
691 shm_params.flg = shmflg;
692 shm_params.u.size = size;
1da177e4 693
7748dbfa 694 return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
1da177e4
LT
695}
696
697static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
698{
239521f3 699 switch (version) {
1da177e4
LT
700 case IPC_64:
701 return copy_to_user(buf, in, sizeof(*in));
702 case IPC_OLD:
703 {
704 struct shmid_ds out;
705
3af54c9b 706 memset(&out, 0, sizeof(out));
1da177e4
LT
707 ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
708 out.shm_segsz = in->shm_segsz;
709 out.shm_atime = in->shm_atime;
710 out.shm_dtime = in->shm_dtime;
711 out.shm_ctime = in->shm_ctime;
712 out.shm_cpid = in->shm_cpid;
713 out.shm_lpid = in->shm_lpid;
714 out.shm_nattch = in->shm_nattch;
715
716 return copy_to_user(buf, &out, sizeof(out));
717 }
718 default:
719 return -EINVAL;
720 }
721}
722
016d7132
PP
723static inline unsigned long
724copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
1da177e4 725{
239521f3 726 switch (version) {
1da177e4 727 case IPC_64:
016d7132 728 if (copy_from_user(out, buf, sizeof(*out)))
1da177e4 729 return -EFAULT;
1da177e4 730 return 0;
1da177e4
LT
731 case IPC_OLD:
732 {
733 struct shmid_ds tbuf_old;
734
735 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
736 return -EFAULT;
737
016d7132
PP
738 out->shm_perm.uid = tbuf_old.shm_perm.uid;
739 out->shm_perm.gid = tbuf_old.shm_perm.gid;
740 out->shm_perm.mode = tbuf_old.shm_perm.mode;
1da177e4
LT
741
742 return 0;
743 }
744 default:
745 return -EINVAL;
746 }
747}
748
749static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
750{
239521f3 751 switch (version) {
1da177e4
LT
752 case IPC_64:
753 return copy_to_user(buf, in, sizeof(*in));
754 case IPC_OLD:
755 {
756 struct shminfo out;
757
239521f3 758 if (in->shmmax > INT_MAX)
1da177e4
LT
759 out.shmmax = INT_MAX;
760 else
761 out.shmmax = (int)in->shmmax;
762
763 out.shmmin = in->shmmin;
764 out.shmmni = in->shmmni;
765 out.shmseg = in->shmseg;
46c0a8ca 766 out.shmall = in->shmall;
1da177e4
LT
767
768 return copy_to_user(buf, &out, sizeof(out));
769 }
770 default:
771 return -EINVAL;
772 }
773}
774
b7952180
HD
775/*
776 * Calculate and add used RSS and swap pages of a shm.
d9a605e4 777 * Called with shm_ids.rwsem held as a reader
b7952180
HD
778 */
779static void shm_add_rss_swap(struct shmid_kernel *shp,
780 unsigned long *rss_add, unsigned long *swp_add)
781{
782 struct inode *inode;
783
496ad9aa 784 inode = file_inode(shp->shm_file);
b7952180
HD
785
786 if (is_file_hugepages(shp->shm_file)) {
787 struct address_space *mapping = inode->i_mapping;
788 struct hstate *h = hstate_file(shp->shm_file);
789 *rss_add += pages_per_huge_page(h) * mapping->nrpages;
790 } else {
791#ifdef CONFIG_SHMEM
792 struct shmem_inode_info *info = SHMEM_I(inode);
63980c80 793
4595ef88 794 spin_lock_irq(&info->lock);
b7952180
HD
795 *rss_add += inode->i_mapping->nrpages;
796 *swp_add += info->swapped;
4595ef88 797 spin_unlock_irq(&info->lock);
b7952180
HD
798#else
799 *rss_add += inode->i_mapping->nrpages;
800#endif
801 }
802}
803
f4566f04 804/*
d9a605e4 805 * Called with shm_ids.rwsem held as a reader
f4566f04 806 */
4e982311
KK
807static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
808 unsigned long *swp)
1da177e4 809{
7ca7e564
ND
810 int next_id;
811 int total, in_use;
1da177e4
LT
812
813 *rss = 0;
814 *swp = 0;
815
7ca7e564
ND
816 in_use = shm_ids(ns).in_use;
817
818 for (total = 0, next_id = 0; total < in_use; next_id++) {
e562aebc 819 struct kern_ipc_perm *ipc;
1da177e4 820 struct shmid_kernel *shp;
1da177e4 821
e562aebc
TB
822 ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
823 if (ipc == NULL)
1da177e4 824 continue;
e562aebc 825 shp = container_of(ipc, struct shmid_kernel, shm_perm);
1da177e4 826
b7952180 827 shm_add_rss_swap(shp, rss, swp);
7ca7e564
ND
828
829 total++;
1da177e4
LT
830 }
831}
832
8d4cc8b5 833/*
d9a605e4 834 * This function handles some shmctl commands which require the rwsem
8d4cc8b5 835 * to be held in write mode.
d9a605e4 836 * NOTE: no locks must be held, the rwsem is taken inside this function.
8d4cc8b5
PP
837 */
838static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
9ba720c1 839 struct shmid64_ds *shmid64)
1da177e4 840{
8d4cc8b5 841 struct kern_ipc_perm *ipcp;
8d4cc8b5
PP
842 struct shmid_kernel *shp;
843 int err;
844
d9a605e4 845 down_write(&shm_ids(ns).rwsem);
7b4cc5d8
DB
846 rcu_read_lock();
847
79ccf0f8 848 ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
9ba720c1 849 &shmid64->shm_perm, 0);
7b4cc5d8
DB
850 if (IS_ERR(ipcp)) {
851 err = PTR_ERR(ipcp);
7b4cc5d8
DB
852 goto out_unlock1;
853 }
8d4cc8b5 854
a5f75e7f 855 shp = container_of(ipcp, struct shmid_kernel, shm_perm);
8d4cc8b5 856
7191adff 857 err = security_shm_shmctl(&shp->shm_perm, cmd);
8d4cc8b5 858 if (err)
79ccf0f8 859 goto out_unlock1;
7b4cc5d8 860
8d4cc8b5
PP
861 switch (cmd) {
862 case IPC_RMID:
79ccf0f8 863 ipc_lock_object(&shp->shm_perm);
7b4cc5d8 864 /* do_shm_rmid unlocks the ipc object and rcu */
8d4cc8b5
PP
865 do_shm_rmid(ns, ipcp);
866 goto out_up;
867 case IPC_SET:
79ccf0f8 868 ipc_lock_object(&shp->shm_perm);
9ba720c1 869 err = ipc_update_perm(&shmid64->shm_perm, ipcp);
1efdb69b 870 if (err)
7b4cc5d8 871 goto out_unlock0;
7ff2819e 872 shp->shm_ctim = ktime_get_real_seconds();
8d4cc8b5
PP
873 break;
874 default:
875 err = -EINVAL;
79ccf0f8 876 goto out_unlock1;
8d4cc8b5 877 }
7b4cc5d8
DB
878
879out_unlock0:
880 ipc_unlock_object(&shp->shm_perm);
881out_unlock1:
882 rcu_read_unlock();
8d4cc8b5 883out_up:
d9a605e4 884 up_write(&shm_ids(ns).rwsem);
8d4cc8b5
PP
885 return err;
886}
887
9ba720c1
AV
888static int shmctl_ipc_info(struct ipc_namespace *ns,
889 struct shminfo64 *shminfo)
8d4cc8b5 890{
9ba720c1
AV
891 int err = security_shm_shmctl(NULL, IPC_INFO);
892 if (!err) {
893 memset(shminfo, 0, sizeof(*shminfo));
894 shminfo->shmmni = shminfo->shmseg = ns->shm_ctlmni;
895 shminfo->shmmax = ns->shm_ctlmax;
896 shminfo->shmall = ns->shm_ctlall;
897 shminfo->shmmin = SHMMIN;
d9a605e4 898 down_read(&shm_ids(ns).rwsem);
7ca7e564 899 err = ipc_get_maxid(&shm_ids(ns));
d9a605e4 900 up_read(&shm_ids(ns).rwsem);
239521f3 901 if (err < 0)
1da177e4 902 err = 0;
1da177e4 903 }
9ba720c1
AV
904 return err;
905}
1da177e4 906
9ba720c1
AV
907static int shmctl_shm_info(struct ipc_namespace *ns,
908 struct shm_info *shm_info)
909{
910 int err = security_shm_shmctl(NULL, SHM_INFO);
911 if (!err) {
912 memset(shm_info, 0, sizeof(*shm_info));
d9a605e4 913 down_read(&shm_ids(ns).rwsem);
9ba720c1
AV
914 shm_info->used_ids = shm_ids(ns).in_use;
915 shm_get_stat(ns, &shm_info->shm_rss, &shm_info->shm_swp);
916 shm_info->shm_tot = ns->shm_tot;
917 shm_info->swap_attempts = 0;
918 shm_info->swap_successes = 0;
7ca7e564 919 err = ipc_get_maxid(&shm_ids(ns));
d9a605e4 920 up_read(&shm_ids(ns).rwsem);
9ba720c1
AV
921 if (err < 0)
922 err = 0;
1da177e4 923 }
9ba720c1
AV
924 return err;
925}
c97cb9cc 926
9ba720c1
AV
927static int shmctl_stat(struct ipc_namespace *ns, int shmid,
928 int cmd, struct shmid64_ds *tbuf)
929{
930 struct shmid_kernel *shp;
87ad4b0d 931 int id = 0;
9ba720c1 932 int err;
c97cb9cc 933
87ad4b0d
PM
934 memset(tbuf, 0, sizeof(*tbuf));
935
9ba720c1
AV
936 rcu_read_lock();
937 if (cmd == SHM_STAT) {
938 shp = shm_obtain_object(ns, shmid);
939 if (IS_ERR(shp)) {
940 err = PTR_ERR(shp);
941 goto out_unlock;
942 }
87ad4b0d 943 id = shp->shm_perm.id;
9ba720c1
AV
944 } else {
945 shp = shm_obtain_object_check(ns, shmid);
946 if (IS_ERR(shp)) {
947 err = PTR_ERR(shp);
1da177e4 948 goto out_unlock;
1da177e4 949 }
9ba720c1 950 }
1da177e4 951
9ba720c1
AV
952 err = -EACCES;
953 if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
954 goto out_unlock;
c97cb9cc 955
7191adff 956 err = security_shm_shmctl(&shp->shm_perm, cmd);
9ba720c1
AV
957 if (err)
958 goto out_unlock;
959
87ad4b0d
PM
960 ipc_lock_object(&shp->shm_perm);
961
962 if (!ipc_valid_object(&shp->shm_perm)) {
963 ipc_unlock_object(&shp->shm_perm);
964 err = -EIDRM;
965 goto out_unlock;
966 }
967
9ba720c1
AV
968 kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm);
969 tbuf->shm_segsz = shp->shm_segsz;
970 tbuf->shm_atime = shp->shm_atim;
971 tbuf->shm_dtime = shp->shm_dtim;
972 tbuf->shm_ctime = shp->shm_ctim;
973 tbuf->shm_cpid = shp->shm_cprid;
974 tbuf->shm_lpid = shp->shm_lprid;
975 tbuf->shm_nattch = shp->shm_nattch;
87ad4b0d
PM
976
977 ipc_unlock_object(&shp->shm_perm);
9ba720c1 978 rcu_read_unlock();
87ad4b0d 979 return id;
68eccc1d
DB
980
981out_unlock:
c97cb9cc 982 rcu_read_unlock();
68eccc1d
DB
983 return err;
984}
985
9ba720c1 986static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
68eccc1d
DB
987{
988 struct shmid_kernel *shp;
9ba720c1
AV
989 struct file *shm_file;
990 int err;
68eccc1d 991
9ba720c1
AV
992 rcu_read_lock();
993 shp = shm_obtain_object_check(ns, shmid);
994 if (IS_ERR(shp)) {
995 err = PTR_ERR(shp);
996 goto out_unlock1;
1da177e4 997 }
c97cb9cc 998
9ba720c1 999 audit_ipc_obj(&(shp->shm_perm));
7191adff 1000 err = security_shm_shmctl(&shp->shm_perm, cmd);
9ba720c1
AV
1001 if (err)
1002 goto out_unlock1;
c97cb9cc 1003
9ba720c1 1004 ipc_lock_object(&shp->shm_perm);
c97cb9cc 1005
9ba720c1
AV
1006 /* check if shm_destroy() is tearing down shp */
1007 if (!ipc_valid_object(&shp->shm_perm)) {
1008 err = -EIDRM;
1009 goto out_unlock0;
1da177e4 1010 }
073115d6 1011
9ba720c1
AV
1012 if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
1013 kuid_t euid = current_euid();
0f3d2b01 1014
9ba720c1
AV
1015 if (!uid_eq(euid, shp->shm_perm.uid) &&
1016 !uid_eq(euid, shp->shm_perm.cuid)) {
1017 err = -EPERM;
0f3d2b01
RA
1018 goto out_unlock0;
1019 }
9ba720c1
AV
1020 if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
1021 err = -EPERM;
1022 goto out_unlock0;
1da177e4 1023 }
68eccc1d
DB
1024 }
1025
9ba720c1
AV
1026 shm_file = shp->shm_file;
1027 if (is_file_hugepages(shm_file))
1028 goto out_unlock0;
85046579 1029
9ba720c1
AV
1030 if (cmd == SHM_LOCK) {
1031 struct user_struct *user = current_user();
63980c80 1032
9ba720c1
AV
1033 err = shmem_lock(shm_file, 1, user);
1034 if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
1035 shp->shm_perm.mode |= SHM_LOCKED;
1036 shp->mlock_user = user;
1da177e4 1037 }
9ba720c1 1038 goto out_unlock0;
1da177e4
LT
1039 }
1040
9ba720c1
AV
1041 /* SHM_UNLOCK */
1042 if (!(shp->shm_perm.mode & SHM_LOCKED))
1043 goto out_unlock0;
1044 shmem_lock(shm_file, 0, shp->mlock_user);
1045 shp->shm_perm.mode &= ~SHM_LOCKED;
1046 shp->mlock_user = NULL;
1047 get_file(shm_file);
1048 ipc_unlock_object(&shp->shm_perm);
1049 rcu_read_unlock();
1050 shmem_unlock_mapping(shm_file->f_mapping);
1051
1052 fput(shm_file);
1053 return err;
1054
2caacaa8
DB
1055out_unlock0:
1056 ipc_unlock_object(&shp->shm_perm);
1057out_unlock1:
c97cb9cc 1058 rcu_read_unlock();
68eccc1d
DB
1059 return err;
1060}
1061
1062SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
1063{
68eccc1d
DB
1064 int err, version;
1065 struct ipc_namespace *ns;
553f770e 1066 struct shmid64_ds sem64;
68eccc1d 1067
2caacaa8
DB
1068 if (cmd < 0 || shmid < 0)
1069 return -EINVAL;
68eccc1d
DB
1070
1071 version = ipc_parse_version(&cmd);
1072 ns = current->nsproxy->ipc_ns;
1073
1074 switch (cmd) {
9ba720c1
AV
1075 case IPC_INFO: {
1076 struct shminfo64 shminfo;
1077 err = shmctl_ipc_info(ns, &shminfo);
1078 if (err < 0)
1079 return err;
1080 if (copy_shminfo_to_user(buf, &shminfo, version))
1081 err = -EFAULT;
1082 return err;
1083 }
1084 case SHM_INFO: {
1085 struct shm_info shm_info;
1086 err = shmctl_shm_info(ns, &shm_info);
1087 if (err < 0)
1088 return err;
1089 if (copy_to_user(buf, &shm_info, sizeof(shm_info)))
1090 err = -EFAULT;
1091 return err;
1092 }
68eccc1d 1093 case SHM_STAT:
9ba720c1 1094 case IPC_STAT: {
553f770e 1095 err = shmctl_stat(ns, shmid, cmd, &sem64);
9ba720c1
AV
1096 if (err < 0)
1097 return err;
553f770e 1098 if (copy_shmid_to_user(buf, &sem64, version))
9ba720c1
AV
1099 err = -EFAULT;
1100 return err;
1101 }
2caacaa8 1102 case IPC_SET:
553f770e 1103 if (copy_shmid_from_user(&sem64, buf, version))
9ba720c1 1104 return -EFAULT;
553f770e 1105 /* fallthru */
9ba720c1 1106 case IPC_RMID:
553f770e 1107 return shmctl_down(ns, shmid, cmd, &sem64);
1da177e4
LT
1108 case SHM_LOCK:
1109 case SHM_UNLOCK:
9ba720c1
AV
1110 return shmctl_do_lock(ns, shmid, cmd);
1111 default:
1112 return -EINVAL;
1113 }
1114}
89e004ea 1115
553f770e
AV
1116#ifdef CONFIG_COMPAT
1117
1118struct compat_shmid_ds {
1119 struct compat_ipc_perm shm_perm;
1120 int shm_segsz;
1121 compat_time_t shm_atime;
1122 compat_time_t shm_dtime;
1123 compat_time_t shm_ctime;
1124 compat_ipc_pid_t shm_cpid;
1125 compat_ipc_pid_t shm_lpid;
1126 unsigned short shm_nattch;
1127 unsigned short shm_unused;
1128 compat_uptr_t shm_unused2;
1129 compat_uptr_t shm_unused3;
1130};
1da177e4 1131
553f770e
AV
1132struct compat_shminfo64 {
1133 compat_ulong_t shmmax;
1134 compat_ulong_t shmmin;
1135 compat_ulong_t shmmni;
1136 compat_ulong_t shmseg;
1137 compat_ulong_t shmall;
1138 compat_ulong_t __unused1;
1139 compat_ulong_t __unused2;
1140 compat_ulong_t __unused3;
1141 compat_ulong_t __unused4;
1142};
073115d6 1143
553f770e
AV
1144struct compat_shm_info {
1145 compat_int_t used_ids;
1146 compat_ulong_t shm_tot, shm_rss, shm_swp;
1147 compat_ulong_t swap_attempts, swap_successes;
1148};
0f3d2b01 1149
553f770e
AV
1150static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in,
1151 int version)
1152{
1153 if (in->shmmax > INT_MAX)
1154 in->shmmax = INT_MAX;
1155 if (version == IPC_64) {
1156 struct compat_shminfo64 info;
1157 memset(&info, 0, sizeof(info));
1158 info.shmmax = in->shmmax;
1159 info.shmmin = in->shmmin;
1160 info.shmmni = in->shmmni;
1161 info.shmseg = in->shmseg;
1162 info.shmall = in->shmall;
1163 return copy_to_user(buf, &info, sizeof(info));
1164 } else {
1165 struct shminfo info;
1166 memset(&info, 0, sizeof(info));
1167 info.shmmax = in->shmmax;
1168 info.shmmin = in->shmmin;
1169 info.shmmni = in->shmmni;
1170 info.shmseg = in->shmseg;
1171 info.shmall = in->shmall;
1172 return copy_to_user(buf, &info, sizeof(info));
1173 }
1174}
0f3d2b01 1175
553f770e
AV
1176static int put_compat_shm_info(struct shm_info *ip,
1177 struct compat_shm_info __user *uip)
1178{
1179 struct compat_shm_info info;
1180
1181 memset(&info, 0, sizeof(info));
1182 info.used_ids = ip->used_ids;
1183 info.shm_tot = ip->shm_tot;
1184 info.shm_rss = ip->shm_rss;
1185 info.shm_swp = ip->shm_swp;
1186 info.swap_attempts = ip->swap_attempts;
1187 info.swap_successes = ip->swap_successes;
b776e4b1 1188 return copy_to_user(uip, &info, sizeof(info));
553f770e 1189}
1da177e4 1190
553f770e
AV
1191static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,
1192 int version)
1193{
1194 if (version == IPC_64) {
1195 struct compat_shmid64_ds v;
1196 memset(&v, 0, sizeof(v));
28327fae 1197 to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm);
553f770e
AV
1198 v.shm_atime = in->shm_atime;
1199 v.shm_dtime = in->shm_dtime;
1200 v.shm_ctime = in->shm_ctime;
1201 v.shm_segsz = in->shm_segsz;
1202 v.shm_nattch = in->shm_nattch;
1203 v.shm_cpid = in->shm_cpid;
1204 v.shm_lpid = in->shm_lpid;
1205 return copy_to_user(buf, &v, sizeof(v));
1206 } else {
1207 struct compat_shmid_ds v;
1208 memset(&v, 0, sizeof(v));
28327fae 1209 to_compat_ipc_perm(&v.shm_perm, &in->shm_perm);
553f770e 1210 v.shm_perm.key = in->shm_perm.key;
553f770e
AV
1211 v.shm_atime = in->shm_atime;
1212 v.shm_dtime = in->shm_dtime;
1213 v.shm_ctime = in->shm_ctime;
1214 v.shm_segsz = in->shm_segsz;
1215 v.shm_nattch = in->shm_nattch;
1216 v.shm_cpid = in->shm_cpid;
1217 v.shm_lpid = in->shm_lpid;
1218 return copy_to_user(buf, &v, sizeof(v));
1219 }
1220}
85046579 1221
553f770e
AV
1222static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf,
1223 int version)
1224{
1225 memset(out, 0, sizeof(*out));
1226 if (version == IPC_64) {
6aa211e8 1227 struct compat_shmid64_ds __user *p = buf;
28327fae 1228 return get_compat_ipc64_perm(&out->shm_perm, &p->shm_perm);
553f770e 1229 } else {
6aa211e8 1230 struct compat_shmid_ds __user *p = buf;
28327fae 1231 return get_compat_ipc_perm(&out->shm_perm, &p->shm_perm);
553f770e 1232 }
553f770e 1233}
63980c80 1234
553f770e
AV
1235COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr)
1236{
1237 struct ipc_namespace *ns;
1238 struct shmid64_ds sem64;
1239 int version = compat_ipc_parse_version(&cmd);
1240 int err;
85046579 1241
553f770e
AV
1242 ns = current->nsproxy->ipc_ns;
1243
1244 if (cmd < 0 || shmid < 0)
1245 return -EINVAL;
2caacaa8 1246
553f770e
AV
1247 switch (cmd) {
1248 case IPC_INFO: {
1249 struct shminfo64 shminfo;
1250 err = shmctl_ipc_info(ns, &shminfo);
1251 if (err < 0)
1252 return err;
1253 if (copy_compat_shminfo_to_user(uptr, &shminfo, version))
1254 err = -EFAULT;
1255 return err;
1256 }
1257 case SHM_INFO: {
1258 struct shm_info shm_info;
1259 err = shmctl_shm_info(ns, &shm_info);
1260 if (err < 0)
1261 return err;
1262 if (put_compat_shm_info(&shm_info, uptr))
1263 err = -EFAULT;
8d4cc8b5 1264 return err;
2caacaa8 1265 }
553f770e
AV
1266 case IPC_STAT:
1267 case SHM_STAT:
1268 err = shmctl_stat(ns, shmid, cmd, &sem64);
1269 if (err < 0)
1270 return err;
58aff0af 1271 if (copy_compat_shmid_to_user(uptr, &sem64, version))
553f770e
AV
1272 err = -EFAULT;
1273 return err;
1274
1275 case IPC_SET:
1276 if (copy_compat_shmid_from_user(&sem64, uptr, version))
1277 return -EFAULT;
1278 /* fallthru */
1279 case IPC_RMID:
1280 return shmctl_down(ns, shmid, cmd, &sem64);
1281 case SHM_LOCK:
1282 case SHM_UNLOCK:
1283 return shmctl_do_lock(ns, shmid, cmd);
1284 break;
1da177e4 1285 default:
8d4cc8b5 1286 return -EINVAL;
1da177e4 1287 }
1da177e4
LT
1288 return err;
1289}
553f770e 1290#endif
1da177e4
LT
1291
1292/*
1293 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
1294 *
1295 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
1296 * "raddr" thing points to kernel space, and there has to be a wrapper around
1297 * this.
1298 */
95e91b83
DB
1299long do_shmat(int shmid, char __user *shmaddr, int shmflg,
1300 ulong *raddr, unsigned long shmlba)
1da177e4
LT
1301{
1302 struct shmid_kernel *shp;
f0cb8802 1303 unsigned long addr = (unsigned long)shmaddr;
1da177e4 1304 unsigned long size;
239521f3 1305 struct file *file;
1da177e4 1306 int err;
f0cb8802 1307 unsigned long flags = MAP_SHARED;
1da177e4 1308 unsigned long prot;
1da177e4 1309 int acc_mode;
4e982311 1310 struct ipc_namespace *ns;
bc56bba8
EB
1311 struct shm_file_data *sfd;
1312 struct path path;
aeb5d727 1313 fmode_t f_mode;
41badc15 1314 unsigned long populate = 0;
1da177e4 1315
bc56bba8
EB
1316 err = -EINVAL;
1317 if (shmid < 0)
1da177e4 1318 goto out;
f0cb8802
DB
1319
1320 if (addr) {
079a96ae 1321 if (addr & (shmlba - 1)) {
95e91b83
DB
1322 /*
1323 * Round down to the nearest multiple of shmlba.
1324 * For sane do_mmap_pgoff() parameters, avoid
1325 * round downs that trigger nil-page and MAP_FIXED.
1326 */
1327 if ((shmflg & SHM_RND) && addr >= shmlba)
1328 addr &= ~(shmlba - 1);
1da177e4
LT
1329 else
1330#ifndef __ARCH_FORCE_SHMLBA
1331 if (addr & ~PAGE_MASK)
1332#endif
bc56bba8 1333 goto out;
1da177e4 1334 }
1da177e4 1335
f0cb8802
DB
1336 flags |= MAP_FIXED;
1337 } else if ((shmflg & SHM_REMAP))
1338 goto out;
1da177e4
LT
1339
1340 if (shmflg & SHM_RDONLY) {
1341 prot = PROT_READ;
1da177e4 1342 acc_mode = S_IRUGO;
bc56bba8 1343 f_mode = FMODE_READ;
1da177e4
LT
1344 } else {
1345 prot = PROT_READ | PROT_WRITE;
1da177e4 1346 acc_mode = S_IRUGO | S_IWUGO;
bc56bba8 1347 f_mode = FMODE_READ | FMODE_WRITE;
1da177e4
LT
1348 }
1349 if (shmflg & SHM_EXEC) {
1350 prot |= PROT_EXEC;
1351 acc_mode |= S_IXUGO;
1352 }
1353
1354 /*
1355 * We cannot rely on the fs check since SYSV IPC does have an
1356 * additional creator id...
1357 */
4e982311 1358 ns = current->nsproxy->ipc_ns;
c2c737a0
DB
1359 rcu_read_lock();
1360 shp = shm_obtain_object_check(ns, shmid);
023a5355
ND
1361 if (IS_ERR(shp)) {
1362 err = PTR_ERR(shp);
c2c737a0 1363 goto out_unlock;
023a5355 1364 }
bc56bba8
EB
1365
1366 err = -EACCES;
b0e77598 1367 if (ipcperms(ns, &shp->shm_perm, acc_mode))
bc56bba8 1368 goto out_unlock;
1da177e4 1369
7191adff 1370 err = security_shm_shmat(&shp->shm_perm, shmaddr, shmflg);
bc56bba8
EB
1371 if (err)
1372 goto out_unlock;
1373
c2c737a0 1374 ipc_lock_object(&shp->shm_perm);
a399b29d
GT
1375
1376 /* check if shm_destroy() is tearing down shp */
0f3d2b01 1377 if (!ipc_valid_object(&shp->shm_perm)) {
a399b29d
GT
1378 ipc_unlock_object(&shp->shm_perm);
1379 err = -EIDRM;
1380 goto out_unlock;
1381 }
1382
2c48b9c4
AV
1383 path = shp->shm_file->f_path;
1384 path_get(&path);
1da177e4 1385 shp->shm_nattch++;
75c3cfa8 1386 size = i_size_read(d_inode(path.dentry));
c2c737a0
DB
1387 ipc_unlock_object(&shp->shm_perm);
1388 rcu_read_unlock();
1da177e4 1389
bc56bba8
EB
1390 err = -ENOMEM;
1391 sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
f42569b1
DB
1392 if (!sfd) {
1393 path_put(&path);
1394 goto out_nattch;
1395 }
bc56bba8 1396
2c48b9c4
AV
1397 file = alloc_file(&path, f_mode,
1398 is_file_hugepages(shp->shm_file) ?
c4caa778
AV
1399 &shm_file_operations_huge :
1400 &shm_file_operations);
39b65252 1401 err = PTR_ERR(file);
f42569b1
DB
1402 if (IS_ERR(file)) {
1403 kfree(sfd);
1404 path_put(&path);
1405 goto out_nattch;
1406 }
bc56bba8 1407
bc56bba8 1408 file->private_data = sfd;
bc56bba8 1409 file->f_mapping = shp->shm_file->f_mapping;
7ca7e564 1410 sfd->id = shp->shm_perm.id;
bc56bba8
EB
1411 sfd->ns = get_ipc_ns(ns);
1412 sfd->file = shp->shm_file;
1413 sfd->vm_ops = NULL;
1414
8b3ec681
AV
1415 err = security_mmap_file(file, prot, flags);
1416 if (err)
1417 goto out_fput;
1418
91f4f94e
MH
1419 if (down_write_killable(&current->mm->mmap_sem)) {
1420 err = -EINTR;
1421 goto out_fput;
1422 }
1423
1da177e4 1424 if (addr && !(shmflg & SHM_REMAP)) {
bc56bba8 1425 err = -EINVAL;
247a8ce8
MS
1426 if (addr + size < addr)
1427 goto invalid;
1428
1da177e4
LT
1429 if (find_vma_intersection(current->mm, addr, addr + size))
1430 goto invalid;
1da177e4 1431 }
f42569b1 1432
897ab3e0 1433 addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL);
bebeb3d6 1434 *raddr = addr;
bc56bba8 1435 err = 0;
bebeb3d6
ML
1436 if (IS_ERR_VALUE(addr))
1437 err = (long)addr;
1da177e4
LT
1438invalid:
1439 up_write(&current->mm->mmap_sem);
bebeb3d6 1440 if (populate)
41badc15 1441 mm_populate(addr, populate);
1da177e4 1442
8b3ec681 1443out_fput:
bc56bba8
EB
1444 fput(file);
1445
1446out_nattch:
d9a605e4 1447 down_write(&shm_ids(ns).rwsem);
00c2bf85 1448 shp = shm_lock(ns, shmid);
1da177e4 1449 shp->shm_nattch--;
b34a6b1d 1450 if (shm_may_destroy(ns, shp))
4e982311 1451 shm_destroy(ns, shp);
1da177e4
LT
1452 else
1453 shm_unlock(shp);
d9a605e4 1454 up_write(&shm_ids(ns).rwsem);
1da177e4 1455 return err;
bc56bba8
EB
1456
1457out_unlock:
c2c737a0 1458 rcu_read_unlock();
f42569b1
DB
1459out:
1460 return err;
1da177e4
LT
1461}
1462
d5460c99 1463SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
7d87e14c
SR
1464{
1465 unsigned long ret;
1466 long err;
1467
079a96ae 1468 err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
7d87e14c
SR
1469 if (err)
1470 return err;
1471 force_successful_syscall_return();
1472 return (long)ret;
1473}
1474
a78ee9ed
AV
1475#ifdef CONFIG_COMPAT
1476
1477#ifndef COMPAT_SHMLBA
1478#define COMPAT_SHMLBA SHMLBA
1479#endif
1480
1481COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg)
1482{
1483 unsigned long ret;
1484 long err;
1485
1486 err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA);
1487 if (err)
1488 return err;
1489 force_successful_syscall_return();
1490 return (long)ret;
1491}
1492#endif
1493
1da177e4
LT
1494/*
1495 * detach and kill segment if marked destroyed.
1496 * The work is done in shm_close.
1497 */
d5460c99 1498SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
1da177e4
LT
1499{
1500 struct mm_struct *mm = current->mm;
586c7e6a 1501 struct vm_area_struct *vma;
1da177e4 1502 unsigned long addr = (unsigned long)shmaddr;
1da177e4 1503 int retval = -EINVAL;
586c7e6a
MF
1504#ifdef CONFIG_MMU
1505 loff_t size = 0;
d3c97900 1506 struct file *file;
586c7e6a
MF
1507 struct vm_area_struct *next;
1508#endif
1da177e4 1509
df1e2fb5
HD
1510 if (addr & ~PAGE_MASK)
1511 return retval;
1512
91f4f94e
MH
1513 if (down_write_killable(&mm->mmap_sem))
1514 return -EINTR;
1da177e4
LT
1515
1516 /*
1517 * This function tries to be smart and unmap shm segments that
1518 * were modified by partial mlock or munmap calls:
1519 * - It first determines the size of the shm segment that should be
1520 * unmapped: It searches for a vma that is backed by shm and that
1521 * started at address shmaddr. It records it's size and then unmaps
1522 * it.
1523 * - Then it unmaps all shm vmas that started at shmaddr and that
d3c97900
DH
1524 * are within the initially determined size and that are from the
1525 * same shm segment from which we determined the size.
1da177e4
LT
1526 * Errors from do_munmap are ignored: the function only fails if
1527 * it's called with invalid parameters or if it's called to unmap
1528 * a part of a vma. Both calls in this function are for full vmas,
1529 * the parameters are directly copied from the vma itself and always
1530 * valid - therefore do_munmap cannot fail. (famous last words?)
1531 */
1532 /*
1533 * If it had been mremap()'d, the starting address would not
1534 * match the usual checks anyway. So assume all vma's are
1535 * above the starting address given.
1536 */
1537 vma = find_vma(mm, addr);
1538
8feae131 1539#ifdef CONFIG_MMU
1da177e4
LT
1540 while (vma) {
1541 next = vma->vm_next;
1542
1543 /*
1544 * Check if the starting address would match, i.e. it's
1545 * a fragment created by mprotect() and/or munmap(), or it
1546 * otherwise it starts at this address with no hassles.
1547 */
bc56bba8 1548 if ((vma->vm_ops == &shm_vm_ops) &&
1da177e4
LT
1549 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {
1550
d3c97900
DH
1551 /*
1552 * Record the file of the shm segment being
1553 * unmapped. With mremap(), someone could place
1554 * page from another segment but with equal offsets
1555 * in the range we are unmapping.
1556 */
1557 file = vma->vm_file;
07a46ed2 1558 size = i_size_read(file_inode(vma->vm_file));
897ab3e0 1559 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
1da177e4
LT
1560 /*
1561 * We discovered the size of the shm segment, so
1562 * break out of here and fall through to the next
1563 * loop that uses the size information to stop
1564 * searching for matching vma's.
1565 */
1566 retval = 0;
1567 vma = next;
1568 break;
1569 }
1570 vma = next;
1571 }
1572
1573 /*
1574 * We need look no further than the maximum address a fragment
1575 * could possibly have landed at. Also cast things to loff_t to
25985edc 1576 * prevent overflows and make comparisons vs. equal-width types.
1da177e4 1577 */
8e36709d 1578 size = PAGE_ALIGN(size);
1da177e4
LT
1579 while (vma && (loff_t)(vma->vm_end - addr) <= size) {
1580 next = vma->vm_next;
1581
1582 /* finding a matching vma now does not alter retval */
bc56bba8 1583 if ((vma->vm_ops == &shm_vm_ops) &&
d3c97900
DH
1584 ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
1585 (vma->vm_file == file))
897ab3e0 1586 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
1da177e4
LT
1587 vma = next;
1588 }
1589
63980c80 1590#else /* CONFIG_MMU */
8feae131 1591 /* under NOMMU conditions, the exact address to be destroyed must be
63980c80
SP
1592 * given
1593 */
530fcd16 1594 if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
897ab3e0 1595 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
8feae131
DH
1596 retval = 0;
1597 }
1598
1599#endif
1600
1da177e4
LT
1601 up_write(&mm->mmap_sem);
1602 return retval;
1603}
1604
1605#ifdef CONFIG_PROC_FS
19b4946c 1606static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
1da177e4 1607{
1efdb69b 1608 struct user_namespace *user_ns = seq_user_ns(s);
ade9f91b
KC
1609 struct kern_ipc_perm *ipcp = it;
1610 struct shmid_kernel *shp;
b7952180
HD
1611 unsigned long rss = 0, swp = 0;
1612
ade9f91b 1613 shp = container_of(ipcp, struct shmid_kernel, shm_perm);
b7952180 1614 shm_add_rss_swap(shp, &rss, &swp);
1da177e4 1615
6c826818
PM
1616#if BITS_PER_LONG <= 32
1617#define SIZE_SPEC "%10lu"
1618#else
1619#define SIZE_SPEC "%21lu"
1620#endif
1da177e4 1621
7f032d6e
JP
1622 seq_printf(s,
1623 "%10d %10d %4o " SIZE_SPEC " %5u %5u "
7ff2819e 1624 "%5lu %5u %5u %5u %5u %10llu %10llu %10llu "
7f032d6e
JP
1625 SIZE_SPEC " " SIZE_SPEC "\n",
1626 shp->shm_perm.key,
1627 shp->shm_perm.id,
1628 shp->shm_perm.mode,
1629 shp->shm_segsz,
1630 shp->shm_cprid,
1631 shp->shm_lprid,
1632 shp->shm_nattch,
1633 from_kuid_munged(user_ns, shp->shm_perm.uid),
1634 from_kgid_munged(user_ns, shp->shm_perm.gid),
1635 from_kuid_munged(user_ns, shp->shm_perm.cuid),
1636 from_kgid_munged(user_ns, shp->shm_perm.cgid),
1637 shp->shm_atim,
1638 shp->shm_dtim,
1639 shp->shm_ctim,
1640 rss * PAGE_SIZE,
1641 swp * PAGE_SIZE);
1642
1643 return 0;
1da177e4
LT
1644}
1645#endif