Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
ae5e1b22 PE |
2 | /* |
3 | * linux/ipc/namespace.c | |
4 | * Copyright (C) 2006 Pavel Emelyanov <xemul@openvz.org> OpenVZ, SWsoft Inc. | |
5 | */ | |
6 | ||
7 | #include <linux/ipc.h> | |
8 | #include <linux/msg.h> | |
9 | #include <linux/ipc_namespace.h> | |
10 | #include <linux/rcupdate.h> | |
11 | #include <linux/nsproxy.h> | |
12 | #include <linux/slab.h> | |
5b825c3a | 13 | #include <linux/cred.h> |
7eafd7c7 SH |
14 | #include <linux/fs.h> |
15 | #include <linux/mount.h> | |
b515498f | 16 | #include <linux/user_namespace.h> |
0bb80f24 | 17 | #include <linux/proc_ns.h> |
f719ff9b | 18 | #include <linux/sched/task.h> |
ae5e1b22 PE |
19 | |
20 | #include "util.h" | |
21 | ||
a80c4adc RR |
/*
 * Tearing down an ipc namespace is handed off to a work item so that the
 * task dropping the last reference does not itself have to sit through
 * the synchronize_rcu() that precedes the actual free.
 */
static void free_ipc(struct work_struct *unused);
static DECLARE_WORK(free_ipc_work, free_ipc);
27 | ||
aba35661 EB |
28 | static struct ucounts *inc_ipc_namespaces(struct user_namespace *ns) |
29 | { | |
30 | return inc_ucount(ns, current_euid(), UCOUNT_IPC_NAMESPACES); | |
31 | } | |
32 | ||
33 | static void dec_ipc_namespaces(struct ucounts *ucounts) | |
34 | { | |
35 | dec_ucount(ucounts, UCOUNT_IPC_NAMESPACES); | |
36 | } | |
37 | ||
bcf58e72 | 38 | static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, |
b0e77598 | 39 | struct ipc_namespace *old_ns) |
ae5e1b22 | 40 | { |
ae5e1b22 | 41 | struct ipc_namespace *ns; |
aba35661 | 42 | struct ucounts *ucounts; |
7eafd7c7 | 43 | int err; |
ae5e1b22 | 44 | |
df75e774 | 45 | err = -ENOSPC; |
a80c4adc | 46 | again: |
aba35661 | 47 | ucounts = inc_ipc_namespaces(user_ns); |
a80c4adc RR |
48 | if (!ucounts) { |
49 | /* | |
50 | * IPC namespaces are freed asynchronously, by free_ipc_work. | |
51 | * If frees were pending, flush_work will wait, and | |
52 | * return true. Fail the allocation if no frees are pending. | |
53 | */ | |
54 | if (flush_work(&free_ipc_work)) | |
55 | goto again; | |
aba35661 | 56 | goto fail; |
a80c4adc | 57 | } |
aba35661 EB |
58 | |
59 | err = -ENOMEM; | |
30acd0bd | 60 | ns = kzalloc(sizeof(struct ipc_namespace), GFP_KERNEL_ACCOUNT); |
ae5e1b22 | 61 | if (ns == NULL) |
aba35661 | 62 | goto fail_dec; |
ae5e1b22 | 63 | |
6344c433 | 64 | err = ns_alloc_inum(&ns->ns); |
aba35661 EB |
65 | if (err) |
66 | goto fail_free; | |
33c42940 | 67 | ns->ns.ops = &ipcns_operations; |
98f842e6 | 68 | |
137ec390 | 69 | refcount_set(&ns->ns.count, 1); |
b236017a | 70 | ns->user_ns = get_user_ns(user_ns); |
aba35661 | 71 | ns->ucounts = ucounts; |
b236017a | 72 | |
eae04d25 | 73 | err = mq_init_ns(ns); |
aba35661 EB |
74 | if (err) |
75 | goto fail_put; | |
4d89dc6a | 76 | |
dc55e35f AG |
77 | err = -ENOMEM; |
78 | if (!setup_mq_sysctls(ns)) | |
79 | goto fail_put; | |
80 | ||
1f5c135e | 81 | if (!setup_ipc_sysctls(ns)) |
db7cfc38 | 82 | goto fail_mq; |
1f5c135e | 83 | |
72d1e611 JS |
84 | err = msg_init_ns(ns); |
85 | if (err) | |
86 | goto fail_put; | |
87 | ||
eae04d25 | 88 | sem_init_ns(ns); |
eae04d25 | 89 | shm_init_ns(ns); |
ae5e1b22 | 90 | |
ae5e1b22 | 91 | return ns; |
aba35661 | 92 | |
db7cfc38 AG |
93 | fail_mq: |
94 | retire_mq_sysctls(ns); | |
95 | ||
aba35661 EB |
96 | fail_put: |
97 | put_user_ns(ns->user_ns); | |
98 | ns_free_inum(&ns->ns); | |
99 | fail_free: | |
100 | kfree(ns); | |
101 | fail_dec: | |
102 | dec_ipc_namespaces(ucounts); | |
103 | fail: | |
104 | return ERR_PTR(err); | |
ae5e1b22 PE |
105 | } |
106 | ||
b0e77598 | 107 | struct ipc_namespace *copy_ipcs(unsigned long flags, |
bcf58e72 | 108 | struct user_namespace *user_ns, struct ipc_namespace *ns) |
ae5e1b22 | 109 | { |
ae5e1b22 | 110 | if (!(flags & CLONE_NEWIPC)) |
64424289 | 111 | return get_ipc_ns(ns); |
bcf58e72 | 112 | return create_ipc_ns(user_ns, ns); |
ae5e1b22 PE |
113 | } |
114 | ||
01b8b07a PP |
115 | /* |
116 | * free_ipcs - free all ipcs of one type | |
117 | * @ns: the namespace to remove the ipcs from | |
118 | * @ids: the table of ipcs to free | |
119 | * @free: the function called to free each individual ipc | |
120 | * | |
121 | * Called for each kind of ipc when an ipc_namespace exits. | |
122 | */ | |
123 | void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids, | |
124 | void (*free)(struct ipc_namespace *, struct kern_ipc_perm *)) | |
125 | { | |
126 | struct kern_ipc_perm *perm; | |
127 | int next_id; | |
128 | int total, in_use; | |
129 | ||
d9a605e4 | 130 | down_write(&ids->rwsem); |
01b8b07a PP |
131 | |
132 | in_use = ids->in_use; | |
133 | ||
134 | for (total = 0, next_id = 0; total < in_use; next_id++) { | |
135 | perm = idr_find(&ids->ipcs_idr, next_id); | |
136 | if (perm == NULL) | |
137 | continue; | |
32a27500 DB |
138 | rcu_read_lock(); |
139 | ipc_lock_object(perm); | |
01b8b07a PP |
140 | free(ns, perm); |
141 | total++; | |
142 | } | |
d9a605e4 | 143 | up_write(&ids->rwsem); |
01b8b07a PP |
144 | } |
145 | ||
b4188def AD |
146 | static void free_ipc_ns(struct ipc_namespace *ns) |
147 | { | |
da27f796 RR |
148 | /* |
149 | * Caller needs to wait for an RCU grace period to have passed | |
150 | * after making the mount point inaccessible to new accesses. | |
e1eb26fa | 151 | */ |
da27f796 | 152 | mntput(ns->mq_mnt); |
b4188def AD |
153 | sem_exit_ns(ns); |
154 | msg_exit_ns(ns); | |
155 | shm_exit_ns(ns); | |
b4188def | 156 | |
dc55e35f | 157 | retire_mq_sysctls(ns); |
1f5c135e | 158 | retire_ipc_sysctls(ns); |
dc55e35f | 159 | |
aba35661 | 160 | dec_ipc_namespaces(ns->ucounts); |
b515498f | 161 | put_user_ns(ns->user_ns); |
6344c433 | 162 | ns_free_inum(&ns->ns); |
be4d250a | 163 | kfree(ns); |
b4188def AD |
164 | } |
165 | ||
e1eb26fa GS |
166 | static LLIST_HEAD(free_ipc_list); |
167 | static void free_ipc(struct work_struct *unused) | |
168 | { | |
169 | struct llist_node *node = llist_del_all(&free_ipc_list); | |
170 | struct ipc_namespace *n, *t; | |
171 | ||
da27f796 RR |
172 | llist_for_each_entry_safe(n, t, node, mnt_llist) |
173 | mnt_make_shortterm(n->mq_mnt); | |
174 | ||
175 | /* Wait for any last users to have gone away. */ | |
176 | synchronize_rcu(); | |
177 | ||
e1eb26fa GS |
178 | llist_for_each_entry_safe(n, t, node, mnt_llist) |
179 | free_ipc_ns(n); | |
180 | } | |
181 | ||
7eafd7c7 SH |
182 | /* |
183 | * put_ipc_ns - drop a reference to an ipc namespace. | |
184 | * @ns: the namespace to put | |
185 | * | |
186 | * If this is the last task in the namespace exiting, and | |
187 | * it is dropping the refcount to 0, then it can race with | |
188 | * a task in another ipc namespace but in a mounts namespace | |
189 | * which has this ipcns's mqueuefs mounted, doing some action | |
190 | * with one of the mqueuefs files. That can raise the refcount. | |
191 | * So dropping the refcount, and raising the refcount when | |
192 | * accessing it through the VFS, are protected with mq_lock. | |
193 | * | |
194 | * (Clearly, a task raising the refcount on its own ipc_ns | |
195 | * needn't take mq_lock since it can't race with the last task | |
196 | * in the ipcns exiting). | |
197 | */ | |
198 | void put_ipc_ns(struct ipc_namespace *ns) | |
ae5e1b22 | 199 | { |
137ec390 | 200 | if (refcount_dec_and_lock(&ns->ns.count, &mq_lock)) { |
7eafd7c7 SH |
201 | mq_clear_sbinfo(ns); |
202 | spin_unlock(&mq_lock); | |
e1eb26fa GS |
203 | |
204 | if (llist_add(&ns->mnt_llist, &free_ipc_list)) | |
205 | schedule_work(&free_ipc_work); | |
7eafd7c7 SH |
206 | } |
207 | } | |
a00eaf11 | 208 | |
3c041184 AV |
209 | static inline struct ipc_namespace *to_ipc_ns(struct ns_common *ns) |
210 | { | |
211 | return container_of(ns, struct ipc_namespace, ns); | |
212 | } | |
213 | ||
64964528 | 214 | static struct ns_common *ipcns_get(struct task_struct *task) |
a00eaf11 EB |
215 | { |
216 | struct ipc_namespace *ns = NULL; | |
217 | struct nsproxy *nsproxy; | |
218 | ||
728dba3a EB |
219 | task_lock(task); |
220 | nsproxy = task->nsproxy; | |
a00eaf11 EB |
221 | if (nsproxy) |
222 | ns = get_ipc_ns(nsproxy->ipc_ns); | |
728dba3a | 223 | task_unlock(task); |
a00eaf11 | 224 | |
3c041184 | 225 | return ns ? &ns->ns : NULL; |
a00eaf11 EB |
226 | } |
227 | ||
/* proc_ns ->put: drop the reference obtained through ipcns_get(). */
static void ipcns_put(struct ns_common *ns)
{
	/* Plain call: a void function must not "return" an expression. */
	put_ipc_ns(to_ipc_ns(ns));
}
232 | ||
f2a8d52e | 233 | static int ipcns_install(struct nsset *nsset, struct ns_common *new) |
a00eaf11 | 234 | { |
f2a8d52e | 235 | struct nsproxy *nsproxy = nsset->nsproxy; |
3c041184 | 236 | struct ipc_namespace *ns = to_ipc_ns(new); |
5e4a0847 | 237 | if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || |
f2a8d52e | 238 | !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN)) |
142e1d1d EB |
239 | return -EPERM; |
240 | ||
a00eaf11 EB |
241 | put_ipc_ns(nsproxy->ipc_ns); |
242 | nsproxy->ipc_ns = get_ipc_ns(ns); | |
243 | return 0; | |
244 | } | |
245 | ||
bcac25a5 AV |
246 | static struct user_namespace *ipcns_owner(struct ns_common *ns) |
247 | { | |
248 | return to_ipc_ns(ns)->user_ns; | |
249 | } | |
250 | ||
a00eaf11 EB |
251 | const struct proc_ns_operations ipcns_operations = { |
252 | .name = "ipc", | |
253 | .type = CLONE_NEWIPC, | |
254 | .get = ipcns_get, | |
255 | .put = ipcns_put, | |
256 | .install = ipcns_install, | |
bcac25a5 | 257 | .owner = ipcns_owner, |
a00eaf11 | 258 | }; |