path: root/ipc
diff options
authorLinus Torvalds <>2020-06-03 13:12:57 -0700
committerLinus Torvalds <>2020-06-03 13:12:57 -0700
commite7c93cbfe9cb4b0a47633099e78c455b1f79bbac (patch)
tree3fe9238ab3000db445c26a6ae0cc769110512281 /ipc
parentd479c5a1919b4e569dcd3ae9c84ed74a675d0b94 (diff)
parent2b40c5db73e239531ea54991087f4edc07fbb08e (diff)
Merge tag 'threads-v5.8' of git://
Pull thread updates from Christian Brauner: "We have been discussing using pidfds to attach to namespaces for quite a while and the patches have in one form or another already existed for about a year. But I wanted to wait to see how the general api would be received and adopted. This contains the changes to make it possible to use pidfds to attach to the namespaces of a process, i.e. they can be passed as the first argument to the setns() syscall. When only a single namespace type is specified the semantics are equivalent to passing an nsfd. That means setns(nsfd, CLONE_NEWNET) equals setns(pidfd, CLONE_NEWNET). However, when a pidfd is passed, multiple namespace flags can be specified in the second setns() argument and setns() will attach the caller to all the specified namespaces all at once or to none of them. Specifying 0 is not valid together with a pidfd. Here are just two obvious examples: setns(pidfd, CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET); setns(pidfd, CLONE_NEWUSER); Allowing to also attach subsets of namespaces supports various use-cases where callers setns to a subset of namespaces to retain privilege, perform an action and then re-attach another subset of namespaces. Apart from significantly reducing the number of syscalls needed to attach to all currently supported namespaces (eight "open+setns" sequences vs just a single "setns()"), this also allows atomic setns to a set of namespaces, i.e. either attaching to all namespaces succeeds or we fail without having changed anything. This is centered around a new internal struct nsset which holds all information necessary for a task to switch to a new set of namespaces atomically. Fwiw, with this change a pidfd becomes the only token needed to interact with a container. I'm expecting this to be picked-up by util-linux for nsenter rather soon. Associated with this change is a shiny new test-suite dedicated to setns() (for pidfds and nsfds alike)" * tag 'threads-v5.8' of git:// selftests/pidfd: add pidfd setns tests nsproxy: attach to namespaces via pidfds nsproxy: add struct nsset
Diffstat (limited to 'ipc')
1 files changed, 3 insertions, 4 deletions
diff --git a/ipc/namespace.c b/ipc/namespace.c
index b3ca1476ca51..fdc3b5f3f53a 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -177,15 +177,14 @@ static void ipcns_put(struct ns_common *ns)
return put_ipc_ns(to_ipc_ns(ns));
-static int ipcns_install(struct nsproxy *nsproxy, struct ns_common *new)
+static int ipcns_install(struct nsset *nsset, struct ns_common *new)
+ struct nsproxy *nsproxy = nsset->nsproxy;
struct ipc_namespace *ns = to_ipc_ns(new);
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
- !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+ !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
return -EPERM;
- /* Ditch state from the old ipc namespace */
- exit_sem(current);
nsproxy->ipc_ns = get_ipc_ns(ns);
return 0;