}, }
};
-int pid_max = PID_MAX_DEFAULT;
-
-int pid_max_min = RESERVED_PIDS + 1;
-int pid_max_max = PID_MAX_LIMIT;
+static int pid_max_min = RESERVED_PIDS + 1;
+static int pid_max_max = PID_MAX_LIMIT;
/*
* Pseudo filesystems start inode numbering after one. We use Reserved
* PIDs as a natural offset.
#ifdef CONFIG_PID_NS
.ns.ops = &pidns_operations,
#endif
+ .pid_max = PID_MAX_DEFAULT,
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
.memfd_noexec_scope = MEMFD_NOEXEC_SCOPE_EXEC,
#endif
for (i = ns->level; i >= 0; i--) {
int tid = 0;
+ int pid_max = READ_ONCE(tmp->pid_max);
if (set_tid_size) {
tid = set_tid[ns->level - i];
return fd;
}
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_set *pid_table_root_lookup(struct ctl_table_root *root)
+{
+ return &task_active_pid_ns(current)->set;
+}
+
+static int set_is_seen(struct ctl_table_set *set)
+{
+ return &task_active_pid_ns(current)->set == set;
+}
+
+static int pid_table_root_permissions(struct ctl_table_header *head,
+ const struct ctl_table *table)
+{
+ struct pid_namespace *pidns =
+ container_of(head->set, struct pid_namespace, set);
+ int mode = table->mode;
+
+ if (ns_capable(pidns->user_ns, CAP_SYS_ADMIN) ||
+ uid_eq(current_euid(), make_kuid(pidns->user_ns, 0)))
+ mode = (mode & S_IRWXU) >> 6;
+ else if (in_egroup_p(make_kgid(pidns->user_ns, 0)))
+ mode = (mode & S_IRWXG) >> 3;
+ else
+ mode = mode & S_IROTH;
+ return (mode << 6) | (mode << 3) | mode;
+}
+
+static void pid_table_root_set_ownership(struct ctl_table_header *head,
+ kuid_t *uid, kgid_t *gid)
+{
+ struct pid_namespace *pidns =
+ container_of(head->set, struct pid_namespace, set);
+ kuid_t ns_root_uid;
+ kgid_t ns_root_gid;
+
+ ns_root_uid = make_kuid(pidns->user_ns, 0);
+ if (uid_valid(ns_root_uid))
+ *uid = ns_root_uid;
+
+ ns_root_gid = make_kgid(pidns->user_ns, 0);
+ if (gid_valid(ns_root_gid))
+ *gid = ns_root_gid;
+}
+
+static struct ctl_table_root pid_table_root = {
+ .lookup = pid_table_root_lookup,
+ .permissions = pid_table_root_permissions,
+ .set_ownership = pid_table_root_set_ownership,
+};
+
+static struct ctl_table pid_table[] = {
+ {
+ .procname = "pid_max",
+ .data = &init_pid_ns.pid_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &pid_max_min,
+ .extra2 = &pid_max_max,
+ },
+};
+#endif
+
+int register_pidns_sysctls(struct pid_namespace *pidns)
+{
+#ifdef CONFIG_SYSCTL
+ struct ctl_table *tbl;
+
+ setup_sysctl_set(&pidns->set, &pid_table_root, set_is_seen);
+
+ tbl = kmemdup(pid_table, sizeof(pid_table), GFP_KERNEL);
+ if (!tbl)
+ return -ENOMEM;
+ tbl->data = &pidns->pid_max;
+ pidns->pid_max = min(pid_max_max, max_t(int, pidns->pid_max,
+ PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
+
+ pidns->sysctls = __register_sysctl_table(&pidns->set, "kernel", tbl,
+ ARRAY_SIZE(pid_table));
+ if (!pidns->sysctls) {
+ kfree(tbl);
+ retire_sysctl_set(&pidns->set);
+ return -ENOMEM;
+ }
+#endif
+ return 0;
+}
+
+void unregister_pidns_sysctls(struct pid_namespace *pidns)
+{
+#ifdef CONFIG_SYSCTL
+ const struct ctl_table *tbl;
+
+ tbl = pidns->sysctls->ctl_table_arg;
+ unregister_sysctl_table(pidns->sysctls);
+ retire_sysctl_set(&pidns->set);
+ kfree(tbl);
+#endif
+}
+
void __init pid_idr_init(void)
{
/* Verify no one has done anything silly: */
BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING);
/* bump default and minimum pid_max based on number of cpus */
- pid_max = min(pid_max_max, max_t(int, pid_max,
- PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
+ init_pid_ns.pid_max = min(pid_max_max, max_t(int, init_pid_ns.pid_max,
+ PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
pid_max_min = max_t(int, pid_max_min,
PIDS_PER_CPU_MIN * num_possible_cpus());
- pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
+ pr_info("pid_max: default: %u minimum: %u\n", init_pid_ns.pid_max, pid_max_min);
idr_init(&init_pid_ns.idr);
NULL);
}
+static __init int pid_namespace_sysctl_init(void)
+{
+#ifdef CONFIG_SYSCTL
+ /* "kernel" directory will have already been initialized. */
+ BUG_ON(register_pidns_sysctls(&init_pid_ns));
+#endif
+ return 0;
+}
+subsys_initcall(pid_namespace_sysctl_init);
+
static struct file *__pidfd_fget(struct task_struct *task, int fd)
{
struct file *file;
dec_ucount(ucounts, UCOUNT_PID_NAMESPACES);
}
+static void destroy_pid_namespace_work(struct work_struct *work);
+
static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
struct pid_namespace *parent_pid_ns)
{
goto out_free_idr;
ns->ns.ops = &pidns_operations;
+ ns->pid_max = parent_pid_ns->pid_max;
+ err = register_pidns_sysctls(ns);
+ if (err)
+ goto out_free_inum;
+
refcount_set(&ns->ns.count, 1);
ns->level = level;
ns->parent = get_pid_ns(parent_pid_ns);
ns->user_ns = get_user_ns(user_ns);
ns->ucounts = ucounts;
ns->pid_allocated = PIDNS_ADDING;
+ INIT_WORK(&ns->work, destroy_pid_namespace_work);
+
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
ns->memfd_noexec_scope = pidns_memfd_noexec_scope(parent_pid_ns);
#endif
+
return ns;
+out_free_inum:
+ ns_free_inum(&ns->ns);
out_free_idr:
idr_destroy(&ns->idr);
kmem_cache_free(pid_ns_cachep, ns);
static void destroy_pid_namespace(struct pid_namespace *ns)
{
+ unregister_pidns_sysctls(ns);
+
ns_free_inum(&ns->ns);
idr_destroy(&ns->idr);
call_rcu(&ns->rcu, delayed_free_pidns);
}
+static void destroy_pid_namespace_work(struct work_struct *work)
+{
+ struct pid_namespace *ns =
+ container_of(work, struct pid_namespace, work);
+
+ do {
+ struct pid_namespace *parent;
+
+ parent = ns->parent;
+ destroy_pid_namespace(ns);
+ ns = parent;
+ } while (ns != &init_pid_ns && refcount_dec_and_test(&ns->ns.count));
+}
+
struct pid_namespace *copy_pid_ns(unsigned long flags,
struct user_namespace *user_ns, struct pid_namespace *old_ns)
{
void put_pid_ns(struct pid_namespace *ns)
{
- struct pid_namespace *parent;
-
- while (ns != &init_pid_ns) {
- parent = ns->parent;
- if (!refcount_dec_and_test(&ns->ns.count))
- break;
- destroy_pid_namespace(ns);
- ns = parent;
- }
+ if (ns && ns != &init_pid_ns && refcount_dec_and_test(&ns->ns.count))
+ schedule_work(&ns->work);
}
EXPORT_SYMBOL_GPL(put_pid_ns);
next = idr_get_cursor(&pid_ns->idr) - 1;
tmp.data = &next;
+ tmp.extra2 = &pid_ns->pid_max;
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (!ret && write)
idr_set_cursor(&pid_ns->idr, next + 1);
return ret;
}
-extern int pid_max;
static struct ctl_table pid_ns_ctl_table[] = {
{
.procname = "ns_last_pid",
.mode = 0666, /* permissions are checked in the handler */
.proc_handler = pid_ns_ctl_handler,
.extra1 = SYSCTL_ZERO,
- .extra2 = &pid_max,
+ .extra2 = &init_pid_ns.pid_max,
},
};
#endif /* CONFIG_CHECKPOINT_RESTORE */