Introduce a sysctl that modifies the value of PROT_SOCK.
author: Krister Johansen <kjlx@templeofstupid.com>
Sat, 21 Jan 2017 01:49:11 +0000 (17:49 -0800)
committer: David S. Miller <davem@davemloft.net>
Tue, 24 Jan 2017 17:10:51 +0000 (12:10 -0500)
Add net.ipv4.ip_unprivileged_port_start, which is a per namespace sysctl
that denotes the first unprivileged inet port in the namespace.  To
disable all privileged ports set this to zero.  It also checks for
overlap with the local port range.  The privileged and local range may
not overlap.

The use case for this change is to allow containerized processes to bind
to privileged ports, but prevent them from ever being allowed to modify
their container's network configuration.  The latter is accomplished by
ensuring that the network namespace is not a child of the user
namespace.  This modification was needed to allow the container manager
to disable a namespace's privileged port restrictions without exposing
control of the network namespace to processes in the user namespace.

Signed-off-by: Krister Johansen <kjlx@templeofstupid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Documentation/networking/ip-sysctl.txt
include/net/ip.h
include/net/netns/ipv4.h
net/ipv4/af_inet.c
net/ipv4/sysctl_net_ipv4.c
net/ipv6/af_inet6.c
net/netfilter/ipvs/ip_vs_ctl.c
net/sctp/socket.c
security/selinux/hooks.c

index aa1bb49f1dc67fbea70812aef6f72ce8663f5680..17f2e7791042c4b6f3d8dadce4e9ad90bf0524d2 100644 (file)
@@ -822,6 +822,15 @@ ip_local_reserved_ports - list of comma separated ranges
 
        Default: Empty
 
+ip_unprivileged_port_start - INTEGER
+       This is a per-namespace sysctl.  It defines the first
+       unprivileged port in the network namespace.  Privileged ports
+       require root or CAP_NET_BIND_SERVICE in order to bind to them.
+       To disable all privileged ports, set this to 0.  It may not
+       overlap with the ip_local_port_range.
+
+       Default: 1024
+
 ip_nonlocal_bind - BOOLEAN
        If set, allows processes to bind() to non-local IP addresses,
        which can be quite useful - but may break some applications.
index ab6761a7c883a756583f570dc861af1e1d30e67f..bf264a8db1ce3b1b29b9a5cc9732df323154e7e5 100644 (file)
@@ -263,11 +263,21 @@ static inline bool sysctl_dev_name_is_allowed(const char *name)
        return strcmp(name, "default") != 0  && strcmp(name, "all") != 0;
 }
 
+static inline int inet_prot_sock(struct net *net)
+{
+       return net->ipv4.sysctl_ip_prot_sock; /* per-netns sysctl value */
+}
+
 #else
 static inline int inet_is_local_reserved_port(struct net *net, int port)
 {
        return 0;
 }
+
+static inline int inet_prot_sock(struct net *net)
+{
+       return PROT_SOCK; /* #else build: net is unused, fixed constant */
+}
 #endif
 
 __be32 inet_current_timestamp(void);
index 8e3f5b6f26d57cfaf813e3ee243de27494f3db45..e365732b8051ea1e376b06cae6fc9645b3b47d33 100644 (file)
@@ -135,6 +135,7 @@ struct netns_ipv4 {
 
 #ifdef CONFIG_SYSCTL
        unsigned long *sysctl_local_reserved_ports;
+       int sysctl_ip_prot_sock;
 #endif
 
 #ifdef CONFIG_IP_MROUTE
index aae410bb655abab70705041f733c6156305f8362..28fe8da4e1ac6230b5ddab88be9a0b9dcb8ad800 100644 (file)
@@ -479,7 +479,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
        snum = ntohs(addr->sin_port);
        err = -EACCES;
-       if (snum && snum < PROT_SOCK &&
+       if (snum && snum < inet_prot_sock(net) &&
            !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
                goto out;
 
@@ -1700,6 +1700,9 @@ static __net_init int inet_init_net(struct net *net)
        net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
        net->ipv4.sysctl_ip_dynaddr = 0;
        net->ipv4.sysctl_ip_early_demux = 1;
+#ifdef CONFIG_SYSCTL
+       net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
+#endif
 
        return 0;
 }
index c8d283615c6fb9806e0095445c926195b3e28ec6..1b861997fdc50af45b3a4bc193f72829e195804a 100644 (file)
@@ -35,6 +35,8 @@ static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
 static int tcp_adv_win_scale_min = -31;
 static int tcp_adv_win_scale_max = 31;
+static int ip_privileged_port_min;
+static int ip_privileged_port_max = 65535;
 static int ip_ttl_min = 1;
 static int ip_ttl_max = 255;
 static int tcp_syn_retries_min = 1;
@@ -79,7 +81,12 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
 
        if (write && ret == 0) {
-               if (range[1] < range[0])
+               /* Ensure that the upper limit is not smaller than the lower,
+                * and that the lower does not encroach upon the privileged
+                * port limit.
+                */
+               if ((range[1] < range[0]) ||
+                   (range[0] < net->ipv4.sysctl_ip_prot_sock))
                        ret = -EINVAL;
                else
                        set_local_port_range(net, range);
@@ -88,6 +95,40 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
        return ret;
 }
 
+/* Validate /proc writes: new value must not exceed the local range start. */
+static int ipv4_privileged_ports(struct ctl_table *table, int write,
+                               void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       struct net *net = container_of(table->data, struct net,
+           ipv4.sysctl_ip_prot_sock);
+       int ret;
+       int pports;
+       int range[2];
+       struct ctl_table tmp = {
+               .data = &pports,
+               .maxlen = sizeof(pports),
+               .mode = table->mode,
+               .extra1 = &ip_privileged_port_min,
+               .extra2 = &ip_privileged_port_max,
+       };
+
+       pports = net->ipv4.sysctl_ip_prot_sock;
+
+       ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+
+       if (write && ret == 0) {
+               inet_get_local_port_range(net, &range[0], &range[1]);
+               /* The candidate lives in pports until checked: if the local
+                * port range starts below it, fail with -EINVAL and keep the old value.
+                */
+               if (range[0] < pports)
+                       ret = -EINVAL;
+               else
+                       net->ipv4.sysctl_ip_prot_sock = pports;
+       }
+
+       return ret;
+}
 
 static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
 {
@@ -964,6 +1005,13 @@ static struct ctl_table ipv4_net_table[] = {
                .extra2         = &one,
        },
 #endif
+       {
+               .procname       = "ip_unprivileged_port_start",
+               .maxlen         = sizeof(int),
+               .data           = &init_net.ipv4.sysctl_ip_prot_sock,
+               .mode           = 0644,
+               .proc_handler   = ipv4_privileged_ports,
+       },
        { }
 };
 
index aa42123bc301f9c3877db49a05e6379c715aa384..04db40620ea65c1f369ef63490383e92def722ff 100644 (file)
@@ -302,7 +302,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                return -EINVAL;
 
        snum = ntohs(addr->sin6_port);
-       if (snum && snum < PROT_SOCK && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
+       if (snum && snum < inet_prot_sock(net) &&
+           !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
                return -EACCES;
 
        lock_sock(sk);
index 55e0169caa4ce2fe81f0c9f1199c2be84c986b58..8b7416f4e01a68e758abe42b655a23f051d88bdc 100644 (file)
@@ -426,10 +426,9 @@ ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol
         */
        svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport);
 
-       if (svc == NULL
-           && protocol == IPPROTO_TCP
-           && atomic_read(&ipvs->ftpsvc_counter)
-           && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
+       if (!svc && protocol == IPPROTO_TCP &&
+           atomic_read(&ipvs->ftpsvc_counter) &&
+           (vport == FTPDATA || ntohs(vport) >= inet_prot_sock(ipvs->net))) {
                /*
                 * Check if ftp service entry exists, the packet
                 * might belong to FTP data connections.
index bee4dd3feabb42a941cf1e4f8bf310d34a2f0603..d699d2cbf27563fcefd3dfe6d5bac87ab62a08f5 100644 (file)
@@ -360,7 +360,7 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
                }
        }
 
-       if (snum && snum < PROT_SOCK &&
+       if (snum && snum < inet_prot_sock(net) &&
            !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
                return -EACCES;
 
@@ -1152,8 +1152,10 @@ static int __sctp_connect(struct sock *sk,
                                 * accept new associations, but it SHOULD NOT
                                 * be permitted to open new associations.
                                 */
-                               if (ep->base.bind_addr.port < PROT_SOCK &&
-                                   !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) {
+                               if (ep->base.bind_addr.port <
+                                   inet_prot_sock(net) &&
+                                   !ns_capable(net->user_ns,
+                                   CAP_NET_BIND_SERVICE)) {
                                        err = -EACCES;
                                        goto out_free;
                                }
@@ -1818,7 +1820,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
                         * but it SHOULD NOT be permitted to open new
                         * associations.
                         */
-                       if (ep->base.bind_addr.port < PROT_SOCK &&
+                       if (ep->base.bind_addr.port < inet_prot_sock(net) &&
                            !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) {
                                err = -EACCES;
                                goto out_unlock;
index c7c6619431d5fb4922dd729e9e49dd910d7967d2..53cb6da5f1c68b245de060b3d88463bd6c7b9779 100644 (file)
@@ -4365,7 +4365,8 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
 
                        inet_get_local_port_range(sock_net(sk), &low, &high);
 
-                       if (snum < max(PROT_SOCK, low) || snum > high) {
+                       if (snum < max(inet_prot_sock(sock_net(sk)), low) ||
+                           snum > high) {
                                err = sel_netport_sid(sk->sk_protocol,
                                                      snum, &sid);
                                if (err)