As long as recvmsg() or recvmmsg() is used with cmsg, it is not
possible to avoid receiving file descriptors via SCM_RIGHTS.
This behaviour has occasionally been flagged as problematic, as
it can be (ab)used to trigger DoS during close(), for example, by
passing a FUSE-controlled fd or a hung NFS fd.
For instance, as noted on the uAPI Group page [0], an untrusted peer
could send a file descriptor pointing to a hung NFS mount and then
close it. Once the receiver calls recvmsg() with msg_control, the
descriptor is automatically installed, and then the responsibility
for the final close() now falls on the receiver, which may result
in blocking the process for a long time.
Regarding this, systemd calls cmsg_close_all() [1] after each
recvmsg() to close() unwanted file descriptors sent via SCM_RIGHTS.
However, this cannot work around the issue at all, because the final
fput() may still occur on the receiver's side once sendmsg() with
SCM_RIGHTS succeeds. Also, even filtering by LSM at recvmsg() does
not work for the same reason.
Thus, we need a better way to refuse SCM_RIGHTS at sendmsg().
Let's introduce SO_PASSRIGHTS to disable SCM_RIGHTS.
Note that this option is enabled by default for backward
compatibility.
Link: https://uapi-group.org/kernel-features/#disabling-reception-of-scm_rights-for-af_unix-sockets
Link: https://github.com/systemd/systemd/blob/v257.5/src/basic/fd-util.c#L612-L628
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
#define SO_RCVPRIORITY 82
+#define SO_PASSRIGHTS 83
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
#define SO_RCVPRIORITY 82
+#define SO_PASSRIGHTS 83
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF
#define SO_DEVMEM_DONTNEED 0x4050
+#define SO_PASSRIGHTS 0x4051
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
#define SO_RCVPRIORITY 0x005b
+#define SO_PASSRIGHTS 0x005c
+
#if !defined(__KERNEL__)
* @sk_scm_credentials: flagged by SO_PASSCRED to recv SCM_CREDENTIALS
* @sk_scm_security: flagged by SO_PASSSEC to recv SCM_SECURITY
* @sk_scm_pidfd: flagged by SO_PASSPIDFD to recv SCM_PIDFD
+ * @sk_scm_rights: flagged by SO_PASSRIGHTS to recv SCM_RIGHTS
* @sk_scm_unused: unused flags for scm_recv()
* @ns_tracker: tracker for netns reference
* @sk_user_frags: xarray of pages the user is holding a reference on.
u8 sk_scm_credentials : 1,
sk_scm_security : 1,
sk_scm_pidfd : 1,
- sk_scm_unused : 5;
+ sk_scm_rights : 1,
+ sk_scm_unused : 4;
};
};
u8 sk_clockid;
#define SO_RCVPRIORITY 82
+#define SO_PASSRIGHTS 83
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
ret = -EOPNOTSUPP;
break;
+ case SO_PASSRIGHTS:
+ if (sk_is_unix(sk))
+ sk->sk_scm_rights = valbool;
+ else
+ ret = -EOPNOTSUPP;
+ break;
+
case SO_INCOMING_CPU:
reuseport_update_incoming_cpu(sk, val);
break;
v.val = sk->sk_scm_pidfd;
break;
+ case SO_PASSRIGHTS:
+ if (!sk_is_unix(sk))
+ return -EOPNOTSUPP;
+
+ v.val = sk->sk_scm_rights;
+ break;
+
case SO_PEERCRED:
{
struct ucred peercred;
sock_init_data(sock, sk);
+ sk->sk_scm_rights = 1;
sk->sk_hash = unix_unbound_hash(sk);
sk->sk_allocation = GFP_KERNEL_ACCOUNT;
sk->sk_write_space = unix_write_space;
goto out_unlock;
}
+ if (UNIXCB(skb).fp && !other->sk_scm_rights) {
+ err = -EPERM;
+ goto out_unlock;
+ }
+
if (sk->sk_type != SOCK_SEQPACKET) {
err = security_unix_may_send(sk->sk_socket, other->sk_socket);
if (err)
if (sock_flag(other, SOCK_DEAD) ||
(other->sk_shutdown & RCV_SHUTDOWN)) {
- unix_state_unlock(other);
err = -EPIPE;
- goto out;
+ goto out_unlock;
+ }
+
+ if (UNIXCB(skb).fp && !other->sk_scm_rights) {
+ err = -EPERM;
+ goto out_unlock;
}
unix_maybe_add_creds(skb, sk, other);
other->sk_data_ready(other);
return 0;
+out_unlock:
+ unix_state_unlock(other);
out:
consume_skb(skb);
return err;
(other->sk_shutdown & RCV_SHUTDOWN))
goto out_pipe_unlock;
+ if (UNIXCB(skb).fp && !other->sk_scm_rights) {
+ unix_state_unlock(other);
+ err = -EPERM;
+ goto out_free;
+ }
+
unix_maybe_add_creds(skb, sk, other);
scm_stat_add(other, skb);
skb_queue_tail(&other->sk_receive_queue, skb);
#define SO_RCVPRIORITY 82
+#define SO_PASSRIGHTS 83
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))