From 0fb44559ffd67de8517098b81f675fa0210f13f0 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 23 Jan 2017 11:17:35 -0800 Subject: af_unix: move unix_mknod() out of bindlock Dmitry reported a deadlock scenario: unix_bind() path: u->bindlock ==> sb_writer do_splice() path: sb_writer ==> pipe->mutex ==> u->bindlock In the unix_bind() code path, unix_mknod() does not have to be done with u->bindlock held, since it is a pure fs operation, so we can just move unix_mknod() out. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Cc: Rainer Weikusat Cc: Al Viro Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/unix/af_unix.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 127656ebe7be..cef79873b09d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -995,6 +995,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) unsigned int hash; struct unix_address *addr; struct hlist_head *list; + struct path path = { NULL, NULL }; err = -EINVAL; if (sunaddr->sun_family != AF_UNIX) @@ -1010,9 +1011,20 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; addr_len = err; + if (sun_path[0]) { + umode_t mode = S_IFSOCK | + (SOCK_INODE(sock)->i_mode & ~current_umask()); + err = unix_mknod(sun_path, mode, &path); + if (err) { + if (err == -EEXIST) + err = -EADDRINUSE; + goto out; + } + } + err = mutex_lock_interruptible(&u->bindlock); if (err) - goto out; + goto out_put; err = -EINVAL; if (u->addr) @@ -1029,16 +1041,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) atomic_set(&addr->refcnt, 1); if (sun_path[0]) { - struct path path; - umode_t mode = S_IFSOCK | - (SOCK_INODE(sock)->i_mode & ~current_umask()); - err = unix_mknod(sun_path, mode, &path); - if (err) { - if (err == -EEXIST) - err = -EADDRINUSE; - unix_release_addr(addr); - goto out_up; - } addr->hash = UNIX_HASH_SIZE; hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); spin_lock(&unix_table_lock); @@ -1065,6 +1067,9 @@ out_unlock: spin_unlock(&unix_table_lock); out_up: mutex_unlock(&u->bindlock); +out_put: + if (err) + path_put(&path); out: return err; } -- cgit v1.2.1 From ba94f3088b792b16ea576a256a6030feddc87f24 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 1 Feb 2017 11:00:45 -0800 Subject: unix: add ioctl to open a unix socket file with O_PATH This ioctl opens a file to which a socket is bound and returns a file descriptor. The caller has to have CAP_NET_ADMIN in the socket network namespace. Currently it is impossible to get a path and a mount point for a socket file. socket_diag reports address, device ID and inode number for unix sockets. An address can contain a relative path or a file may be moved somewhere. And these properties say nothing about a mount namespace and a mount point of a socket file. With the introduced ioctl, we can get a path by reading /proc/self/fd/X and get mnt_id from /proc/self/fdinfo/X. In CRIU we are going to use this ioctl to dump and restore unix socket. Here is an example how it can be used: $ strace -e socket,bind,ioctl ./test /tmp/test_sock socket(AF_UNIX, SOCK_STREAM, 0) = 3 bind(3, {sa_family=AF_UNIX, sun_path="test_sock"}, 11) = 0 ioctl(3, SIOCUNIXFILE, 0) = 4 ^Z $ ss -a | grep test_sock u_str LISTEN 0 1 test_sock 17798 * 0 $ ls -l /proc/760/fd/{3,4} lrwx------ 1 root root 64 Feb 1 09:41 3 -> 'socket:[17798]' l--------- 1 root root 64 Feb 1 09:41 4 -> /tmp/test_sock $ cat /proc/760/fdinfo/4 pos: 0 flags: 012000000 mnt_id: 40 $ cat /proc/self/mountinfo | grep "^40\s" 40 19 0:37 / /tmp rw shared:23 - tmpfs tmpfs rw Signed-off-by: Andrei Vagin Signed-off-by: David S. Miller --- net/unix/af_unix.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index cef79873b09d..e2d18b9f910f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -117,6 +117,7 @@ #include #include #include +#include struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; EXPORT_SYMBOL_GPL(unix_socket_table); @@ -2592,6 +2593,43 @@ long unix_outq_len(struct sock *sk) } EXPORT_SYMBOL_GPL(unix_outq_len); +static int unix_open_file(struct sock *sk) +{ + struct path path; + struct file *f; + int fd; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + unix_state_lock(sk); + path = unix_sk(sk)->path; + if (!path.dentry) { + unix_state_unlock(sk); + return -ENOENT; + } + + path_get(&path); + unix_state_unlock(sk); + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) + goto out; + + f = dentry_open(&path, O_PATH, current_cred()); + if (IS_ERR(f)) { + put_unused_fd(fd); + fd = PTR_ERR(f); + goto out; + } + + fd_install(fd, f); +out: + path_put(&path); + + return fd; +} + static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; @@ -2610,6 +2648,9 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) else err = put_user(amount, (int __user *)arg); break; + case SIOCUNIXFILE: + err = unix_open_file(sk); + break; default: err = -ENOIOCTLCMD; break; -- cgit v1.2.1 From 3f07c0144132e4f59d88055ac8ff3e691a5fa2b8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:30 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e2d18b9f910f..ee37b390260a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -85,7 +85,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.1 From cdfbabfb2f0ce983fdaa42f20e5f7842178fc01e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 9 Mar 2017 08:09:05 +0000 Subject: net: Work around lockdep limitation in sockets that use sockets Lockdep issues a circular dependency warning when AFS issues an operation through AF_RXRPC from a context in which the VFS/VM holds the mmap_sem. The theory lockdep comes up with is as follows: (1) If the pagefault handler decides it needs to read pages from AFS, it calls AFS with mmap_sem held and AFS begins an AF_RXRPC call, but creating a call requires the socket lock: mmap_sem must be taken before sk_lock-AF_RXRPC (2) afs_open_socket() opens an AF_RXRPC socket and binds it. rxrpc_bind() binds the underlying UDP socket whilst holding its socket lock. inet_bind() takes its own socket lock: sk_lock-AF_RXRPC must be taken before sk_lock-AF_INET (3) Reading from a TCP socket into a userspace buffer might cause a fault and thus cause the kernel to take the mmap_sem, but the TCP socket is locked whilst doing this: sk_lock-AF_INET must be taken before mmap_sem However, lockdep's theory is wrong in this instance because it deals only with lock classes and not individual locks. The AF_INET lock in (2) isn't really equivalent to the AF_INET lock in (3) as the former deals with a socket entirely internal to the kernel that never sees userspace. This is a limitation in the design of lockdep. Fix the general case by: (1) Double up all the locking keys used in sockets so that one set are used if the socket is created by userspace and the other set is used if the socket is created by the kernel. (2) Store the kern parameter passed to sk_alloc() in a variable in the sock struct (sk_kern_sock). This informs sock_lock_init(), sock_init_data() and sk_clone_lock() as to the lock keys to be used. Note that the child created by sk_clone_lock() inherits the parent's kern setting. (3) Add a 'kern' parameter to ->accept() that is analogous to the one passed in to ->create() that distinguishes whether kernel_accept() or sys_accept4() was the caller and can be passed to sk_alloc(). Note that a lot of accept functions merely dequeue an already allocated socket. I haven't touched these as the new socket already exists before we get the parameter. Note also that there are a couple of places where I've made the accepted socket unconditionally kernel-based: irda_accept() rds_rcp_accept_one() tcp_accept_from_sock() because they follow a sock_create_kern() and accept off of that. Whilst creating this, I noticed that lustre and ocfs don't create sockets through sock_create_kern() and thus they aren't marked as for-kernel, though they appear to be internal. I wonder if these should do that so that they use the new set of lock keys. Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/unix/af_unix.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ee37b390260a..928691c43408 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -636,7 +636,7 @@ static int unix_bind(struct socket *, struct sockaddr *, int); static int unix_stream_connect(struct socket *, struct sockaddr *, int addr_len, int flags); static int unix_socketpair(struct socket *, struct socket *); -static int unix_accept(struct socket *, struct socket *, int); +static int unix_accept(struct socket *, struct socket *, int, bool); static int unix_getname(struct socket *, struct sockaddr *, int *, int); static unsigned int unix_poll(struct file *, struct socket *, poll_table *); static unsigned int unix_dgram_poll(struct file *, struct socket *, @@ -1402,7 +1402,8 @@ static void unix_sock_inherit_flags(const struct socket *old, set_bit(SOCK_PASSSEC, &new->flags); } -static int unix_accept(struct socket *sock, struct socket *newsock, int flags) +static int unix_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk; struct sock *tsk; -- cgit v1.2.1