Diffstat (limited to 'net')
78 files changed, 612 insertions, 394 deletions
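Most of the churn in this diff is the tree-wide rename of poll-mask constants from POLL* to EPOLL*: poll handlers return __poll_t and build the mask from EPOLL* bits, keeping the in-kernel mask type-distinct from the userspace poll(2) flags. Below is a minimal sketch of a handler in the converted style; it is not taken from this diff, and the demo_dev structure and its fields are hypothetical.

/*
 * Hedged sketch: a poll method in the post-conversion style.
 * demo_dev and its fields are hypothetical, not from this diff.
 */
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/skbuff.h>
#include <linux/wait.h>

struct demo_dev {
	wait_queue_head_t wait;		/* woken when state changes */
	struct sk_buff_head rxq;	/* pending input */
	bool writable;			/* room in the output path */
};

static __poll_t demo_poll(struct file *file, poll_table *pt)
{
	struct demo_dev *d = file->private_data;
	__poll_t mask = 0;

	poll_wait(file, &d->wait, pt);

	if (!skb_queue_empty(&d->rxq))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (d->writable)
		mask |= EPOLLOUT | EPOLLWRNORM;

	return mask;
}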
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index d6f7f7cb79c4..0cfba919d167 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -240,7 +240,7 @@ p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err) if (!ts) { if (err) *err = -EREMOTEIO; - return POLLERR; + return EPOLLERR; } if (!ts->rd->f_op->poll) @@ -253,7 +253,7 @@ p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err) n = DEFAULT_POLLMASK; else n = ts->wr->f_op->poll(ts->wr, pt); - ret = (ret & ~POLLOUT) | (n & ~POLLIN); + ret = (ret & ~EPOLLOUT) | (n & ~EPOLLIN); } return ret; @@ -396,11 +396,11 @@ end_clear: if (!list_empty(&m->req_list)) { if (test_and_clear_bit(Rpending, &m->wsched)) - n = POLLIN; + n = EPOLLIN; else n = p9_fd_poll(m->client, NULL, NULL); - if ((n & POLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) { + if ((n & EPOLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); schedule_work(&m->rq); } @@ -505,11 +505,11 @@ end_clear: if (m->wsize || !list_empty(&m->unsent_req_list)) { if (test_and_clear_bit(Wpending, &m->wsched)) - n = POLLOUT; + n = EPOLLOUT; else n = p9_fd_poll(m->client, NULL, NULL); - if ((n & POLLOUT) && + if ((n & EPOLLOUT) && !test_and_set_bit(Wworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); schedule_work(&m->wq); @@ -599,12 +599,12 @@ static void p9_conn_create(struct p9_client *client) init_poll_funcptr(&m->pt, p9_pollwait); n = p9_fd_poll(client, &m->pt, NULL); - if (n & POLLIN) { + if (n & EPOLLIN) { p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); set_bit(Rpending, &m->wsched); } - if (n & POLLOUT) { + if (n & EPOLLOUT) { p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); set_bit(Wpending, &m->wsched); } @@ -625,12 +625,12 @@ static void p9_poll_mux(struct p9_conn *m) return; n = p9_fd_poll(m->client, NULL, &err); - if (n & (POLLERR | POLLHUP | POLLNVAL)) { + if (n & (EPOLLERR | EPOLLHUP | EPOLLNVAL)) { p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n); p9_conn_cancel(m, err); } - if (n & POLLIN) { + if (n & EPOLLIN) { set_bit(Rpending, &m->wsched); p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); if (!test_and_set_bit(Rworksched, &m->wsched)) { @@ -639,7 +639,7 @@ static void p9_poll_mux(struct p9_conn *m) } } - if (n & POLLOUT) { + if (n & EPOLLOUT) { set_bit(Wpending, &m->wsched); p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); if ((m->wsize || !list_empty(&m->unsent_req_list)) && @@ -678,11 +678,11 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) spin_unlock(&client->lock); if (test_and_clear_bit(Wpending, &m->wsched)) - n = POLLOUT; + n = EPOLLOUT; else n = p9_fd_poll(m->client, NULL, NULL); - if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) + if (n & EPOLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) schedule_work(&m->wq); return 0; diff --git a/net/atm/common.c b/net/atm/common.c index 6523f38c4957..fc78a0508ae1 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -661,15 +661,15 @@ __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait) /* exceptional events */ if (sk->sk_err) - mask = POLLERR; + mask = EPOLLERR; if (test_bit(ATM_VF_RELEASED, &vcc->flags) || test_bit(ATM_VF_CLOSE, &vcc->flags)) - mask |= POLLHUP; + mask |= EPOLLHUP; /* readable? */ if (!skb_queue_empty(&sk->sk_receive_queue)) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; /* writable? 
*/ if (sock->state == SS_CONNECTING && @@ -678,7 +678,7 @@ __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait) if (vcc->qos.txtp.traffic_class != ATM_NONE && vcc_writable(sk)) - mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; return mask; } diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 581375d0eed2..e91f29c7c638 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -304,7 +304,7 @@ static __poll_t batadv_socket_poll(struct file *file, poll_table *wait) poll_wait(file, &socket_client->queue_wait, wait); if (socket_client->queue_len > 0) - return POLLIN | POLLRDNORM; + return EPOLLIN | EPOLLRDNORM; return 0; } diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c index 9be74a44e99d..dc9fa37ddd14 100644 --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@ -193,7 +193,7 @@ static __poll_t batadv_log_poll(struct file *file, poll_table *wait) poll_wait(file, &debug_log->queue_wait, wait); if (!batadv_log_empty(debug_log)) - return POLLIN | POLLRDNORM; + return EPOLLIN | EPOLLRDNORM; return 0; } diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index f897681780db..84d92a077834 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -431,7 +431,7 @@ static inline __poll_t bt_accept_poll(struct sock *parent) if (sk->sk_state == BT_CONNECTED || (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags) && sk->sk_state == BT_CONNECT2)) - return POLLIN | POLLRDNORM; + return EPOLLIN | EPOLLRDNORM; } return 0; @@ -451,20 +451,20 @@ __poll_t bt_sock_poll(struct file *file, struct socket *sock, return bt_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR | - (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); + mask |= EPOLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLRDHUP | POLLIN | POLLRDNORM; + mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) - mask |= POLLHUP; + mask |= EPOLLHUP; if (!skb_queue_empty(&sk->sk_receive_queue)) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; if (sk->sk_state == BT_CLOSED) - mask |= POLLHUP; + mask |= EPOLLHUP; if (sk->sk_state == BT_CONNECT || sk->sk_state == BT_CONNECT2 || @@ -472,7 +472,7 @@ __poll_t bt_sock_poll(struct file *file, struct socket *sock, return mask; if (!test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags) && sock_writeable(sk)) - mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; else sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index b109445a1df9..a6fb1b3bcad9 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -924,7 +924,7 @@ static int caif_release(struct socket *sock) caif_disconnect_client(sock_net(sk), &cf_sk->layer); cf_sk->sk.sk_socket->state = SS_DISCONNECTING; - wake_up_interruptible_poll(sk_sleep(sk), POLLERR|POLLHUP); + wake_up_interruptible_poll(sk_sleep(sk), EPOLLERR|EPOLLHUP); sock_orphan(sk); sk_stream_kill_queues(&cf_sk->sk); @@ -946,23 +946,23 @@ static __poll_t caif_poll(struct file *file, /* exceptional events? */ if (sk->sk_err) - mask |= POLLERR; + mask |= EPOLLERR; if (sk->sk_shutdown == SHUTDOWN_MASK) - mask |= POLLHUP; + mask |= EPOLLHUP; if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLRDHUP; + mask |= EPOLLRDHUP; /* readable? 
*/ if (!skb_queue_empty(&sk->sk_receive_queue) || (sk->sk_shutdown & RCV_SHUTDOWN)) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; /* * we set writable also when the other side has shut down the * connection. This prevents stuck sockets. */ if (sock_writeable(sk) && tx_flow_is_on(cf_sk)) - mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; return mask; } diff --git a/net/core/datagram.c b/net/core/datagram.c index b7d9293940b5..9938952c5c78 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -75,7 +75,7 @@ static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, i /* * Avoid a wakeup if event not interesting for us */ - if (key && !(key_to_poll(key) & (POLLIN | POLLERR))) + if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR))) return 0; return autoremove_wake_function(wait, mode, sync, key); } @@ -842,22 +842,22 @@ __poll_t datagram_poll(struct file *file, struct socket *sock, /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR | - (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); + mask |= EPOLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLRDHUP | POLLIN | POLLRDNORM; + mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) - mask |= POLLHUP; + mask |= EPOLLHUP; /* readable? */ if (!skb_queue_empty(&sk->sk_receive_queue)) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ if (connection_based(sk)) { if (sk->sk_state == TCP_CLOSE) - mask |= POLLHUP; + mask |= EPOLLHUP; /* connection hasn't started yet? */ if (sk->sk_state == TCP_SYN_SENT) return mask; @@ -865,7 +865,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock, /* writable? */ if (sock_writeable(sk)) - mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; else sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 56af8e41abfc..bc290413a49d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1951,6 +1951,38 @@ static struct net *rtnl_link_get_net_capable(const struct sk_buff *skb, return net; } +/* Verify that rtnetlink requests do not pass additional properties + * potentially referring to different network namespaces. 
+ */ +static int rtnl_ensure_unique_netns(struct nlattr *tb[], + struct netlink_ext_ack *extack, + bool netns_id_only) +{ + + if (netns_id_only) { + if (!tb[IFLA_NET_NS_PID] && !tb[IFLA_NET_NS_FD]) + return 0; + + NL_SET_ERR_MSG(extack, "specified netns attribute not supported"); + return -EOPNOTSUPP; + } + + if (tb[IFLA_IF_NETNSID] && (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD])) + goto invalid_attr; + + if (tb[IFLA_NET_NS_PID] && (tb[IFLA_IF_NETNSID] || tb[IFLA_NET_NS_FD])) + goto invalid_attr; + + if (tb[IFLA_NET_NS_FD] && (tb[IFLA_IF_NETNSID] || tb[IFLA_NET_NS_PID])) + goto invalid_attr; + + return 0; + +invalid_attr: + NL_SET_ERR_MSG(extack, "multiple netns identifying attributes specified"); + return -EINVAL; +} + static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) { if (dev) { @@ -2553,6 +2585,10 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout; + err = rtnl_ensure_unique_netns(tb, extack, false); + if (err < 0) + goto errout; + if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); else @@ -2649,6 +2685,10 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; + err = rtnl_ensure_unique_netns(tb, extack, true); + if (err < 0) + return err; + if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); @@ -2802,6 +2842,10 @@ replay: if (err < 0) return err; + err = rtnl_ensure_unique_netns(tb, extack, false); + if (err < 0) + return err; + if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); else @@ -3045,6 +3089,10 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; + err = rtnl_ensure_unique_netns(tb, extack, true); + if (err < 0) + return err; + if (tb[IFLA_IF_NETNSID]) { netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8c61c27c1b28..09bd89c90a71 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3894,10 +3894,12 @@ EXPORT_SYMBOL_GPL(skb_gro_receive); void __init skb_init(void) { - skbuff_head_cache = kmem_cache_create("skbuff_head_cache", + skbuff_head_cache = kmem_cache_create_usercopy("skbuff_head_cache", sizeof(struct sk_buff), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, + offsetof(struct sk_buff, cb), + sizeof_field(struct sk_buff, cb), NULL); skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", sizeof(struct sk_buff_fclones), diff --git a/net/core/sock.c b/net/core/sock.c index b026e1717df4..c501499a04fe 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2619,7 +2619,7 @@ static void sock_def_error_report(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) - wake_up_interruptible_poll(&wq->wait, POLLERR); + wake_up_interruptible_poll(&wq->wait, EPOLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); rcu_read_unlock(); } @@ -2631,8 +2631,8 @@ static void sock_def_readable(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) - wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI | - POLLRDNORM | POLLRDBAND); + wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI | + EPOLLRDNORM | EPOLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); rcu_read_unlock(); } @@ -2649,8 +2649,8 @@ static void sock_def_write_space(struct sock *sk) if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) - 
wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | - POLLWRNORM | POLLWRBAND); + wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | + EPOLLWRNORM | EPOLLWRBAND); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) diff --git a/net/core/stream.c b/net/core/stream.c index 1cff9c6270c6..7d329fb1f553 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -38,8 +38,8 @@ void sk_stream_write_space(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) - wake_up_interruptible_poll(&wq->wait, POLLOUT | - POLLWRNORM | POLLWRBAND); + wake_up_interruptible_poll(&wq->wait, EPOLLOUT | + EPOLLWRNORM | EPOLLWRBAND); if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT); rcu_read_unlock(); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 74685fecfdb9..15bdc002d90c 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -338,21 +338,21 @@ __poll_t dccp_poll(struct file *file, struct socket *sock, mask = 0; if (sk->sk_err) - mask = POLLERR; + mask = EPOLLERR; if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED) - mask |= POLLHUP; + mask |= EPOLLHUP; if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLIN | POLLRDNORM | POLLRDHUP; + mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; /* Connected? */ if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) { if (atomic_read(&sk->sk_rmem_alloc) > 0) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { if (sk_stream_is_writeable(sk)) { - mask |= POLLOUT | POLLWRNORM; + mask |= EPOLLOUT | EPOLLWRNORM; } else { /* send SIGIO later */ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); @@ -362,7 +362,7 @@ __poll_t dccp_poll(struct file *file, struct socket *sock, * IO signal will be lost. */ if (sk_stream_is_writeable(sk)) - mask |= POLLOUT | POLLWRNORM; + mask |= EPOLLOUT | EPOLLWRNORM; } } } diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index cc1b505453a8..91dd09f79808 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1216,7 +1216,7 @@ static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wai __poll_t mask = datagram_poll(file, sock, wait); if (!skb_queue_empty(&scp->other_receive_queue)) - mask |= POLLRDBAND; + mask |= EPOLLRDBAND; return mask; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c24008daa3d8..e4329e161943 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -828,7 +828,7 @@ int inet_shutdown(struct socket *sock, int how) case TCP_CLOSE: err = -ENOTCONN; /* Hack to wake up other listeners, who can poll for - POLLHUP, even on eg. unconnected UDP sockets -- RR */ + EPOLLHUP, even on eg. unconnected UDP sockets -- RR */ /* fall through */ default: sk->sk_shutdown |= how; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 5f52236780b4..dfe6fa4ea554 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -80,8 +80,7 @@ endif # NF_TABLES config NF_FLOW_TABLE_IPV4 tristate "Netfilter flow table IPv4 module" - depends on NF_CONNTRACK && NF_TABLES - select NF_FLOW_TABLE + depends on NF_FLOW_TABLE help This option adds the flow table IPv4 support. 
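The sock.c and stream.c hunks above show the waker side of the same conversion: the EPOLL* mask is passed as the wakeup key, so a waiter such as receiver_wake_function() in the datagram.c hunk can use key_to_poll() and skip wakeups for events it does not care about. A condensed sketch of that pattern, modeled on sk_stream_write_space() above, with the fasync and shutdown handling trimmed:

#include <net/sock.h>
#include <linux/poll.h>

static void demo_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	/* Only wake if someone is actually sleeping on the queue. */
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_poll(&wq->wait, EPOLLOUT |
					   EPOLLWRNORM | EPOLLWRBAND);
	rcu_read_unlock();
}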
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c index b2d01eb25f2c..25d2975da156 100644 --- a/net/ipv4/netfilter/nf_flow_table_ipv4.c +++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c @@ -260,6 +260,7 @@ static struct nf_flowtable_type flowtable_ipv4 = { .family = NFPROTO_IPV4, .params = &nf_flow_offload_rhash_params, .gc = nf_flow_offload_work_gc, + .free = nf_flow_table_free, .hook = nf_flow_offload_ip_hook, .owner = THIS_MODULE, }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c059aa7df0a9..48636aee23c3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -512,36 +512,36 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) mask = 0; /* - * POLLHUP is certainly not done right. But poll() doesn't + * EPOLLHUP is certainly not done right. But poll() doesn't * have a notion of HUP in just one direction, and for a * socket the read side is more interesting. * - * Some poll() documentation says that POLLHUP is incompatible - * with the POLLOUT/POLLWR flags, so somebody should check this + * Some poll() documentation says that EPOLLHUP is incompatible + * with the EPOLLOUT/POLLWR flags, so somebody should check this * all. But careful, it tends to be safer to return too many * bits than too few, and you can easily break real applications * if you don't tell them that something has hung up! * * Check-me. * - * Check number 1. POLLHUP is _UNMASKABLE_ event (see UNIX98 and + * Check number 1. EPOLLHUP is _UNMASKABLE_ event (see UNIX98 and * our fs/select.c). It means that after we received EOF, * poll always returns immediately, making impossible poll() on write() - * in state CLOSE_WAIT. One solution is evident --- to set POLLHUP + * in state CLOSE_WAIT. One solution is evident --- to set EPOLLHUP * if and only if shutdown has been made in both directions. * Actually, it is interesting to look how Solaris and DUX - * solve this dilemma. I would prefer, if POLLHUP were maskable, + * solve this dilemma. I would prefer, if EPOLLHUP were maskable, * then we could set it on SND_SHUTDOWN. BTW examples given * in Stevens' books assume exactly this behaviour, it explains - * why POLLHUP is incompatible with POLLOUT. --ANK + * why EPOLLHUP is incompatible with EPOLLOUT. --ANK * * NOTE. Check for TCP_CLOSE is added. The goal is to prevent * blocking on fresh not-connected or disconnected socket. --ANK */ if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE) - mask |= POLLHUP; + mask |= EPOLLHUP; if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLIN | POLLRDNORM | POLLRDHUP; + mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; /* Connected or passive Fast Open socket? 
*/ if (state != TCP_SYN_SENT && @@ -554,11 +554,11 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) target++; if (tp->rcv_nxt - tp->copied_seq >= target) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { if (sk_stream_is_writeable(sk)) { - mask |= POLLOUT | POLLWRNORM; + mask |= EPOLLOUT | EPOLLWRNORM; } else { /* send SIGIO later */ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); @@ -570,24 +570,24 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) */ smp_mb__after_atomic(); if (sk_stream_is_writeable(sk)) - mask |= POLLOUT | POLLWRNORM; + mask |= EPOLLOUT | EPOLLWRNORM; } } else - mask |= POLLOUT | POLLWRNORM; + mask |= EPOLLOUT | EPOLLWRNORM; if (tp->urg_data & TCP_URG_VALID) - mask |= POLLPRI; + mask |= EPOLLPRI; } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { /* Active TCP fastopen socket with defer_connect - * Return POLLOUT so application can call write() + * Return EPOLLOUT so application can call write() * in order for kernel to generate SYN+data */ - mask |= POLLOUT | POLLWRNORM; + mask |= EPOLLOUT | EPOLLWRNORM; } /* This barrier is coupled with smp_wmb() in tcp_reset() */ smp_rmb(); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= EPOLLERR; return mask; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cfa51cfd2d99..575d3c1fb6e8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -315,7 +315,7 @@ static void tcp_sndbuf_expand(struct sock *sk) /* Fast Recovery (RFC 5681 3.2) : * Cubic needs 1.7 factor, rounded to 2 to include - * extra cushion (application might react slowly to POLLOUT) + * extra cushion (application might react slowly to EPOLLOUT) */ sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2; sndmem *= nr_segs * per_mss; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 95738aa0d8a6..f8ad397e285e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -705,7 +705,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) */ if (sk) { arg.bound_dev_if = sk->sk_bound_dev_if; - trace_tcp_send_reset(sk, skb); + if (sk_fullsock(sk)) + trace_tcp_send_reset(sk, skb); } BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 6bb9e14c710a..622caa4039e0 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -29,6 +29,18 @@ static struct tcp_ulp_ops *tcp_ulp_find(const char *name) return NULL; } +static struct tcp_ulp_ops *tcp_ulp_find_id(const int ulp) +{ + struct tcp_ulp_ops *e; + + list_for_each_entry_rcu(e, &tcp_ulp_list, list) { + if (e->uid == ulp) + return e; + } + + return NULL; +} + static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name) { const struct tcp_ulp_ops *ulp = NULL; @@ -51,6 +63,18 @@ static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name) return ulp; } +static const struct tcp_ulp_ops *__tcp_ulp_lookup(const int uid) +{ + const struct tcp_ulp_ops *ulp; + + rcu_read_lock(); + ulp = tcp_ulp_find_id(uid); + if (!ulp || !try_module_get(ulp->owner)) + ulp = NULL; + rcu_read_unlock(); + return ulp; +} + /* Attach new upper layer protocol to the list * of available protocols. 
*/ @@ -59,13 +83,10 @@ int tcp_register_ulp(struct tcp_ulp_ops *ulp) int ret = 0; spin_lock(&tcp_ulp_list_lock); - if (tcp_ulp_find(ulp->name)) { - pr_notice("%s already registered or non-unique name\n", - ulp->name); + if (tcp_ulp_find(ulp->name)) ret = -EEXIST; - } else { + else list_add_tail_rcu(&ulp->list, &tcp_ulp_list); - } spin_unlock(&tcp_ulp_list_lock); return ret; @@ -124,6 +145,34 @@ int tcp_set_ulp(struct sock *sk, const char *name) if (!ulp_ops) return -ENOENT; + if (!ulp_ops->user_visible) { + module_put(ulp_ops->owner); + return -ENOENT; + } + + err = ulp_ops->init(sk); + if (err) { + module_put(ulp_ops->owner); + return err; + } + + icsk->icsk_ulp_ops = ulp_ops; + return 0; +} + +int tcp_set_ulp_id(struct sock *sk, int ulp) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_ulp_ops *ulp_ops; + int err; + + if (icsk->icsk_ulp_ops) + return -EEXIST; + + ulp_ops = __tcp_ulp_lookup(ulp); + if (!ulp_ops) + return -ENOENT; + err = ulp_ops->init(sk); if (err) { module_put(ulp_ops->owner); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f81f969f9c06..bfaefe560b5c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2501,12 +2501,12 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sock *sk = sock->sk; if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; /* Check for false positives due to checksum errors */ - if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && + if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) && !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1) - mask &= ~(POLLIN | POLLRDNORM); + mask &= ~(EPOLLIN | EPOLLRDNORM); return mask; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 4a634b7a2c80..d395d1590699 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -73,8 +73,7 @@ endif # NF_TABLES config NF_FLOW_TABLE_IPV6 tristate "Netfilter flow table IPv6 module" - depends on NF_CONNTRACK && NF_TABLES - select NF_FLOW_TABLE + depends on NF_FLOW_TABLE help This option adds the flow table IPv6 support. diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index ce53dcfda88a..b84ce3e6d728 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -264,6 +264,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, * this case. -DaveM */ pr_debug("end of fragment not rounded to 8 bytes.\n"); + inet_frag_kill(&fq->q, &nf_frags); return -EPROTO; } if (end > fq->q.len) { diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c index fff21602875a..d346705d6ee6 100644 --- a/net/ipv6/netfilter/nf_flow_table_ipv6.c +++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c @@ -253,6 +253,7 @@ static struct nf_flowtable_type flowtable_ipv6 = { .family = NFPROTO_IPV6, .params = &nf_flow_offload_rhash_params, .gc = nf_flow_offload_work_gc, + .free = nf_flow_table_free, .hook = nf_flow_offload_ipv6_hook, .owner = THIS_MODULE, }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fb2d251c0500..9dcfadddd800 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2479,7 +2479,7 @@ static int ip6_route_check_nh_onlink(struct net *net, struct net_device *dev, struct netlink_ext_ack *extack) { - u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL; + u32 tbid = l3mdev_fib_table(dev) ? 
: RT_TABLE_MAIN; const struct in6_addr *gw_addr = &cfg->fc_gateway; u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT; struct rt6_info *grt; @@ -2488,8 +2488,10 @@ static int ip6_route_check_nh_onlink(struct net *net, err = 0; grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0); if (grt) { - if (grt->rt6i_flags & flags || dev != grt->dst.dev) { - NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); + if (!grt->dst.error && + (grt->rt6i_flags & flags || dev != grt->dst.dev)) { + NL_SET_ERR_MSG(extack, + "Nexthop has invalid gateway or device mismatch"); err = -EINVAL; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a1ab29e2ab3b..412139f4eccd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -942,7 +942,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) if (sk) { oif = sk->sk_bound_dev_if; - trace_tcp_send_reset(sk, skb); + if (sk_fullsock(sk)) + trace_tcp_send_reset(sk, skb); } tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 64331158d693..1e8cc7bcbca3 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1483,7 +1483,7 @@ static inline __poll_t iucv_accept_poll(struct sock *parent) sk = (struct sock *) isk; if (sk->sk_state == IUCV_CONNECTED) - return POLLIN | POLLRDNORM; + return EPOLLIN | EPOLLRDNORM; } return 0; @@ -1501,27 +1501,27 @@ __poll_t iucv_sock_poll(struct file *file, struct socket *sock, return iucv_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR | - (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); + mask |= EPOLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLRDHUP; + mask |= EPOLLRDHUP; if (sk->sk_shutdown == SHUTDOWN_MASK) - mask |= POLLHUP; + mask |= EPOLLHUP; if (!skb_queue_empty(&sk->sk_receive_queue) || (sk->sk_shutdown & RCV_SHUTDOWN)) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; if (sk->sk_state == IUCV_CLOSED) - mask |= POLLHUP; + mask |= EPOLLHUP; if (sk->sk_state == IUCV_DISCONN) - mask |= POLLIN; + mask |= EPOLLIN; if (sock_writeable(sk) && iucv_below_msglim(sk)) - mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; else sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 4a8d407f8902..f297d53a11aa 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -396,8 +396,8 @@ static int kcm_read_sock_done(struct strparser *strp, int err) static void psock_state_change(struct sock *sk) { - /* TCP only does a POLLIN for a half close. Do a POLLHUP here - * since application will normally not poll with POLLIN + /* TCP only does a EPOLLIN for a half close. Do a EPOLLHUP here + * since application will normally not poll with EPOLLIN * on the TCP sockets. 
*/ @@ -1338,7 +1338,7 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so * we set sk_state, otherwise epoll_wait always returns right away with - * POLLHUP + * EPOLLHUP */ kcm->sk.sk_state = TCP_ESTABLISHED; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 5dce8336d33f..e545a3c9365f 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -8,6 +8,7 @@ #include <linux/ipv6.h> #include <linux/mpls.h> #include <linux/netconf.h> +#include <linux/nospec.h> #include <linux/vmalloc.h> #include <linux/percpu.h> #include <net/ip.h> @@ -935,24 +936,27 @@ errout: return err; } -static bool mpls_label_ok(struct net *net, unsigned int index, +static bool mpls_label_ok(struct net *net, unsigned int *index, struct netlink_ext_ack *extack) { + bool is_ok = true; + /* Reserved labels may not be set */ - if (index < MPLS_LABEL_FIRST_UNRESERVED) { + if (*index < MPLS_LABEL_FIRST_UNRESERVED) { NL_SET_ERR_MSG(extack, "Invalid label - must be MPLS_LABEL_FIRST_UNRESERVED or higher"); - return false; + is_ok = false; } /* The full 20 bit range may not be supported. */ - if (index >= net->mpls.platform_labels) { + if (is_ok && *index >= net->mpls.platform_labels) { NL_SET_ERR_MSG(extack, "Label >= configured maximum in platform_labels"); - return false; + is_ok = false; } - return true; + *index = array_index_nospec(*index, net->mpls.platform_labels); + return is_ok; } static int mpls_route_add(struct mpls_route_config *cfg, @@ -975,7 +979,7 @@ static int mpls_route_add(struct mpls_route_config *cfg, index = find_free_label(net); } - if (!mpls_label_ok(net, index, extack)) + if (!mpls_label_ok(net, &index, extack)) goto errout; /* Append makes no sense with mpls */ @@ -1052,7 +1056,7 @@ static int mpls_route_del(struct mpls_route_config *cfg, index = cfg->rc_label; - if (!mpls_label_ok(net, index, extack)) + if (!mpls_label_ok(net, &index, extack)) goto errout; mpls_route_update(net, index, NULL, &cfg->rc_nlinfo); @@ -1810,7 +1814,7 @@ static int rtm_to_route_config(struct sk_buff *skb, goto errout; if (!mpls_label_ok(cfg->rc_nlinfo.nl_net, - cfg->rc_label, extack)) + &cfg->rc_label, extack)) goto errout; break; } @@ -2137,7 +2141,7 @@ static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, goto errout; } - if (!mpls_label_ok(net, in_label, extack)) { + if (!mpls_label_ok(net, &in_label, extack)) { err = -EINVAL; goto errout; } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 9019fa98003d..d3220b43c832 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -666,8 +666,8 @@ endif # NF_TABLES config NF_FLOW_TABLE_INET tristate "Netfilter flow table mixed IPv4/IPv6 module" - depends on NF_FLOW_TABLE_IPV4 && NF_FLOW_TABLE_IPV6 - select NF_FLOW_TABLE + depends on NF_FLOW_TABLE_IPV4 + depends on NF_FLOW_TABLE_IPV6 help This option adds the flow table mixed IPv4/IPv6 support. @@ -675,7 +675,9 @@ config NF_FLOW_TABLE_INET config NF_FLOW_TABLE tristate "Netfilter flow table module" - depends on NF_CONNTRACK && NF_TABLES + depends on NETFILTER_INGRESS + depends on NF_CONNTRACK + depends on NF_TABLES help This option adds the flow table core infrastructure. 
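The af_mpls.c hunks above are Spectre-v1 hardening: mpls_label_ok() now takes the label index by reference so that, after the bounds checks, callers index platform_labels with a value clamped by array_index_nospec() from the newly included <linux/nospec.h>. A generic sketch of the check-then-clamp pattern (struct entry, table and nr_entries are hypothetical names, not from this diff):

#include <linux/nospec.h>

struct entry;

static struct entry *demo_lookup(struct entry **table,
				 unsigned int nr_entries,
				 unsigned int index)
{
	if (index >= nr_entries)
		return NULL;
	/*
	 * Clamp the index so a mispredicted bounds check above cannot
	 * be used to speculatively load out of bounds.
	 */
	index = array_index_nospec(index, nr_entries);
	return table[index];
}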
diff --git a/net/netfilter/nf_flow_table.c b/net/netfilter/nf_flow_table.c index 2f5099cb85b8..ec410cae9307 100644 --- a/net/netfilter/nf_flow_table.c +++ b/net/netfilter/nf_flow_table.c @@ -4,6 +4,7 @@ #include <linux/netfilter.h> #include <linux/rhashtable.h> #include <linux/netdevice.h> +#include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_flow_table.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> @@ -124,7 +125,9 @@ void flow_offload_free(struct flow_offload *flow) dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache); dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache); e = container_of(flow, struct flow_offload_entry, flow); - kfree(e); + nf_ct_delete(e->ct, 0, 0); + nf_ct_put(e->ct); + kfree_rcu(e, rcu_head); } EXPORT_SYMBOL_GPL(flow_offload_free); @@ -148,11 +151,9 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) } EXPORT_SYMBOL_GPL(flow_offload_add); -void flow_offload_del(struct nf_flowtable *flow_table, - struct flow_offload *flow) +static void flow_offload_del(struct nf_flowtable *flow_table, + struct flow_offload *flow) { - struct flow_offload_entry *e; - rhashtable_remove_fast(&flow_table->rhashtable, &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, *flow_table->type->params); @@ -160,10 +161,8 @@ void flow_offload_del(struct nf_flowtable *flow_table, &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, *flow_table->type->params); - e = container_of(flow, struct flow_offload_entry, flow); - kfree_rcu(e, rcu_head); + flow_offload_free(flow); } -EXPORT_SYMBOL_GPL(flow_offload_del); struct flow_offload_tuple_rhash * flow_offload_lookup(struct nf_flowtable *flow_table, @@ -174,15 +173,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table, } EXPORT_SYMBOL_GPL(flow_offload_lookup); -static void nf_flow_release_ct(const struct flow_offload *flow) -{ - struct flow_offload_entry *e; - - e = container_of(flow, struct flow_offload_entry, flow); - nf_ct_delete(e->ct, 0, 0); - nf_ct_put(e->ct); -} - int nf_flow_table_iterate(struct nf_flowtable *flow_table, void (*iter)(struct flow_offload *flow, void *data), void *data) @@ -231,19 +221,16 @@ static inline bool nf_flow_is_dying(const struct flow_offload *flow) return flow->flags & FLOW_OFFLOAD_DYING; } -void nf_flow_offload_work_gc(struct work_struct *work) +static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table) { struct flow_offload_tuple_rhash *tuplehash; - struct nf_flowtable *flow_table; struct rhashtable_iter hti; struct flow_offload *flow; int err; - flow_table = container_of(work, struct nf_flowtable, gc_work.work); - err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL); if (err) - goto schedule; + return 0; rhashtable_walk_start(&hti); @@ -261,15 +248,22 @@ void nf_flow_offload_work_gc(struct work_struct *work) flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); if (nf_flow_has_expired(flow) || - nf_flow_is_dying(flow)) { + nf_flow_is_dying(flow)) flow_offload_del(flow_table, flow); - nf_flow_release_ct(flow); - } } out: rhashtable_walk_stop(&hti); rhashtable_walk_exit(&hti); -schedule: + + return 1; +} + +void nf_flow_offload_work_gc(struct work_struct *work) +{ + struct nf_flowtable *flow_table; + + flow_table = container_of(work, struct nf_flowtable, gc_work.work); + nf_flow_offload_gc_step(flow_table); queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); } EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc); @@ -425,5 +419,35 @@ int 
nf_flow_dnat_port(const struct flow_offload *flow, } EXPORT_SYMBOL_GPL(nf_flow_dnat_port); +static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data) +{ + struct net_device *dev = data; + + if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex) + return; + + flow_offload_dead(flow); +} + +static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable, + void *data) +{ + nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data); + flush_delayed_work(&flowtable->gc_work); +} + +void nf_flow_table_cleanup(struct net *net, struct net_device *dev) +{ + nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev); +} +EXPORT_SYMBOL_GPL(nf_flow_table_cleanup); + +void nf_flow_table_free(struct nf_flowtable *flow_table) +{ + nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); + WARN_ON(!nf_flow_offload_gc_step(flow_table)); +} +EXPORT_SYMBOL_GPL(nf_flow_table_free); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index 281209aeba8f..375a1881d93d 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -24,6 +24,7 @@ static struct nf_flowtable_type flowtable_inet = { .family = NFPROTO_INET, .params = &nf_flow_offload_rhash_params, .gc = nf_flow_offload_work_gc, + .free = nf_flow_table_free, .hook = nf_flow_offload_inet_hook, .owner = THIS_MODULE, }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0791813a1e7d..8b9fe30de0cd 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5006,13 +5006,13 @@ void nft_flow_table_iterate(struct net *net, struct nft_flowtable *flowtable; const struct nft_table *table; - rcu_read_lock(); - list_for_each_entry_rcu(table, &net->nft.tables, list) { - list_for_each_entry_rcu(flowtable, &table->flowtables, list) { + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(flowtable, &table->flowtables, list) { iter(&flowtable->data, data); } } - rcu_read_unlock(); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_flow_table_iterate); @@ -5399,17 +5399,12 @@ err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); } -static void nft_flowtable_destroy(void *ptr, void *arg) -{ - kfree(ptr); -} - static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) { cancel_delayed_work_sync(&flowtable->data.gc_work); kfree(flowtable->name); - rhashtable_free_and_destroy(&flowtable->data.rhashtable, - nft_flowtable_destroy, NULL); + flowtable->data.type->free(&flowtable->data); + rhashtable_destroy(&flowtable->data.rhashtable); module_put(flowtable->data.type->owner); } diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 4503b8dcf9c0..b65829b2be22 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -194,22 +194,6 @@ static struct nft_expr_type nft_flow_offload_type __read_mostly = { .owner = THIS_MODULE, }; -static void flow_offload_iterate_cleanup(struct flow_offload *flow, void *data) -{ - struct net_device *dev = data; - - if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex) - return; - - flow_offload_dead(flow); -} - -static void nft_flow_offload_iterate_cleanup(struct nf_flowtable *flowtable, - void *data) -{ - nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data); -} - static int flow_offload_netdev_event(struct 
notifier_block *this, unsigned long event, void *ptr) { @@ -218,7 +202,7 @@ static int flow_offload_netdev_event(struct notifier_block *this, if (event != NETDEV_DOWN) return NOTIFY_DONE; - nft_flow_table_iterate(dev_net(dev), nft_flow_offload_iterate_cleanup, dev); + nf_flow_table_cleanup(dev_net(dev), dev); return NOTIFY_DONE; } @@ -246,14 +230,8 @@ register_expr: static void __exit nft_flow_offload_module_exit(void) { - struct net *net; - nft_unregister_expr(&nft_flow_offload_type); unregister_netdevice_notifier(&flow_offload_netdev_notifier); - rtnl_lock(); - for_each_net(net) - nft_flow_table_iterate(net, nft_flow_offload_iterate_cleanup, NULL); - rtnl_unlock(); } module_init(nft_flow_offload_module_init); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 8fa4d37141a7..2f685ee1f9c8 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1008,7 +1008,12 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) if ((size >> PAGE_SHIFT) + 2 > totalram_pages) return NULL; - info = kvmalloc(sz, GFP_KERNEL); + /* __GFP_NORETRY is not fully supported by kvmalloc but it should + * work reasonably well if sz is too large and bail out rather + * than shoot all processes down before realizing there is nothing + * more to reclaim. + */ + info = kvmalloc(sz, GFP_KERNEL | __GFP_NORETRY); if (!info) return NULL; diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 498b54fd04d7..141c295191f6 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -39,23 +39,31 @@ static void xt_rateest_hash_insert(struct xt_rateest *est) hlist_add_head(&est->list, &rateest_hash[h]); } -struct xt_rateest *xt_rateest_lookup(const char *name) +static struct xt_rateest *__xt_rateest_lookup(const char *name) { struct xt_rateest *est; unsigned int h; h = xt_rateest_hash(name); - mutex_lock(&xt_rateest_mutex); hlist_for_each_entry(est, &rateest_hash[h], list) { if (strcmp(est->name, name) == 0) { est->refcnt++; - mutex_unlock(&xt_rateest_mutex); return est; } } - mutex_unlock(&xt_rateest_mutex); + return NULL; } + +struct xt_rateest *xt_rateest_lookup(const char *name) +{ + struct xt_rateest *est; + + mutex_lock(&xt_rateest_mutex); + est = __xt_rateest_lookup(name); + mutex_unlock(&xt_rateest_mutex); + return est; +} EXPORT_SYMBOL_GPL(xt_rateest_lookup); void xt_rateest_put(struct xt_rateest *est) @@ -100,8 +108,10 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) net_get_random_once(&jhash_rnd, sizeof(jhash_rnd)); - est = xt_rateest_lookup(info->name); + mutex_lock(&xt_rateest_mutex); + est = __xt_rateest_lookup(info->name); if (est) { + mutex_unlock(&xt_rateest_mutex); /* * If estimator parameters are specified, they must match the * existing estimator. 
@@ -139,11 +149,13 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) info->est = est; xt_rateest_hash_insert(est); + mutex_unlock(&xt_rateest_mutex); return 0; err2: kfree(est); err1: + mutex_unlock(&xt_rateest_mutex); return ret; } diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 1db1ce59079f..891f4e7e8ea7 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -52,6 +52,7 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) return -EINVAL; } + info->priv = NULL; if (info->has_path) { cgrp = cgroup_get_from_path(info->path); if (IS_ERR(cgrp)) { diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index d444daf1ac04..6f02499ef007 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1081,6 +1081,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, { struct sk_buff *tmp; struct net *net, *prev = NULL; + bool delivered = false; int err; for_each_net_rcu(net) { @@ -1092,14 +1093,21 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, } err = nlmsg_multicast(prev->genl_sock, tmp, portid, group, flags); - if (err) + if (!err) + delivered = true; + else if (err != -ESRCH) goto error; } prev = net; } - return nlmsg_multicast(prev->genl_sock, skb, portid, group, flags); + err = nlmsg_multicast(prev->genl_sock, skb, portid, group, flags); + if (!err) + delivered = true; + else if (err != -ESRCH) + goto error; + return delivered ? 0 : -ESRCH; error: kfree_skb(skb); return err; diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 985909f105eb..376040092142 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -543,7 +543,7 @@ static inline __poll_t llcp_accept_poll(struct sock *parent) sk = &llcp_sock->sk; if (sk->sk_state == LLCP_CONNECTED) - return POLLIN | POLLRDNORM; + return EPOLLIN | EPOLLRDNORM; } return 0; @@ -563,23 +563,23 @@ static __poll_t llcp_sock_poll(struct file *file, struct socket *sock, return llcp_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR | - (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); + mask |= EPOLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
EPOLLPRI : 0); if (!skb_queue_empty(&sk->sk_receive_queue)) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; if (sk->sk_state == LLCP_CLOSED) - mask |= POLLHUP; + mask |= EPOLLHUP; if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLRDHUP | POLLIN | POLLRDNORM; + mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) - mask |= POLLHUP; + mask |= EPOLLHUP; if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED) - mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; else sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1d1483007e46..e0f3f4aeeb4f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4085,7 +4085,7 @@ static __poll_t packet_poll(struct file *file, struct socket *sock, if (po->rx_ring.pg_vec) { if (!packet_previous_rx_frame(po, &po->rx_ring, TP_STATUS_KERNEL)) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; } if (po->pressure && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL) po->pressure = 0; @@ -4093,7 +4093,7 @@ static __poll_t packet_poll(struct file *file, struct socket *sock, spin_lock_bh(&sk->sk_write_queue.lock); if (po->tx_ring.pg_vec) { if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE)) - mask |= POLLOUT | POLLWRNORM; + mask |= EPOLLOUT | EPOLLWRNORM; } spin_unlock_bh(&sk->sk_write_queue.lock); return mask; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 08f6751d2030..fffcd69f63ff 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -351,18 +351,18 @@ static __poll_t pn_socket_poll(struct file *file, struct socket *sock, poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == TCP_CLOSE) - return POLLERR; + return EPOLLERR; if (!skb_queue_empty(&sk->sk_receive_queue)) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; if (!skb_queue_empty(&pn->ctrlreq_queue)) - mask |= POLLPRI; + mask |= EPOLLPRI; if (!mask && sk->sk_state == TCP_CLOSE_WAIT) - return POLLHUP; + return EPOLLHUP; if (sk->sk_state == TCP_ESTABLISHED && refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf && atomic_read(&pn->tx_credits)) - mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; return mask; } diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 88aa8ad0f5b6..744c637c86b0 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -137,17 +137,17 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr, /* * RDS' poll is without a doubt the least intuitive part of the interface, - * as POLLIN and POLLOUT do not behave entirely as you would expect from + * as EPOLLIN and EPOLLOUT do not behave entirely as you would expect from * a network protocol. * - * POLLIN is asserted if + * EPOLLIN is asserted if * - there is data on the receive queue. * - to signal that a previously congested destination may have become * uncongested * - A notification has been queued to the socket (this can be a congestion * update, or a RDMA completion). * - * POLLOUT is asserted if there is room on the send queue. This does not mean + * EPOLLOUT is asserted if there is room on the send queue. This does not mean * however, that the next sendmsg() call will succeed. If the application tries * to send to a congested destination, the system call may still fail (and * return ENOBUFS). 
@@ -167,22 +167,22 @@ static __poll_t rds_poll(struct file *file, struct socket *sock, read_lock_irqsave(&rs->rs_recv_lock, flags); if (!rs->rs_cong_monitor) { - /* When a congestion map was updated, we signal POLLIN for + /* When a congestion map was updated, we signal EPOLLIN for * "historical" reasons. Applications can also poll for * WRBAND instead. */ if (rds_cong_updated_since(&rs->rs_cong_track)) - mask |= (POLLIN | POLLRDNORM | POLLWRBAND); + mask |= (EPOLLIN | EPOLLRDNORM | EPOLLWRBAND); } else { spin_lock(&rs->rs_lock); if (rs->rs_cong_notify) - mask |= (POLLIN | POLLRDNORM); + mask |= (EPOLLIN | EPOLLRDNORM); spin_unlock(&rs->rs_lock); } if (!list_empty(&rs->rs_recv_queue) || !list_empty(&rs->rs_notify_queue)) - mask |= (POLLIN | POLLRDNORM); + mask |= (EPOLLIN | EPOLLRDNORM); if (rs->rs_snd_bytes < rds_sk_sndbuf(rs)) - mask |= (POLLOUT | POLLWRNORM); + mask |= (EPOLLOUT | EPOLLWRNORM); read_unlock_irqrestore(&rs->rs_recv_lock, flags); /* clear state any time we wake a seen-congested socket */ diff --git a/net/rds/cong.c b/net/rds/cong.c index 8d19fd25dce3..63da9d2f142d 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -223,7 +223,7 @@ void rds_cong_queue_updates(struct rds_cong_map *map) rcu_read_lock(); if (!test_and_set_bit(0, &conn->c_map_queued) && - !test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) { + !rds_destroy_pending(cp->cp_conn)) { rds_stats_inc(s_cong_update_queued); /* We cannot inline the call to rds_send_xmit() here * for two reasons (both pertaining to a TCP transport): diff --git a/net/rds/connection.c b/net/rds/connection.c index b10c0ef36d8d..94e190febfdd 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -220,8 +220,13 @@ static struct rds_connection *__rds_conn_create(struct net *net, is_outgoing); conn->c_path[i].cp_index = i; } - ret = trans->conn_alloc(conn, gfp); + rcu_read_lock(); + if (rds_destroy_pending(conn)) + ret = -ENETDOWN; + else + ret = trans->conn_alloc(conn, gfp); if (ret) { + rcu_read_unlock(); kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); conn = ERR_PTR(ret); @@ -283,6 +288,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, } } spin_unlock_irqrestore(&rds_conn_lock, flags); + rcu_read_unlock(); out: return conn; @@ -382,13 +388,10 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp) { struct rds_message *rm, *rtmp; - set_bit(RDS_DESTROY_PENDING, &cp->cp_flags); - if (!cp->cp_transport_data) return; /* make sure lingering queued work won't try to ref the conn */ - synchronize_rcu(); cancel_delayed_work_sync(&cp->cp_send_w); cancel_delayed_work_sync(&cp->cp_recv_w); @@ -691,7 +694,7 @@ void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy) atomic_set(&cp->cp_state, RDS_CONN_ERROR); rcu_read_lock(); - if (!destroy && test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) { + if (!destroy && rds_destroy_pending(cp->cp_conn)) { rcu_read_unlock(); return; } @@ -714,7 +717,7 @@ EXPORT_SYMBOL_GPL(rds_conn_drop); void rds_conn_path_connect_if_down(struct rds_conn_path *cp) { rcu_read_lock(); - if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) { + if (rds_destroy_pending(cp->cp_conn)) { rcu_read_unlock(); return; } diff --git a/net/rds/ib.c b/net/rds/ib.c index ff0c98096af1..50a88f3e7e39 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -48,6 +48,7 @@ static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE; static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE; unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT; +static atomic_t rds_ib_unloading; 
module_param(rds_ib_mr_1m_pool_size, int, 0444); MODULE_PARM_DESC(rds_ib_mr_1m_pool_size, " Max number of 1M mr per HCA"); @@ -378,8 +379,23 @@ static void rds_ib_unregister_client(void) flush_workqueue(rds_wq); } +static void rds_ib_set_unloading(void) +{ + atomic_set(&rds_ib_unloading, 1); +} + +static bool rds_ib_is_unloading(struct rds_connection *conn) +{ + struct rds_conn_path *cp = &conn->c_path[0]; + + return (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags) || + atomic_read(&rds_ib_unloading) != 0); +} + void rds_ib_exit(void) { + rds_ib_set_unloading(); + synchronize_rcu(); rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); rds_ib_unregister_client(); rds_ib_destroy_nodev_conns(); @@ -413,6 +429,7 @@ struct rds_transport rds_ib_transport = { .flush_mrs = rds_ib_flush_mrs, .t_owner = THIS_MODULE, .t_name = "infiniband", + .t_unloading = rds_ib_is_unloading, .t_type = RDS_TRANS_IB }; diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 80fb6f63e768..eea1d8611b20 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -117,6 +117,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even &conn->c_laddr, &conn->c_faddr, RDS_PROTOCOL_MAJOR(conn->c_version), RDS_PROTOCOL_MINOR(conn->c_version)); + set_bit(RDS_DESTROY_PENDING, &conn->c_path[0].cp_flags); rds_conn_destroy(conn); return; } else { diff --git a/net/rds/rds.h b/net/rds/rds.h index 374ae83b60d4..7301b9b01890 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -518,6 +518,7 @@ struct rds_transport { void (*sync_mr)(void *trans_private, int direction); void (*free_mr)(void *trans_private, int invalidate); void (*flush_mrs)(void); + bool (*t_unloading)(struct rds_connection *conn); }; struct rds_sock { @@ -862,6 +863,12 @@ static inline void rds_mr_put(struct rds_mr *mr) __rds_put_mr_final(mr); } +static inline bool rds_destroy_pending(struct rds_connection *conn) +{ + return !check_net(rds_conn_net(conn)) || + (conn->c_trans->t_unloading && conn->c_trans->t_unloading(conn)); +} + /* stats.c */ DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); #define rds_stats_inc_which(which, member) do { \ diff --git a/net/rds/send.c b/net/rds/send.c index d3e32d1f3c7d..b1b0022b8370 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -162,7 +162,7 @@ restart: goto out; } - if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) { + if (rds_destroy_pending(cp->cp_conn)) { release_in_xmit(cp); ret = -ENETUNREACH; /* dont requeue send work */ goto out; @@ -444,7 +444,7 @@ over_batch: if (batch_count < send_batch_count) goto restart; rcu_read_lock(); - if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) + if (rds_destroy_pending(cp->cp_conn)) ret = -ENETUNREACH; else queue_delayed_work(rds_wq, &cp->cp_send_w, 1); @@ -1162,7 +1162,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) else cpath = &conn->c_path[0]; - if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags)) { + if (rds_destroy_pending(conn)) { ret = -EAGAIN; goto out; } @@ -1209,7 +1209,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) if (ret == -ENOMEM || ret == -EAGAIN) { ret = 0; rcu_read_lock(); - if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags)) + if (rds_destroy_pending(cpath->cp_conn)) ret = -ENETUNREACH; else queue_delayed_work(rds_wq, &cpath->cp_send_w, 1); @@ -1295,7 +1295,7 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport, /* schedule the send work on rds_wq */ rcu_read_lock(); - if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) + if 
(!rds_destroy_pending(cp->cp_conn)) queue_delayed_work(rds_wq, &cp->cp_send_w, 1); rcu_read_unlock(); diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 9920d2f84eff..44c4652721af 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -49,6 +49,7 @@ static unsigned int rds_tcp_tc_count; /* Track rds_tcp_connection structs so they can be cleaned up */ static DEFINE_SPINLOCK(rds_tcp_conn_lock); static LIST_HEAD(rds_tcp_conn_list); +static atomic_t rds_tcp_unloading = ATOMIC_INIT(0); static struct kmem_cache *rds_tcp_conn_slab; @@ -274,14 +275,13 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr) static void rds_tcp_conn_free(void *arg) { struct rds_tcp_connection *tc = arg; - unsigned long flags; rdsdebug("freeing tc %p\n", tc); - spin_lock_irqsave(&rds_tcp_conn_lock, flags); + spin_lock_bh(&rds_tcp_conn_lock); if (!tc->t_tcp_node_detached) list_del(&tc->t_tcp_node); - spin_unlock_irqrestore(&rds_tcp_conn_lock, flags); + spin_unlock_bh(&rds_tcp_conn_lock); kmem_cache_free(rds_tcp_conn_slab, tc); } @@ -296,7 +296,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); if (!tc) { ret = -ENOMEM; - break; + goto fail; } mutex_init(&tc->t_conn_path_lock); tc->t_sock = NULL; @@ -306,14 +306,19 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) conn->c_path[i].cp_transport_data = tc; tc->t_cpath = &conn->c_path[i]; + tc->t_tcp_node_detached = true; - spin_lock_irq(&rds_tcp_conn_lock); - tc->t_tcp_node_detached = false; - list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); - spin_unlock_irq(&rds_tcp_conn_lock); rdsdebug("rds_conn_path [%d] tc %p\n", i, conn->c_path[i].cp_transport_data); } + spin_lock_bh(&rds_tcp_conn_lock); + for (i = 0; i < RDS_MPATH_WORKERS; i++) { + tc = conn->c_path[i].cp_transport_data; + tc->t_tcp_node_detached = false; + list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); + } + spin_unlock_bh(&rds_tcp_conn_lock); +fail: if (ret) { for (j = 0; j < i; j++) rds_tcp_conn_free(conn->c_path[j].cp_transport_data); @@ -332,6 +337,16 @@ static bool list_has_conn(struct list_head *list, struct rds_connection *conn) return false; } +static void rds_tcp_set_unloading(void) +{ + atomic_set(&rds_tcp_unloading, 1); +} + +static bool rds_tcp_is_unloading(struct rds_connection *conn) +{ + return atomic_read(&rds_tcp_unloading) != 0; +} + static void rds_tcp_destroy_conns(void) { struct rds_tcp_connection *tc, *_tc; @@ -370,6 +385,7 @@ struct rds_transport rds_tcp_transport = { .t_type = RDS_TRANS_TCP, .t_prefer_loopback = 1, .t_mp_capable = 1, + .t_unloading = rds_tcp_is_unloading, }; static unsigned int rds_tcp_netid; @@ -513,7 +529,7 @@ static void rds_tcp_kill_sock(struct net *net) rtn->rds_tcp_listen_sock = NULL; rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); - spin_lock_irq(&rds_tcp_conn_lock); + spin_lock_bh(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); @@ -526,7 +542,7 @@ static void rds_tcp_kill_sock(struct net *net) tc->t_tcp_node_detached = true; } } - spin_unlock_irq(&rds_tcp_conn_lock); + spin_unlock_bh(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) rds_conn_destroy(tc->t_cpath->cp_conn); } @@ -574,7 +590,7 @@ static void rds_tcp_sysctl_reset(struct net *net) { struct rds_tcp_connection *tc, *_tc; - spin_lock_irq(&rds_tcp_conn_lock); + spin_lock_bh(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { struct net 
*c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); @@ -584,7 +600,7 @@ static void rds_tcp_sysctl_reset(struct net *net) /* reconnect with new parameters */ rds_conn_path_drop(tc->t_cpath, false); } - spin_unlock_irq(&rds_tcp_conn_lock); + spin_unlock_bh(&rds_tcp_conn_lock); } static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write, @@ -607,6 +623,8 @@ static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write, static void rds_tcp_exit(void) { + rds_tcp_set_unloading(); + synchronize_rcu(); rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); unregister_pernet_subsys(&rds_tcp_net_ops); if (unregister_netdevice_notifier(&rds_tcp_dev_notifier)) diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 534c67aeb20f..d999e7075645 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -170,7 +170,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp) cp->cp_conn, tc, sock); if (sock) { - if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) + if (rds_destroy_pending(cp->cp_conn)) rds_tcp_set_linger(sock); sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN); lock_sock(sock->sk); diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index dd707b9e73e5..b9fbd2ee74ef 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -323,7 +323,7 @@ void rds_tcp_data_ready(struct sock *sk) if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM) { rcu_read_lock(); - if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) + if (!rds_destroy_pending(cp->cp_conn)) queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); rcu_read_unlock(); } diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 16f65744d984..7df869d37afd 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -204,7 +204,7 @@ void rds_tcp_write_space(struct sock *sk) rcu_read_lock(); if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf && - !test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) + !rds_destroy_pending(cp->cp_conn)) queue_delayed_work(rds_wq, &cp->cp_send_w, 0); rcu_read_unlock(); diff --git a/net/rds/threads.c b/net/rds/threads.c index eb76db1360b0..c52861d77a59 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -88,7 +88,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr) cp->cp_reconnect_jiffies = 0; set_bit(0, &cp->cp_conn->c_map_queued); rcu_read_lock(); - if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) { + if (!rds_destroy_pending(cp->cp_conn)) { queue_delayed_work(rds_wq, &cp->cp_send_w, 0); queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); } @@ -138,7 +138,7 @@ void rds_queue_reconnect(struct rds_conn_path *cp) if (cp->cp_reconnect_jiffies == 0) { cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies; rcu_read_lock(); - if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) + if (!rds_destroy_pending(cp->cp_conn)) queue_delayed_work(rds_wq, &cp->cp_conn_w, 0); rcu_read_unlock(); return; @@ -149,7 +149,7 @@ void rds_queue_reconnect(struct rds_conn_path *cp) rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies, conn, &conn->c_laddr, &conn->c_faddr); rcu_read_lock(); - if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) + if (!rds_destroy_pending(cp->cp_conn)) queue_delayed_work(rds_wq, &cp->cp_conn_w, rand % cp->cp_reconnect_jiffies); rcu_read_unlock(); diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 124c77e9d058..59d0eb960275 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -1142,13 +1142,13 @@ static int rfkill_fop_open(struct inode *inode, struct file *file) static __poll_t rfkill_fop_poll(struct file *file, poll_table *wait) { 
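/* Note on the conversion pattern visible here and throughout: the poll
 * mask is now built from EPOLL* constants.  __poll_t is a bitwise-
 * annotated type and the EPOLL* values carry the matching annotation,
 * so sparse can flag masks that leak into a plain int (exactly what the
 * smc_accept_poll() hunk further down fixes). */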
struct rfkill_data *data = file->private_data; - __poll_t res = POLLOUT | POLLWRNORM; + __poll_t res = EPOLLOUT | EPOLLWRNORM; poll_wait(file, &data->read_wait, wait); mutex_lock(&data->mtx); if (!list_empty(&data->events)) - res = POLLIN | POLLRDNORM; + res = EPOLLIN | EPOLLRDNORM; mutex_unlock(&data->mtx); return res; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 21ad6a3a465c..0c9c18aa7c77 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -742,13 +742,13 @@ static __poll_t rxrpc_poll(struct file *file, struct socket *sock, /* the socket is readable if there are any messages waiting on the Rx * queue */ if (!list_empty(&rx->recvmsg_q)) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; /* the socket is writable if there is space to add new data to the * socket; there is no guarantee that any particular call in progress * on the socket may have space in the Tx ACK window */ if (rxrpc_writable(sk)) - mask |= POLLOUT | POLLWRNORM; + mask |= EPOLLOUT | EPOLLWRNORM; return mask; } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 7f74ca3059f8..064175068059 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -834,7 +834,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) * can be skipped if we find a follow-on call. The first DATA packet * of the follow on call will implicitly ACK this call. */ - if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { + if (call->completion == RXRPC_CALL_SUCCEEDED && + test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { unsigned long final_ack_at = jiffies + 2; WRITE_ONCE(chan->final_ack_at, final_ack_at); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 4ca11be6be3c..b1dfae107431 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -460,6 +460,7 @@ void rxrpc_process_connection(struct work_struct *work) case -EKEYEXPIRED: case -EKEYREJECTED: goto protocol_error; + case -ENOMEM: case -EAGAIN: goto requeue_and_leave; case -ECONNABORTED: diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index c628351eb900..ccbac190add1 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -177,13 +177,21 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, * through the channel, whilst disposing of the actual call record. */ trace_rxrpc_disconnect_call(call); - if (call->abort_code) { - chan->last_abort = call->abort_code; - chan->last_type = RXRPC_PACKET_TYPE_ABORT; - } else { + switch (call->completion) { + case RXRPC_CALL_SUCCEEDED: chan->last_seq = call->rx_hard_ack; chan->last_type = RXRPC_PACKET_TYPE_ACK; + break; + case RXRPC_CALL_LOCALLY_ABORTED: + chan->last_abort = call->abort_code; + chan->last_type = RXRPC_PACKET_TYPE_ABORT; + break; + default: + chan->last_abort = RX_USER_ABORT; + chan->last_type = RXRPC_PACKET_TYPE_ABORT; + break; } + /* Sync with rxrpc_conn_retransmit(). 
*/ smp_wmb(); chan->last_call = chan->call_id; diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index c38b3a1de56c..77cb23c7bd0a 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -773,8 +773,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, { const struct rxrpc_key_token *token; struct rxkad_challenge challenge; - struct rxkad_response resp - __attribute__((aligned(8))); /* must be aligned for crypto */ + struct rxkad_response *resp; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); const char *eproto; u32 version, nonce, min_level, abort_code; @@ -818,26 +817,29 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, token = conn->params.key->payload.data[0]; /* build the response packet */ - memset(&resp, 0, sizeof(resp)); - - resp.version = htonl(RXKAD_VERSION); - resp.encrypted.epoch = htonl(conn->proto.epoch); - resp.encrypted.cid = htonl(conn->proto.cid); - resp.encrypted.securityIndex = htonl(conn->security_ix); - resp.encrypted.inc_nonce = htonl(nonce + 1); - resp.encrypted.level = htonl(conn->params.security_level); - resp.kvno = htonl(token->kad->kvno); - resp.ticket_len = htonl(token->kad->ticket_len); - - resp.encrypted.call_id[0] = htonl(conn->channels[0].call_counter); - resp.encrypted.call_id[1] = htonl(conn->channels[1].call_counter); - resp.encrypted.call_id[2] = htonl(conn->channels[2].call_counter); - resp.encrypted.call_id[3] = htonl(conn->channels[3].call_counter); + resp = kzalloc(sizeof(struct rxkad_response), GFP_NOFS); + if (!resp) + return -ENOMEM; + + resp->version = htonl(RXKAD_VERSION); + resp->encrypted.epoch = htonl(conn->proto.epoch); + resp->encrypted.cid = htonl(conn->proto.cid); + resp->encrypted.securityIndex = htonl(conn->security_ix); + resp->encrypted.inc_nonce = htonl(nonce + 1); + resp->encrypted.level = htonl(conn->params.security_level); + resp->kvno = htonl(token->kad->kvno); + resp->ticket_len = htonl(token->kad->ticket_len); + resp->encrypted.call_id[0] = htonl(conn->channels[0].call_counter); + resp->encrypted.call_id[1] = htonl(conn->channels[1].call_counter); + resp->encrypted.call_id[2] = htonl(conn->channels[2].call_counter); + resp->encrypted.call_id[3] = htonl(conn->channels[3].call_counter); /* calculate the response checksum and then do the encryption */ - rxkad_calc_response_checksum(&resp); - rxkad_encrypt_response(conn, &resp, token->kad); - return rxkad_send_response(conn, &sp->hdr, &resp, token->kad); + rxkad_calc_response_checksum(resp); + rxkad_encrypt_response(conn, resp, token->kad); + ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad); + kfree(resp); + return ret; protocol_error: trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); @@ -1048,8 +1050,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, struct sk_buff *skb, u32 *_abort_code) { - struct rxkad_response response - __attribute__((aligned(8))); /* must be aligned for crypto */ + struct rxkad_response *response; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt session_key; const char *eproto; @@ -1061,17 +1062,22 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); + ret = -ENOMEM; + response = kzalloc(sizeof(struct rxkad_response), GFP_NOFS); + if (!response) + goto temporary_error; + eproto = tracepoint_string("rxkad_rsp_short"); abort_code = RXKADPACKETSHORT; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), - &response, sizeof(response)) < 0) + response, sizeof(*response)) < 0) goto protocol_error; 
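/* The response buffer moves off the stack here: the crypto layer maps
 * these buffers through scatterlists, which cannot address a virtually
 * mapped (CONFIG_VMAP_STACK) stack, so the structure is kzalloc'd
 * instead.  The cost is that every exit path must now free it, hence
 * the reworked protocol_error / temporary_error labels below. */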
- if (!pskb_pull(skb, sizeof(response))) + if (!pskb_pull(skb, sizeof(*response))) BUG(); - version = ntohl(response.version); - ticket_len = ntohl(response.ticket_len); - kvno = ntohl(response.kvno); + version = ntohl(response->version); + ticket_len = ntohl(response->ticket_len); + kvno = ntohl(response->kvno); _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }", sp->hdr.serial, version, kvno, ticket_len); @@ -1105,31 +1111,31 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, ret = rxkad_decrypt_ticket(conn, skb, ticket, ticket_len, &session_key, &expiry, _abort_code); if (ret < 0) - goto temporary_error_free; + goto temporary_error_free_resp; /* use the session key from inside the ticket to decrypt the * response */ - rxkad_decrypt_response(conn, &response, &session_key); + rxkad_decrypt_response(conn, response, &session_key); eproto = tracepoint_string("rxkad_rsp_param"); abort_code = RXKADSEALEDINCON; - if (ntohl(response.encrypted.epoch) != conn->proto.epoch) + if (ntohl(response->encrypted.epoch) != conn->proto.epoch) goto protocol_error_free; - if (ntohl(response.encrypted.cid) != conn->proto.cid) + if (ntohl(response->encrypted.cid) != conn->proto.cid) goto protocol_error_free; - if (ntohl(response.encrypted.securityIndex) != conn->security_ix) + if (ntohl(response->encrypted.securityIndex) != conn->security_ix) goto protocol_error_free; - csum = response.encrypted.checksum; - response.encrypted.checksum = 0; - rxkad_calc_response_checksum(&response); + csum = response->encrypted.checksum; + response->encrypted.checksum = 0; + rxkad_calc_response_checksum(response); eproto = tracepoint_string("rxkad_rsp_csum"); - if (response.encrypted.checksum != csum) + if (response->encrypted.checksum != csum) goto protocol_error_free; spin_lock(&conn->channel_lock); for (i = 0; i < RXRPC_MAXCALLS; i++) { struct rxrpc_call *call; - u32 call_id = ntohl(response.encrypted.call_id[i]); + u32 call_id = ntohl(response->encrypted.call_id[i]); eproto = tracepoint_string("rxkad_rsp_callid"); if (call_id > INT_MAX) @@ -1153,12 +1159,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, eproto = tracepoint_string("rxkad_rsp_seq"); abort_code = RXKADOUTOFSEQUENCE; - if (ntohl(response.encrypted.inc_nonce) != conn->security_nonce + 1) + if (ntohl(response->encrypted.inc_nonce) != conn->security_nonce + 1) goto protocol_error_free; eproto = tracepoint_string("rxkad_rsp_level"); abort_code = RXKADLEVELFAIL; - level = ntohl(response.encrypted.level); + level = ntohl(response->encrypted.level); if (level > RXRPC_SECURITY_ENCRYPT) goto protocol_error_free; conn->params.security_level = level; @@ -1168,9 +1174,10 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, * as for a client connection */ ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno); if (ret < 0) - goto temporary_error_free; + goto temporary_error_free_ticket; kfree(ticket); + kfree(response); _leave(" = 0"); return 0; @@ -1179,12 +1186,15 @@ protocol_error_unlock: protocol_error_free: kfree(ticket); protocol_error: + kfree(response); trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); *_abort_code = abort_code; return -EPROTO; -temporary_error_free: +temporary_error_free_ticket: kfree(ticket); +temporary_error_free_resp: + kfree(response); temporary_error: /* Ignore the response packet if we got a temporary error such as * ENOMEM. We just want to send the challenge again. 
Note that we diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index c07e8abc3d52..6c7601a530e3 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -947,7 +947,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } - if (n->flags != flags) { + if ((n->flags ^ flags) & + ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) { NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags"); return -EINVAL; } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 7bbc13b8ca47..7c179addebcd 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -327,7 +327,7 @@ static s64 tabledist(s64 mu, s32 sigma, /* default uniform distribution */ if (dist == NULL) - return (rnd % (2 * sigma)) - sigma + mu; + return ((rnd % (2 * sigma)) + mu) - sigma; t = dist->table[rnd % dist->size]; x = (sigma % NETEM_DIST_SCALE) * t; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 793b05ec692b..d01475f5f710 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1380,9 +1380,14 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, struct sctp_chunk *retval; struct sk_buff *skb; struct sock *sk; + int chunklen; + + chunklen = SCTP_PAD4(sizeof(*chunk_hdr) + paylen); + if (chunklen > SCTP_MAX_CHUNK_LEN) + goto nodata; /* No need to allocate LL here, as this is only a chunk. */ - skb = alloc_skb(SCTP_PAD4(sizeof(*chunk_hdr) + paylen), gfp); + skb = alloc_skb(chunklen, gfp); if (!skb) goto nodata; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ebb8cb9eb0bd..bf271f8c2dc9 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7602,22 +7602,22 @@ __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait) */ if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) return (!list_empty(&sp->ep->asocs)) ? - (POLLIN | POLLRDNORM) : 0; + (EPOLLIN | EPOLLRDNORM) : 0; mask = 0; /* Is there any exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR | - (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); + mask |= EPOLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLRDHUP | POLLIN | POLLRDNORM; + mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) - mask |= POLLHUP; + mask |= EPOLLHUP; /* Is it readable? Reconsider this code with TCP-style support. */ if (!skb_queue_empty(&sk->sk_receive_queue)) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; /* The association is either gone or not ready. */ if (!sctp_style(sk, UDP) && sctp_sstate(sk, CLOSED)) @@ -7625,7 +7625,7 @@ __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Is it writable? */ if (sctp_writeable(sk)) { - mask |= POLLOUT | POLLWRNORM; + mask |= EPOLLOUT | EPOLLWRNORM; } else { sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); /* @@ -7637,7 +7637,7 @@ __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait) * in the following code to cover it as well. 
*/ if (sctp_writeable(sk)) - mask |= POLLOUT | POLLWRNORM; + mask |= EPOLLOUT | EPOLLWRNORM; } return mask; } @@ -8161,8 +8161,8 @@ void sctp_data_ready(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) - wake_up_interruptible_sync_poll(&wq->wait, POLLIN | - POLLRDNORM | POLLRDBAND); + wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | + EPOLLRDNORM | EPOLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); rcu_read_unlock(); } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 3583c8ab1bae..da1a5cdefd13 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1141,11 +1141,11 @@ out: static __poll_t smc_accept_poll(struct sock *parent) { struct smc_sock *isk = smc_sk(parent); - int mask = 0; + __poll_t mask = 0; spin_lock(&isk->accept_q_lock); if (!list_empty(&isk->accept_q)) - mask = POLLIN | POLLRDNORM; + mask = EPOLLIN | EPOLLRDNORM; spin_unlock(&isk->accept_q_lock); return mask; @@ -1160,7 +1160,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, int rc; if (!sk) - return POLLNVAL; + return EPOLLNVAL; smc = smc_sk(sock->sk); sock_hold(sk); @@ -1171,16 +1171,16 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); /* if non-blocking connect finished ... */ lock_sock(sk); - if ((sk->sk_state == SMC_INIT) && (mask & POLLOUT)) { + if ((sk->sk_state == SMC_INIT) && (mask & EPOLLOUT)) { sk->sk_err = smc->clcsock->sk->sk_err; if (sk->sk_err) { - mask |= POLLERR; + mask |= EPOLLERR; } else { rc = smc_connect_rdma(smc); if (rc < 0) - mask |= POLLERR; + mask |= EPOLLERR; /* success cases including fallback */ - mask |= POLLOUT | POLLWRNORM; + mask |= EPOLLOUT | EPOLLWRNORM; } } } else { @@ -1190,27 +1190,27 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, lock_sock(sk); } if (sk->sk_err) - mask |= POLLERR; + mask |= EPOLLERR; if ((sk->sk_shutdown == SHUTDOWN_MASK) || (sk->sk_state == SMC_CLOSED)) - mask |= POLLHUP; + mask |= EPOLLHUP; if (sk->sk_state == SMC_LISTEN) { /* woken up by sk_data_ready in smc_listen_work() */ mask = smc_accept_poll(sk); } else { if (atomic_read(&smc->conn.sndbuf_space) || sk->sk_shutdown & SEND_SHUTDOWN) { - mask |= POLLOUT | POLLWRNORM; + mask |= EPOLLOUT | EPOLLWRNORM; } else { sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); } if (atomic_read(&smc->conn.bytes_to_rcv)) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLIN | POLLRDNORM | POLLRDHUP; + mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; if (sk->sk_state == SMC_APPCLOSEWAIT1) - mask |= POLLIN; + mask |= EPOLLIN; } } diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index 9dc392ca06bf..eff4e0d0bb31 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -35,8 +35,8 @@ static void smc_rx_data_ready(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) - wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI | - POLLRDNORM | POLLRDBAND); + wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI | + EPOLLRDNORM | EPOLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); if ((sk->sk_shutdown == SHUTDOWN_MASK) || (sk->sk_state == SMC_CLOSED)) diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 838bce20c361..72f004c9c9b1 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -46,8 +46,8 @@ static void smc_tx_write_space(struct sock *sk) wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) 
wake_up_interruptible_poll(&wq->wait, - POLLOUT | POLLWRNORM | - POLLWRBAND); + EPOLLOUT | EPOLLWRNORM | + EPOLLWRBAND); if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT); rcu_read_unlock(); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index aa36dad32db1..8a7e1c774f9c 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -940,7 +940,7 @@ static __poll_t cache_poll(struct file *filp, poll_table *wait, poll_wait(filp, &queue_wait, wait); /* alway allow write */ - mask = POLLOUT | POLLWRNORM; + mask = EPOLLOUT | EPOLLWRNORM; if (!rp) return mask; @@ -950,7 +950,7 @@ static __poll_t cache_poll(struct file *filp, poll_table *wait, for (cq= &rp->q; &cq->list != &cd->queue; cq = list_entry(cq->list.next, struct cache_queue, list)) if (!cq->reader) { - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; break; } spin_unlock(&queue_lock); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 5c4330325787..fc97fc3ed637 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -345,15 +345,15 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) { struct inode *inode = file_inode(filp); struct rpc_inode *rpci = RPC_I(inode); - __poll_t mask = POLLOUT | POLLWRNORM; + __poll_t mask = EPOLLOUT | EPOLLWRNORM; poll_wait(filp, &rpci->waitq, wait); inode_lock(inode); if (rpci->pipe == NULL) - mask |= POLLERR | POLLHUP; + mask |= EPOLLERR | EPOLLHUP; else if (filp->private_data || !list_empty(&rpci->pipe->pipe)) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; inode_unlock(inode); return mask; } diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 896691afbb1a..d9db2eab3a8d 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -461,6 +461,18 @@ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct r /* * Wake up a task on a specific queue */ +void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq, + struct rpc_wait_queue *queue, + struct rpc_task *task) +{ + spin_lock_bh(&queue->lock); + rpc_wake_up_task_on_wq_queue_locked(wq, queue, task); + spin_unlock_bh(&queue->lock); +} + +/* + * Wake up a task on a specific queue + */ void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task) { spin_lock_bh(&queue->lock); @@ -1092,12 +1104,12 @@ static int rpciod_start(void) * Create the rpciod thread and wait for it to start. 
*/ dprintk("RPC: creating workqueue rpciod\n"); - wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0); + wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); if (!wq) goto out_failed; rpciod_workqueue = wq; /* Note: highpri because network receive is latency sensitive */ - wq = alloc_workqueue("xprtiod", WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); + wq = alloc_workqueue("xprtiod", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_HIGHPRI, 0); if (!wq) goto free_rpciod; xprtiod_workqueue = wq; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 2436fd1125fc..8f0ad4f268da 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -517,7 +517,8 @@ void xprt_write_space(struct rpc_xprt *xprt) if (xprt->snd_task) { dprintk("RPC: write space: waking waiting task on " "xprt %p\n", xprt); - rpc_wake_up_queued_task(&xprt->pending, xprt->snd_task); + rpc_wake_up_queued_task_on_wq(xprtiod_workqueue, + &xprt->pending, xprt->snd_task); } spin_unlock_bh(&xprt->transport_lock); } diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 162e5dd82466..f0855a959a27 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -143,7 +143,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, if (xdr->page_len) { remaining = xdr->page_len; offset = offset_in_page(xdr->page_base); - count = 0; + count = RPCRDMA_MIN_SEND_SGES; while (remaining) { remaining -= min_t(unsigned int, PAGE_SIZE - offset, remaining); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index f4eb63e8e689..e6f84a6434a0 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -505,7 +505,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); return -ENOMEM; } - ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES; + ia->ri_max_send_sges = max_sge; if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) { dprintk("RPC: %s: insufficient wqe's available\n", @@ -1502,6 +1502,9 @@ __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb) { + if (!rb) + return; + if (!rpcrdma_regbuf_is_mapped(rb)) return; @@ -1517,9 +1520,6 @@ rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb) void rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb) { - if (!rb) - return; - rpcrdma_dma_unmap_regbuf(rb); kfree(rb); } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 18803021f242..a6b8c1f8f92a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -807,13 +807,6 @@ static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) smp_mb__after_atomic(); } -static void xs_sock_mark_closed(struct rpc_xprt *xprt) -{ - xs_sock_reset_connection_flags(xprt); - /* Mark transport as closed and wake up all pending tasks */ - xprt_disconnect_done(xprt); -} - /** * xs_error_report - callback to handle TCP socket state errors * @sk: socket @@ -833,9 +826,6 @@ static void xs_error_report(struct sock *sk) err = -sk->sk_err; if (err == 0) goto out; - /* Is this a reset event? */ - if (sk->sk_state == TCP_CLOSE) - xs_sock_mark_closed(xprt); dprintk("RPC: xs_error_report client %p, error=%d...\n", xprt, -err); trace_rpc_socket_error(xprt, sk->sk_socket, err); @@ -1078,18 +1068,18 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt, /* Suck it into the iovec, verify checksum if not done by hw. 
*/ if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) { - __UDPX_INC_STATS(sk, UDP_MIB_INERRORS); spin_lock(&xprt->recv_lock); + __UDPX_INC_STATS(sk, UDP_MIB_INERRORS); goto out_unpin; } - __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS); spin_lock_bh(&xprt->transport_lock); xprt_adjust_cwnd(xprt, task, copied); spin_unlock_bh(&xprt->transport_lock); spin_lock(&xprt->recv_lock); xprt_complete_rqst(task, copied); + __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS); out_unpin: xprt_unpin_rqst(rovr); out_unlock: @@ -1655,9 +1645,11 @@ static void xs_tcp_state_change(struct sock *sk) if (test_and_clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state)) xprt_clear_connecting(xprt); + clear_bit(XPRT_CLOSING, &xprt->state); if (sk->sk_err) xprt_wake_pending_tasks(xprt, -sk->sk_err); - xs_sock_mark_closed(xprt); + /* Trigger the socket release */ + xs_tcp_force_close(xprt); } out: read_unlock_bh(&sk->sk_callback_lock); @@ -2265,14 +2257,19 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct socket *sock = transport->sock; + int skst = transport->inet ? transport->inet->sk_state : TCP_CLOSE; if (sock == NULL) return; - if (xprt_connected(xprt)) { + switch (skst) { + default: kernel_sock_shutdown(sock, SHUT_RDWR); trace_rpc_socket_shutdown(xprt, sock); - } else + break; + case TCP_CLOSE: + case TCP_TIME_WAIT: xs_reset_transport(transport); + } } static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 55d8ba92291d..4e1c6f6450bb 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -208,8 +208,8 @@ bool tipc_msg_validate(struct sk_buff **_skb) int msz, hsz; /* Ensure that flow control ratio condition is satisfied */ - if (unlikely(skb->truesize / buf_roundup_len(skb) > 4)) { - skb = skb_copy(skb, GFP_ATOMIC); + if (unlikely(skb->truesize / buf_roundup_len(skb) >= 4)) { + skb = skb_copy_expand(skb, BUF_HEADROOM, 0, GFP_ATOMIC); if (!skb) return false; kfree_skb(*_skb); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 163f3a547501..b0323ec7971e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -721,31 +721,31 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock, sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_shutdown & RCV_SHUTDOWN) - revents |= POLLRDHUP | POLLIN | POLLRDNORM; + revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) - revents |= POLLHUP; + revents |= EPOLLHUP; switch (sk->sk_state) { case TIPC_ESTABLISHED: case TIPC_CONNECTING: if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) - revents |= POLLOUT; + revents |= EPOLLOUT; /* fall thru' */ case TIPC_LISTEN: if (!skb_queue_empty(&sk->sk_receive_queue)) - revents |= POLLIN | POLLRDNORM; + revents |= EPOLLIN | EPOLLRDNORM; break; case TIPC_OPEN: if (tsk->group_is_open && !tsk->cong_link_cnt) - revents |= POLLOUT; + revents |= EPOLLOUT; if (!tipc_sk_type_connectionless(sk)) break; if (skb_queue_empty(&sk->sk_receive_queue)) break; - revents |= POLLIN | POLLRDNORM; + revents |= EPOLLIN | EPOLLRDNORM; break; case TIPC_DISCONNECTING: - revents = POLLIN | POLLRDNORM | POLLHUP; + revents = EPOLLIN | EPOLLRDNORM | EPOLLHUP; break; } return revents; @@ -1897,8 +1897,8 @@ static void tipc_write_space(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) - wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | - POLLWRNORM | POLLWRBAND); + wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | + EPOLLWRNORM | 
EPOLLWRBAND); rcu_read_unlock(); } @@ -1914,8 +1914,8 @@ static void tipc_data_ready(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) - wake_up_interruptible_sync_poll(&wq->wait, POLLIN | - POLLRDNORM | POLLRDBAND); + wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | + EPOLLRDNORM | EPOLLRDBAND); rcu_read_unlock(); } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 736719c8314e..b0d5fcea47e7 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -484,6 +484,8 @@ out: static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { .name = "tls", + .uid = TCP_ULP_TLS, + .user_visible = true, .owner = THIS_MODULE, .init = tls_init, }; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0214acbd6bff..d545e1d0dea2 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -415,9 +415,9 @@ static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, { unix_dgram_peer_wake_disconnect(sk, other); wake_up_interruptible_poll(sk_sleep(sk), - POLLOUT | - POLLWRNORM | - POLLWRBAND); + EPOLLOUT | + EPOLLWRNORM | + EPOLLWRBAND); } /* preconditions: @@ -454,7 +454,7 @@ static void unix_write_space(struct sock *sk) wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, - POLLOUT | POLLWRNORM | POLLWRBAND); + EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } rcu_read_unlock(); @@ -2129,8 +2129,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, if (wq_has_sleeper(&u->peer_wait)) wake_up_interruptible_sync_poll(&u->peer_wait, - POLLOUT | POLLWRNORM | - POLLWRBAND); + EPOLLOUT | EPOLLWRNORM | + EPOLLWRBAND); if (msg->msg_name) unix_copy_addr(msg, skb->sk); @@ -2650,27 +2650,27 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa /* exceptional events? */ if (sk->sk_err) - mask |= POLLERR; + mask |= EPOLLERR; if (sk->sk_shutdown == SHUTDOWN_MASK) - mask |= POLLHUP; + mask |= EPOLLHUP; if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLRDHUP | POLLIN | POLLRDNORM; + mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; /* readable? */ if (!skb_queue_empty(&sk->sk_receive_queue)) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE) - mask |= POLLHUP; + mask |= EPOLLHUP; /* * we set writable also when the other side has shut down the * connection. This prevents stuck sockets. */ if (unix_writable(sk)) - mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; return mask; } @@ -2687,29 +2687,29 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR | - (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); + mask |= EPOLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLRDHUP | POLLIN | POLLRDNORM; + mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) - mask |= POLLHUP; + mask |= EPOLLHUP; /* readable? 
*/ if (!skb_queue_empty(&sk->sk_receive_queue)) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ if (sk->sk_type == SOCK_SEQPACKET) { if (sk->sk_state == TCP_CLOSE) - mask |= POLLHUP; + mask |= EPOLLHUP; /* connection hasn't started yet? */ if (sk->sk_state == TCP_SYN_SENT) return mask; } /* No write status requested, avoid expensive OUT tests. */ - if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT))) + if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) return mask; writable = unix_writable(sk); @@ -2726,7 +2726,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, } if (writable) - mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; else sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 9d95e773f4c8..e0fc84daed94 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -865,20 +865,20 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, if (sk->sk_err) /* Signify that there has been an error on this socket. */ - mask |= POLLERR; + mask |= EPOLLERR; /* INET sockets treat local write shutdown and peer write shutdown as a - * case of POLLHUP set. + * case of EPOLLHUP set. */ if ((sk->sk_shutdown == SHUTDOWN_MASK) || ((sk->sk_shutdown & SEND_SHUTDOWN) && (vsk->peer_shutdown & SEND_SHUTDOWN))) { - mask |= POLLHUP; + mask |= EPOLLHUP; } if (sk->sk_shutdown & RCV_SHUTDOWN || vsk->peer_shutdown & SEND_SHUTDOWN) { - mask |= POLLRDHUP; + mask |= EPOLLRDHUP; } if (sock->type == SOCK_DGRAM) { @@ -888,11 +888,11 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, */ if (!skb_queue_empty(&sk->sk_receive_queue) || (sk->sk_shutdown & RCV_SHUTDOWN)) { - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; } if (!(sk->sk_shutdown & SEND_SHUTDOWN)) - mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; } else if (sock->type == SOCK_STREAM) { lock_sock(sk); @@ -902,7 +902,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, */ if (sk->sk_state == TCP_LISTEN && !vsock_is_accept_queue_empty(sk)) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; /* If there is something in the queue then we can read. */ if (transport->stream_is_active(vsk) && @@ -911,10 +911,10 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, int ret = transport->notify_poll_in( vsk, 1, &data_ready_now); if (ret < 0) { - mask |= POLLERR; + mask |= EPOLLERR; } else { if (data_ready_now) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; } } @@ -925,7 +925,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, */ if (sk->sk_shutdown & RCV_SHUTDOWN || vsk->peer_shutdown & SEND_SHUTDOWN) { - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; } /* Connected sockets that can produce data can be written. */ @@ -935,25 +935,25 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, int ret = transport->notify_poll_out( vsk, 1, &space_avail_now); if (ret < 0) { - mask |= POLLERR; + mask |= EPOLLERR; } else { if (space_avail_now) - /* Remove POLLWRBAND since INET + /* Remove EPOLLWRBAND since INET * sockets are not setting it. 
*/ - mask |= POLLOUT | POLLWRNORM; + mask |= EPOLLOUT | EPOLLWRNORM; } } } /* Simulate INET socket poll behaviors, which sets - * POLLOUT|POLLWRNORM when peer is closed and nothing to read, + * EPOLLOUT|EPOLLWRNORM when peer is closed and nothing to read, * but local send is not shutdown. */ if (sk->sk_state == TCP_CLOSE || sk->sk_state == TCP_CLOSING) { if (!(sk->sk_shutdown & SEND_SHUTDOWN)) - mask |= POLLOUT | POLLWRNORM; + mask |= EPOLLOUT | EPOLLWRNORM; }
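The RDS hunks above all funnel through one idea: no work may be (re)queued once the owning netns is dying or the transport module has begun unloading. rds_ib_exit() and rds_tcp_exit() raise an atomic flag and then synchronize_rcu(); the queue sites take rcu_read_lock() and ask rds_destroy_pending(), which folds the netns check and the new t_unloading() callback together. A minimal sketch of the pattern, lifted from the hunks above (the wrapper function is illustrative, not from the patch):

static inline bool rds_destroy_pending(struct rds_connection *conn)
{
	/* Dead netns, or a transport module in the middle of unloading. */
	return !check_net(rds_conn_net(conn)) ||
	       (conn->c_trans->t_unloading && conn->c_trans->t_unloading(conn));
}

/* Illustrative wrapper: how the queue sites in send.c/threads.c use it. */
static void rds_queue_send_work(struct rds_conn_path *cp)
{
	rcu_read_lock();	/* pairs with synchronize_rcu() in module exit */
	if (!rds_destroy_pending(cp->cp_conn))
		queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
	rcu_read_unlock();
}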
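The cls_u32 hunk relaxes the flags comparison in u32_change(): TCA_CLS_FLAGS_IN_HW and TCA_CLS_FLAGS_NOT_IN_HW are status bits the kernel sets to report offload state, so a replace request that resends the user's original flags must not be rejected merely because the kernel has since set one of them. A worked illustration (the helper name is made up for the example):

static bool u32_user_flags_differ(u32 node_flags, u32 new_flags)
{
	/* XOR exposes differing bits; masking drops the kernel-owned
	 * offload status bits so only user-settable differences count. */
	return (node_flags ^ new_flags) &
	       ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW);
}

/*
 * Example: node_flags = SKIP_SW | IN_HW (kernel marked it offloaded),
 * new_flags = SKIP_SW (userspace resent the original request).
 * Old check: node_flags != new_flags            -> reject, -EINVAL.
 * New check: XOR = IN_HW, & ~status bits = 0    -> accepted.
 */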
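The _sctp_make_chunk() hunk bounds the padded chunk length before allocating. The on-wire chunk header stores its length in a 16-bit field, so a total above SCTP_MAX_CHUNK_LEN cannot be represented and would wrap when stored. A sketch of the check with illustrative numbers (the helper and the values are assumptions, not from the patch):

/* Mirrors the new bound in _sctp_make_chunk(). */
static bool sctp_chunk_len_ok(int paylen)
{
	int chunklen = SCTP_PAD4(sizeof(struct sctp_chunkhdr) + paylen);

	return chunklen <= SCTP_MAX_CHUNK_LEN;
}

/*
 * Example: paylen = 70000 (say, an INIT carrying a very long address
 * list) gives chunklen = 70004, which exceeds SCTP_MAX_CHUNK_LEN and is
 * now rejected up front; previously the 16-bit length field would have
 * silently wrapped (70004 & 0xffff == 4468) when the header was filled
 * in.
 */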
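Finally, the bulk of this pull is the tree-wide POLL* to EPOLL* conversion in ->poll() methods. For reference, the shape every converted method ends up with, as a standalone sketch (foo_waitq and the two ready flags are placeholders, not taken from any file above):

#include <linux/poll.h>

static DECLARE_WAIT_QUEUE_HEAD(foo_waitq);
static bool foo_read_ready, foo_write_ready;

static __poll_t foo_poll(struct file *file, poll_table *wait)
{
	__poll_t mask = 0;	/* bitwise type, not plain int */

	/* Register the waitqueue so poll/select/epoll can sleep on it. */
	poll_wait(file, &foo_waitq, wait);

	if (foo_read_ready)
		mask |= EPOLLIN | EPOLLRDNORM;
	if (foo_write_ready)
		mask |= EPOLLOUT | EPOLLWRNORM;

	return mask;
}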