Diffstat (limited to 'net')
-rw-r--r--  net/9p/trans_fd.c | 26
-rw-r--r--  net/atm/common.c | 8
-rw-r--r--  net/batman-adv/icmp_socket.c | 2
-rw-r--r--  net/batman-adv/log.c | 2
-rw-r--r--  net/bluetooth/af_bluetooth.c | 16
-rw-r--r--  net/caif/caif_socket.c | 12
-rw-r--r--  net/core/datagram.c | 16
-rw-r--r--  net/core/rtnetlink.c | 48
-rw-r--r--  net/core/skbuff.c | 4
-rw-r--r--  net/core/sock.c | 10
-rw-r--r--  net/core/stream.c | 4
-rw-r--r--  net/dccp/proto.c | 12
-rw-r--r--  net/decnet/af_decnet.c | 2
-rw-r--r--  net/ipv4/af_inet.c | 2
-rw-r--r--  net/ipv4/netfilter/Kconfig | 3
-rw-r--r--  net/ipv4/netfilter/nf_flow_table_ipv4.c | 1
-rw-r--r--  net/ipv4/tcp.c | 34
-rw-r--r--  net/ipv4/tcp_input.c | 2
-rw-r--r--  net/ipv4/tcp_ipv4.c | 3
-rw-r--r--  net/ipv4/tcp_ulp.c | 59
-rw-r--r--  net/ipv4/udp.c | 6
-rw-r--r--  net/ipv6/netfilter/Kconfig | 3
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_reasm.c | 1
-rw-r--r--  net/ipv6/netfilter/nf_flow_table_ipv6.c | 1
-rw-r--r--  net/ipv6/route.c | 8
-rw-r--r--  net/ipv6/tcp_ipv6.c | 3
-rw-r--r--  net/iucv/af_iucv.c | 18
-rw-r--r--  net/kcm/kcmsock.c | 6
-rw-r--r--  net/mpls/af_mpls.c | 24
-rw-r--r--  net/netfilter/Kconfig | 8
-rw-r--r--  net/netfilter/nf_flow_table.c | 76
-rw-r--r--  net/netfilter/nf_flow_table_inet.c | 1
-rw-r--r--  net/netfilter/nf_tables_api.c | 17
-rw-r--r--  net/netfilter/nft_flow_offload.c | 24
-rw-r--r--  net/netfilter/x_tables.c | 7
-rw-r--r--  net/netfilter/xt_RATEEST.c | 22
-rw-r--r--  net/netfilter/xt_cgroup.c | 1
-rw-r--r--  net/netlink/genetlink.c | 12
-rw-r--r--  net/nfc/llcp_sock.c | 16
-rw-r--r--  net/packet/af_packet.c | 4
-rw-r--r--  net/phonet/socket.c | 10
-rw-r--r--  net/rds/af_rds.c | 16
-rw-r--r--  net/rds/cong.c | 2
-rw-r--r--  net/rds/connection.c | 15
-rw-r--r--  net/rds/ib.c | 17
-rw-r--r--  net/rds/ib_cm.c | 1
-rw-r--r--  net/rds/rds.h | 7
-rw-r--r--  net/rds/send.c | 10
-rw-r--r--  net/rds/tcp.c | 42
-rw-r--r--  net/rds/tcp_connect.c | 2
-rw-r--r--  net/rds/tcp_recv.c | 2
-rw-r--r--  net/rds/tcp_send.c | 2
-rw-r--r--  net/rds/threads.c | 6
-rw-r--r--  net/rfkill/core.c | 4
-rw-r--r--  net/rxrpc/af_rxrpc.c | 4
-rw-r--r--  net/rxrpc/conn_client.c | 3
-rw-r--r--  net/rxrpc/conn_event.c | 1
-rw-r--r--  net/rxrpc/conn_object.c | 16
-rw-r--r--  net/rxrpc/rxkad.c | 92
-rw-r--r--  net/sched/cls_u32.c | 3
-rw-r--r--  net/sched/sch_netem.c | 2
-rw-r--r--  net/sctp/sm_make_chunk.c | 7
-rw-r--r--  net/sctp/socket.c | 20
-rw-r--r--  net/smc/af_smc.c | 26
-rw-r--r--  net/smc/smc_rx.c | 4
-rw-r--r--  net/smc/smc_tx.c | 4
-rw-r--r--  net/sunrpc/cache.c | 4
-rw-r--r--  net/sunrpc/rpc_pipe.c | 6
-rw-r--r--  net/sunrpc/sched.c | 16
-rw-r--r--  net/sunrpc/xprt.c | 3
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c | 2
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c | 8
-rw-r--r--  net/sunrpc/xprtsock.c | 27
-rw-r--r--  net/tipc/msg.c | 4
-rw-r--r--  net/tipc/socket.c | 22
-rw-r--r--  net/tls/tls_main.c | 2
-rw-r--r--  net/unix/af_unix.c | 40
-rw-r--r--  net/vmw_vsock/af_vsock.c | 30
78 files changed, 612 insertions, 394 deletions
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index d6f7f7cb79c4..0cfba919d167 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -240,7 +240,7 @@ p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
if (!ts) {
if (err)
*err = -EREMOTEIO;
- return POLLERR;
+ return EPOLLERR;
}
if (!ts->rd->f_op->poll)
@@ -253,7 +253,7 @@ p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
n = DEFAULT_POLLMASK;
else
n = ts->wr->f_op->poll(ts->wr, pt);
- ret = (ret & ~POLLOUT) | (n & ~POLLIN);
+ ret = (ret & ~EPOLLOUT) | (n & ~EPOLLIN);
}
return ret;
@@ -396,11 +396,11 @@ end_clear:
if (!list_empty(&m->req_list)) {
if (test_and_clear_bit(Rpending, &m->wsched))
- n = POLLIN;
+ n = EPOLLIN;
else
n = p9_fd_poll(m->client, NULL, NULL);
- if ((n & POLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) {
+ if ((n & EPOLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) {
p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
schedule_work(&m->rq);
}
@@ -505,11 +505,11 @@ end_clear:
if (m->wsize || !list_empty(&m->unsent_req_list)) {
if (test_and_clear_bit(Wpending, &m->wsched))
- n = POLLOUT;
+ n = EPOLLOUT;
else
n = p9_fd_poll(m->client, NULL, NULL);
- if ((n & POLLOUT) &&
+ if ((n & EPOLLOUT) &&
!test_and_set_bit(Wworksched, &m->wsched)) {
p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m);
schedule_work(&m->wq);
@@ -599,12 +599,12 @@ static void p9_conn_create(struct p9_client *client)
init_poll_funcptr(&m->pt, p9_pollwait);
n = p9_fd_poll(client, &m->pt, NULL);
- if (n & POLLIN) {
+ if (n & EPOLLIN) {
p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
set_bit(Rpending, &m->wsched);
}
- if (n & POLLOUT) {
+ if (n & EPOLLOUT) {
p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
set_bit(Wpending, &m->wsched);
}
@@ -625,12 +625,12 @@ static void p9_poll_mux(struct p9_conn *m)
return;
n = p9_fd_poll(m->client, NULL, &err);
- if (n & (POLLERR | POLLHUP | POLLNVAL)) {
+ if (n & (EPOLLERR | EPOLLHUP | EPOLLNVAL)) {
p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n);
p9_conn_cancel(m, err);
}
- if (n & POLLIN) {
+ if (n & EPOLLIN) {
set_bit(Rpending, &m->wsched);
p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);
if (!test_and_set_bit(Rworksched, &m->wsched)) {
@@ -639,7 +639,7 @@ static void p9_poll_mux(struct p9_conn *m)
}
}
- if (n & POLLOUT) {
+ if (n & EPOLLOUT) {
set_bit(Wpending, &m->wsched);
p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
if ((m->wsize || !list_empty(&m->unsent_req_list)) &&
@@ -678,11 +678,11 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
spin_unlock(&client->lock);
if (test_and_clear_bit(Wpending, &m->wsched))
- n = POLLOUT;
+ n = EPOLLOUT;
else
n = p9_fd_poll(m->client, NULL, NULL);
- if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
+ if (n & EPOLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
schedule_work(&m->wq);
return 0;
diff --git a/net/atm/common.c b/net/atm/common.c
index 6523f38c4957..fc78a0508ae1 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -661,15 +661,15 @@ __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
/* exceptional events */
if (sk->sk_err)
- mask = POLLERR;
+ mask = EPOLLERR;
if (test_bit(ATM_VF_RELEASED, &vcc->flags) ||
test_bit(ATM_VF_CLOSE, &vcc->flags))
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
/* readable? */
if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/* writable? */
if (sock->state == SS_CONNECTING &&
@@ -678,7 +678,7 @@ __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
if (vcc->qos.txtp.traffic_class != ATM_NONE &&
vcc_writable(sk))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
return mask;
}
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 581375d0eed2..e91f29c7c638 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -304,7 +304,7 @@ static __poll_t batadv_socket_poll(struct file *file, poll_table *wait)
poll_wait(file, &socket_client->queue_wait, wait);
if (socket_client->queue_len > 0)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index 9be74a44e99d..dc9fa37ddd14 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -193,7 +193,7 @@ static __poll_t batadv_log_poll(struct file *file, poll_table *wait)
poll_wait(file, &debug_log->queue_wait, wait);
if (!batadv_log_empty(debug_log))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
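
Most of the hunks in this diff apply the same mechanical conversion: ->poll() handlers return __poll_t and build their event mask from the EPOLL* constants, whose values (unlike a few of the legacy POLL* flags) do not vary by architecture. A minimal sketch of the resulting handler shape, using a hypothetical struct my_dev rather than anything from this patch:

#include <linux/fs.h>
#include <linux/poll.h>

struct my_dev {				/* hypothetical driver state */
	wait_queue_head_t readq;
	bool have_data;
	bool have_space;
};

static __poll_t my_dev_poll(struct file *file, poll_table *wait)
{
	struct my_dev *dev = file->private_data;
	__poll_t mask = 0;

	/* Register the wait queue so poll/select/epoll can sleep on it. */
	poll_wait(file, &dev->readq, wait);

	if (dev->have_data)
		mask |= EPOLLIN | EPOLLRDNORM;	/* readable */
	if (dev->have_space)
		mask |= EPOLLOUT | EPOLLWRNORM;	/* writable */

	return mask;
}
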
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index f897681780db..84d92a077834 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -431,7 +431,7 @@ static inline __poll_t bt_accept_poll(struct sock *parent)
if (sk->sk_state == BT_CONNECTED ||
(test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags) &&
sk->sk_state == BT_CONNECT2))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
}
return 0;
@@ -451,20 +451,20 @@ __poll_t bt_sock_poll(struct file *file, struct socket *sock,
return bt_accept_poll(sk);
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR |
- (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+ mask |= EPOLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+ mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (sk->sk_state == BT_CLOSED)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (sk->sk_state == BT_CONNECT ||
sk->sk_state == BT_CONNECT2 ||
@@ -472,7 +472,7 @@ __poll_t bt_sock_poll(struct file *file, struct socket *sock,
return mask;
if (!test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags) && sock_writeable(sk))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
else
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index b109445a1df9..a6fb1b3bcad9 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -924,7 +924,7 @@ static int caif_release(struct socket *sock)
caif_disconnect_client(sock_net(sk), &cf_sk->layer);
cf_sk->sk.sk_socket->state = SS_DISCONNECTING;
- wake_up_interruptible_poll(sk_sleep(sk), POLLERR|POLLHUP);
+ wake_up_interruptible_poll(sk_sleep(sk), EPOLLERR|EPOLLHUP);
sock_orphan(sk);
sk_stream_kill_queues(&cf_sk->sk);
@@ -946,23 +946,23 @@ static __poll_t caif_poll(struct file *file,
/* exceptional events? */
if (sk->sk_err)
- mask |= POLLERR;
+ mask |= EPOLLERR;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP;
+ mask |= EPOLLRDHUP;
/* readable? */
if (!skb_queue_empty(&sk->sk_receive_queue) ||
(sk->sk_shutdown & RCV_SHUTDOWN))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/*
* we set writable also when the other side has shut down the
* connection. This prevents stuck sockets.
*/
if (sock_writeable(sk) && tx_flow_is_on(cf_sk))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
return mask;
}
diff --git a/net/core/datagram.c b/net/core/datagram.c
index b7d9293940b5..9938952c5c78 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -75,7 +75,7 @@ static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, i
/*
* Avoid a wakeup if event not interesting for us
*/
- if (key && !(key_to_poll(key) & (POLLIN | POLLERR)))
+ if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
return 0;
return autoremove_wake_function(wait, mode, sync, key);
}
@@ -842,22 +842,22 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR |
- (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+ mask |= EPOLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+ mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
/* readable? */
if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
if (connection_based(sk)) {
if (sk->sk_state == TCP_CLOSE)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
/* connection hasn't started yet? */
if (sk->sk_state == TCP_SYN_SENT)
return mask;
@@ -865,7 +865,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
/* writable? */
if (sock_writeable(sk))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
else
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 56af8e41abfc..bc290413a49d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1951,6 +1951,38 @@ static struct net *rtnl_link_get_net_capable(const struct sk_buff *skb,
return net;
}
+/* Verify that rtnetlink requests do not pass additional properties
+ * potentially referring to different network namespaces.
+ */
+static int rtnl_ensure_unique_netns(struct nlattr *tb[],
+ struct netlink_ext_ack *extack,
+ bool netns_id_only)
+{
+
+ if (netns_id_only) {
+ if (!tb[IFLA_NET_NS_PID] && !tb[IFLA_NET_NS_FD])
+ return 0;
+
+ NL_SET_ERR_MSG(extack, "specified netns attribute not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (tb[IFLA_IF_NETNSID] && (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]))
+ goto invalid_attr;
+
+ if (tb[IFLA_NET_NS_PID] && (tb[IFLA_IF_NETNSID] || tb[IFLA_NET_NS_FD]))
+ goto invalid_attr;
+
+ if (tb[IFLA_NET_NS_FD] && (tb[IFLA_IF_NETNSID] || tb[IFLA_NET_NS_PID]))
+ goto invalid_attr;
+
+ return 0;
+
+invalid_attr:
+ NL_SET_ERR_MSG(extack, "multiple netns identifying attributes specified");
+ return -EINVAL;
+}
+
static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
{
if (dev) {
@@ -2553,6 +2585,10 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout;
+ err = rtnl_ensure_unique_netns(tb, extack, false);
+ if (err < 0)
+ goto errout;
+
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
@@ -2649,6 +2685,10 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
+ err = rtnl_ensure_unique_netns(tb, extack, true);
+ if (err < 0)
+ return err;
+
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
@@ -2802,6 +2842,10 @@ replay:
if (err < 0)
return err;
+ err = rtnl_ensure_unique_netns(tb, extack, false);
+ if (err < 0)
+ return err;
+
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
@@ -3045,6 +3089,10 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
+ err = rtnl_ensure_unique_netns(tb, extack, true);
+ if (err < 0)
+ return err;
+
if (tb[IFLA_IF_NETNSID]) {
netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8c61c27c1b28..09bd89c90a71 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3894,10 +3894,12 @@ EXPORT_SYMBOL_GPL(skb_gro_receive);
void __init skb_init(void)
{
- skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
+ skbuff_head_cache = kmem_cache_create_usercopy("skbuff_head_cache",
sizeof(struct sk_buff),
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+ offsetof(struct sk_buff, cb),
+ sizeof_field(struct sk_buff, cb),
NULL);
skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
sizeof(struct sk_buff_fclones),
diff --git a/net/core/sock.c b/net/core/sock.c
index b026e1717df4..c501499a04fe 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2619,7 +2619,7 @@ static void sock_def_error_report(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
- wake_up_interruptible_poll(&wq->wait, POLLERR);
+ wake_up_interruptible_poll(&wq->wait, EPOLLERR);
sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
rcu_read_unlock();
}
@@ -2631,8 +2631,8 @@ static void sock_def_readable(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
- wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
- POLLRDNORM | POLLRDBAND);
+ wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
+ EPOLLRDNORM | EPOLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}
@@ -2649,8 +2649,8 @@ static void sock_def_write_space(struct sock *sk)
if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
- wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
- POLLWRNORM | POLLWRBAND);
+ wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
+ EPOLLWRNORM | EPOLLWRBAND);
/* Should agree with poll, otherwise some programs break */
if (sock_writeable(sk))
diff --git a/net/core/stream.c b/net/core/stream.c
index 1cff9c6270c6..7d329fb1f553 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -38,8 +38,8 @@ void sk_stream_write_space(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
- wake_up_interruptible_poll(&wq->wait, POLLOUT |
- POLLWRNORM | POLLWRBAND);
+ wake_up_interruptible_poll(&wq->wait, EPOLLOUT |
+ EPOLLWRNORM | EPOLLWRBAND);
if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 74685fecfdb9..15bdc002d90c 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -338,21 +338,21 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
mask = 0;
if (sk->sk_err)
- mask = POLLERR;
+ mask = EPOLLERR;
if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLIN | POLLRDNORM | POLLRDHUP;
+ mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
/* Connected? */
if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
if (atomic_read(&sk->sk_rmem_alloc) > 0)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
if (sk_stream_is_writeable(sk)) {
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
} else { /* send SIGIO later */
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
@@ -362,7 +362,7 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
* IO signal will be lost.
*/
if (sk_stream_is_writeable(sk))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
}
}
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index cc1b505453a8..91dd09f79808 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1216,7 +1216,7 @@ static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wai
__poll_t mask = datagram_poll(file, sock, wait);
if (!skb_queue_empty(&scp->other_receive_queue))
- mask |= POLLRDBAND;
+ mask |= EPOLLRDBAND;
return mask;
}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c24008daa3d8..e4329e161943 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -828,7 +828,7 @@ int inet_shutdown(struct socket *sock, int how)
case TCP_CLOSE:
err = -ENOTCONN;
/* Hack to wake up other listeners, who can poll for
- POLLHUP, even on eg. unconnected UDP sockets -- RR */
+ EPOLLHUP, even on eg. unconnected UDP sockets -- RR */
/* fall through */
default:
sk->sk_shutdown |= how;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 5f52236780b4..dfe6fa4ea554 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -80,8 +80,7 @@ endif # NF_TABLES
config NF_FLOW_TABLE_IPV4
tristate "Netfilter flow table IPv4 module"
- depends on NF_CONNTRACK && NF_TABLES
- select NF_FLOW_TABLE
+ depends on NF_FLOW_TABLE
help
This option adds the flow table IPv4 support.
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
index b2d01eb25f2c..25d2975da156 100644
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -260,6 +260,7 @@ static struct nf_flowtable_type flowtable_ipv4 = {
.family = NFPROTO_IPV4,
.params = &nf_flow_offload_rhash_params,
.gc = nf_flow_offload_work_gc,
+ .free = nf_flow_table_free,
.hook = nf_flow_offload_ip_hook,
.owner = THIS_MODULE,
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c059aa7df0a9..48636aee23c3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -512,36 +512,36 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
mask = 0;
/*
- * POLLHUP is certainly not done right. But poll() doesn't
+ * EPOLLHUP is certainly not done right. But poll() doesn't
* have a notion of HUP in just one direction, and for a
* socket the read side is more interesting.
*
- * Some poll() documentation says that POLLHUP is incompatible
- * with the POLLOUT/POLLWR flags, so somebody should check this
+ * Some poll() documentation says that EPOLLHUP is incompatible
+ * with the EPOLLOUT/POLLWR flags, so somebody should check this
* all. But careful, it tends to be safer to return too many
* bits than too few, and you can easily break real applications
* if you don't tell them that something has hung up!
*
* Check-me.
*
- * Check number 1. POLLHUP is _UNMASKABLE_ event (see UNIX98 and
+ * Check number 1. EPOLLHUP is _UNMASKABLE_ event (see UNIX98 and
* our fs/select.c). It means that after we received EOF,
* poll always returns immediately, making impossible poll() on write()
- * in state CLOSE_WAIT. One solution is evident --- to set POLLHUP
+ * in state CLOSE_WAIT. One solution is evident --- to set EPOLLHUP
* if and only if shutdown has been made in both directions.
* Actually, it is interesting to look how Solaris and DUX
- * solve this dilemma. I would prefer, if POLLHUP were maskable,
+ * solve this dilemma. I would prefer, if EPOLLHUP were maskable,
* then we could set it on SND_SHUTDOWN. BTW examples given
* in Stevens' books assume exactly this behaviour, it explains
- * why POLLHUP is incompatible with POLLOUT. --ANK
+ * why EPOLLHUP is incompatible with EPOLLOUT. --ANK
*
* NOTE. Check for TCP_CLOSE is added. The goal is to prevent
* blocking on fresh not-connected or disconnected socket. --ANK
*/
if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLIN | POLLRDNORM | POLLRDHUP;
+ mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
/* Connected or passive Fast Open socket? */
if (state != TCP_SYN_SENT &&
@@ -554,11 +554,11 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
target++;
if (tp->rcv_nxt - tp->copied_seq >= target)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
if (sk_stream_is_writeable(sk)) {
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
} else { /* send SIGIO later */
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
@@ -570,24 +570,24 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
*/
smp_mb__after_atomic();
if (sk_stream_is_writeable(sk))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
} else
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
if (tp->urg_data & TCP_URG_VALID)
- mask |= POLLPRI;
+ mask |= EPOLLPRI;
} else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
/* Active TCP fastopen socket with defer_connect
- * Return POLLOUT so application can call write()
+ * Return EPOLLOUT so application can call write()
* in order for kernel to generate SYN+data
*/
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
/* This barrier is coupled with smp_wmb() in tcp_reset() */
smp_rmb();
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR;
+ mask |= EPOLLERR;
return mask;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cfa51cfd2d99..575d3c1fb6e8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -315,7 +315,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
/* Fast Recovery (RFC 5681 3.2) :
* Cubic needs 1.7 factor, rounded to 2 to include
- * extra cushion (application might react slowly to POLLOUT)
+ * extra cushion (application might react slowly to EPOLLOUT)
*/
sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
sndmem *= nr_segs * per_mss;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 95738aa0d8a6..f8ad397e285e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -705,7 +705,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
*/
if (sk) {
arg.bound_dev_if = sk->sk_bound_dev_if;
- trace_tcp_send_reset(sk, skb);
+ if (sk_fullsock(sk))
+ trace_tcp_send_reset(sk, skb);
}
BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 6bb9e14c710a..622caa4039e0 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -29,6 +29,18 @@ static struct tcp_ulp_ops *tcp_ulp_find(const char *name)
return NULL;
}
+static struct tcp_ulp_ops *tcp_ulp_find_id(const int ulp)
+{
+ struct tcp_ulp_ops *e;
+
+ list_for_each_entry_rcu(e, &tcp_ulp_list, list) {
+ if (e->uid == ulp)
+ return e;
+ }
+
+ return NULL;
+}
+
static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name)
{
const struct tcp_ulp_ops *ulp = NULL;
@@ -51,6 +63,18 @@ static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name)
return ulp;
}
+static const struct tcp_ulp_ops *__tcp_ulp_lookup(const int uid)
+{
+ const struct tcp_ulp_ops *ulp;
+
+ rcu_read_lock();
+ ulp = tcp_ulp_find_id(uid);
+ if (!ulp || !try_module_get(ulp->owner))
+ ulp = NULL;
+ rcu_read_unlock();
+ return ulp;
+}
+
/* Attach new upper layer protocol to the list
* of available protocols.
*/
@@ -59,13 +83,10 @@ int tcp_register_ulp(struct tcp_ulp_ops *ulp)
int ret = 0;
spin_lock(&tcp_ulp_list_lock);
- if (tcp_ulp_find(ulp->name)) {
- pr_notice("%s already registered or non-unique name\n",
- ulp->name);
+ if (tcp_ulp_find(ulp->name))
ret = -EEXIST;
- } else {
+ else
list_add_tail_rcu(&ulp->list, &tcp_ulp_list);
- }
spin_unlock(&tcp_ulp_list_lock);
return ret;
@@ -124,6 +145,34 @@ int tcp_set_ulp(struct sock *sk, const char *name)
if (!ulp_ops)
return -ENOENT;
+ if (!ulp_ops->user_visible) {
+ module_put(ulp_ops->owner);
+ return -ENOENT;
+ }
+
+ err = ulp_ops->init(sk);
+ if (err) {
+ module_put(ulp_ops->owner);
+ return err;
+ }
+
+ icsk->icsk_ulp_ops = ulp_ops;
+ return 0;
+}
+
+int tcp_set_ulp_id(struct sock *sk, int ulp)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct tcp_ulp_ops *ulp_ops;
+ int err;
+
+ if (icsk->icsk_ulp_ops)
+ return -EEXIST;
+
+ ulp_ops = __tcp_ulp_lookup(ulp);
+ if (!ulp_ops)
+ return -ENOENT;
+
err = ulp_ops->init(sk);
if (err) {
module_put(ulp_ops->owner);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f81f969f9c06..bfaefe560b5c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2501,12 +2501,12 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
struct sock *sk = sock->sk;
if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/* Check for false positives due to checksum errors */
- if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
+ if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
!(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
- mask &= ~(POLLIN | POLLRDNORM);
+ mask &= ~(EPOLLIN | EPOLLRDNORM);
return mask;
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 4a634b7a2c80..d395d1590699 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -73,8 +73,7 @@ endif # NF_TABLES
config NF_FLOW_TABLE_IPV6
tristate "Netfilter flow table IPv6 module"
- depends on NF_CONNTRACK && NF_TABLES
- select NF_FLOW_TABLE
+ depends on NF_FLOW_TABLE
help
This option adds the flow table IPv6 support.
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index ce53dcfda88a..b84ce3e6d728 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -264,6 +264,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* this case. -DaveM
*/
pr_debug("end of fragment not rounded to 8 bytes.\n");
+ inet_frag_kill(&fq->q, &nf_frags);
return -EPROTO;
}
if (end > fq->q.len) {
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
index fff21602875a..d346705d6ee6 100644
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -253,6 +253,7 @@ static struct nf_flowtable_type flowtable_ipv6 = {
.family = NFPROTO_IPV6,
.params = &nf_flow_offload_rhash_params,
.gc = nf_flow_offload_work_gc,
+ .free = nf_flow_table_free,
.hook = nf_flow_offload_ipv6_hook,
.owner = THIS_MODULE,
};
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fb2d251c0500..9dcfadddd800 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2479,7 +2479,7 @@ static int ip6_route_check_nh_onlink(struct net *net,
struct net_device *dev,
struct netlink_ext_ack *extack)
{
- u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
+ u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
const struct in6_addr *gw_addr = &cfg->fc_gateway;
u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
struct rt6_info *grt;
@@ -2488,8 +2488,10 @@ static int ip6_route_check_nh_onlink(struct net *net,
err = 0;
grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
if (grt) {
- if (grt->rt6i_flags & flags || dev != grt->dst.dev) {
- NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
+ if (!grt->dst.error &&
+ (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop has invalid gateway or device mismatch");
err = -EINVAL;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index a1ab29e2ab3b..412139f4eccd 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -942,7 +942,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
if (sk) {
oif = sk->sk_bound_dev_if;
- trace_tcp_send_reset(sk, skb);
+ if (sk_fullsock(sk))
+ trace_tcp_send_reset(sk, skb);
}
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 64331158d693..1e8cc7bcbca3 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1483,7 +1483,7 @@ static inline __poll_t iucv_accept_poll(struct sock *parent)
sk = (struct sock *) isk;
if (sk->sk_state == IUCV_CONNECTED)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
}
return 0;
@@ -1501,27 +1501,27 @@ __poll_t iucv_sock_poll(struct file *file, struct socket *sock,
return iucv_accept_poll(sk);
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR |
- (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+ mask |= EPOLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP;
+ mask |= EPOLLRDHUP;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (!skb_queue_empty(&sk->sk_receive_queue) ||
(sk->sk_shutdown & RCV_SHUTDOWN))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (sk->sk_state == IUCV_CLOSED)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (sk->sk_state == IUCV_DISCONN)
- mask |= POLLIN;
+ mask |= EPOLLIN;
if (sock_writeable(sk) && iucv_below_msglim(sk))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
else
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 4a8d407f8902..f297d53a11aa 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -396,8 +396,8 @@ static int kcm_read_sock_done(struct strparser *strp, int err)
static void psock_state_change(struct sock *sk)
{
- /* TCP only does a POLLIN for a half close. Do a POLLHUP here
- * since application will normally not poll with POLLIN
+ /* TCP only does a EPOLLIN for a half close. Do a EPOLLHUP here
+ * since application will normally not poll with EPOLLIN
* on the TCP sockets.
*/
@@ -1338,7 +1338,7 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux)
/* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so
* we set sk_state, otherwise epoll_wait always returns right away with
- * POLLHUP
+ * EPOLLHUP
*/
kcm->sk.sk_state = TCP_ESTABLISHED;
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 5dce8336d33f..e545a3c9365f 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -8,6 +8,7 @@
#include <linux/ipv6.h>
#include <linux/mpls.h>
#include <linux/netconf.h>
+#include <linux/nospec.h>
#include <linux/vmalloc.h>
#include <linux/percpu.h>
#include <net/ip.h>
@@ -935,24 +936,27 @@ errout:
return err;
}
-static bool mpls_label_ok(struct net *net, unsigned int index,
+static bool mpls_label_ok(struct net *net, unsigned int *index,
struct netlink_ext_ack *extack)
{
+ bool is_ok = true;
+
/* Reserved labels may not be set */
- if (index < MPLS_LABEL_FIRST_UNRESERVED) {
+ if (*index < MPLS_LABEL_FIRST_UNRESERVED) {
NL_SET_ERR_MSG(extack,
"Invalid label - must be MPLS_LABEL_FIRST_UNRESERVED or higher");
- return false;
+ is_ok = false;
}
/* The full 20 bit range may not be supported. */
- if (index >= net->mpls.platform_labels) {
+ if (is_ok && *index >= net->mpls.platform_labels) {
NL_SET_ERR_MSG(extack,
"Label >= configured maximum in platform_labels");
- return false;
+ is_ok = false;
}
- return true;
+ *index = array_index_nospec(*index, net->mpls.platform_labels);
+ return is_ok;
}
static int mpls_route_add(struct mpls_route_config *cfg,
@@ -975,7 +979,7 @@ static int mpls_route_add(struct mpls_route_config *cfg,
index = find_free_label(net);
}
- if (!mpls_label_ok(net, index, extack))
+ if (!mpls_label_ok(net, &index, extack))
goto errout;
/* Append makes no sense with mpls */
@@ -1052,7 +1056,7 @@ static int mpls_route_del(struct mpls_route_config *cfg,
index = cfg->rc_label;
- if (!mpls_label_ok(net, index, extack))
+ if (!mpls_label_ok(net, &index, extack))
goto errout;
mpls_route_update(net, index, NULL, &cfg->rc_nlinfo);
@@ -1810,7 +1814,7 @@ static int rtm_to_route_config(struct sk_buff *skb,
goto errout;
if (!mpls_label_ok(cfg->rc_nlinfo.nl_net,
- cfg->rc_label, extack))
+ &cfg->rc_label, extack))
goto errout;
break;
}
@@ -2137,7 +2141,7 @@ static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
goto errout;
}
- if (!mpls_label_ok(net, in_label, extack)) {
+ if (!mpls_label_ok(net, &in_label, extack)) {
err = -EINVAL;
goto errout;
}
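
The af_mpls.c hunks above also pull in linux/nospec.h and clamp the already bounds-checked label index with array_index_nospec(), so a CPU speculating past the check cannot be coaxed into an out-of-range load (Spectre v1). A generic sketch of that pattern, with a hypothetical table and lookup helper:

#include <linux/nospec.h>

struct entry;	/* hypothetical element type */

static struct entry *lookup_entry(struct entry **table,
				  unsigned int nr_entries,
				  unsigned int index)
{
	if (index >= nr_entries)
		return NULL;

	/* Clamp index to the validated range even under speculation. */
	index = array_index_nospec(index, nr_entries);
	return table[index];
}
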
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 9019fa98003d..d3220b43c832 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -666,8 +666,8 @@ endif # NF_TABLES
config NF_FLOW_TABLE_INET
tristate "Netfilter flow table mixed IPv4/IPv6 module"
- depends on NF_FLOW_TABLE_IPV4 && NF_FLOW_TABLE_IPV6
- select NF_FLOW_TABLE
+ depends on NF_FLOW_TABLE_IPV4
+ depends on NF_FLOW_TABLE_IPV6
help
This option adds the flow table mixed IPv4/IPv6 support.
@@ -675,7 +675,9 @@ config NF_FLOW_TABLE_INET
config NF_FLOW_TABLE
tristate "Netfilter flow table module"
- depends on NF_CONNTRACK && NF_TABLES
+ depends on NETFILTER_INGRESS
+ depends on NF_CONNTRACK
+ depends on NF_TABLES
help
This option adds the flow table core infrastructure.
diff --git a/net/netfilter/nf_flow_table.c b/net/netfilter/nf_flow_table.c
index 2f5099cb85b8..ec410cae9307 100644
--- a/net/netfilter/nf_flow_table.c
+++ b/net/netfilter/nf_flow_table.c
@@ -4,6 +4,7 @@
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
+#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -124,7 +125,9 @@ void flow_offload_free(struct flow_offload *flow)
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
e = container_of(flow, struct flow_offload_entry, flow);
- kfree(e);
+ nf_ct_delete(e->ct, 0, 0);
+ nf_ct_put(e->ct);
+ kfree_rcu(e, rcu_head);
}
EXPORT_SYMBOL_GPL(flow_offload_free);
@@ -148,11 +151,9 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
}
EXPORT_SYMBOL_GPL(flow_offload_add);
-void flow_offload_del(struct nf_flowtable *flow_table,
- struct flow_offload *flow)
+static void flow_offload_del(struct nf_flowtable *flow_table,
+ struct flow_offload *flow)
{
- struct flow_offload_entry *e;
-
rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
*flow_table->type->params);
@@ -160,10 +161,8 @@ void flow_offload_del(struct nf_flowtable *flow_table,
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
*flow_table->type->params);
- e = container_of(flow, struct flow_offload_entry, flow);
- kfree_rcu(e, rcu_head);
+ flow_offload_free(flow);
}
-EXPORT_SYMBOL_GPL(flow_offload_del);
struct flow_offload_tuple_rhash *
flow_offload_lookup(struct nf_flowtable *flow_table,
@@ -174,15 +173,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);
-static void nf_flow_release_ct(const struct flow_offload *flow)
-{
- struct flow_offload_entry *e;
-
- e = container_of(flow, struct flow_offload_entry, flow);
- nf_ct_delete(e->ct, 0, 0);
- nf_ct_put(e->ct);
-}
-
int nf_flow_table_iterate(struct nf_flowtable *flow_table,
void (*iter)(struct flow_offload *flow, void *data),
void *data)
@@ -231,19 +221,16 @@ static inline bool nf_flow_is_dying(const struct flow_offload *flow)
return flow->flags & FLOW_OFFLOAD_DYING;
}
-void nf_flow_offload_work_gc(struct work_struct *work)
+static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
{
struct flow_offload_tuple_rhash *tuplehash;
- struct nf_flowtable *flow_table;
struct rhashtable_iter hti;
struct flow_offload *flow;
int err;
- flow_table = container_of(work, struct nf_flowtable, gc_work.work);
-
err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
if (err)
- goto schedule;
+ return 0;
rhashtable_walk_start(&hti);
@@ -261,15 +248,22 @@ void nf_flow_offload_work_gc(struct work_struct *work)
flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
if (nf_flow_has_expired(flow) ||
- nf_flow_is_dying(flow)) {
+ nf_flow_is_dying(flow))
flow_offload_del(flow_table, flow);
- nf_flow_release_ct(flow);
- }
}
out:
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);
-schedule:
+
+ return 1;
+}
+
+void nf_flow_offload_work_gc(struct work_struct *work)
+{
+ struct nf_flowtable *flow_table;
+
+ flow_table = container_of(work, struct nf_flowtable, gc_work.work);
+ nf_flow_offload_gc_step(flow_table);
queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
@@ -425,5 +419,35 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
+static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
+{
+ struct net_device *dev = data;
+
+ if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
+ return;
+
+ flow_offload_dead(flow);
+}
+
+static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
+ void *data)
+{
+ nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
+ flush_delayed_work(&flowtable->gc_work);
+}
+
+void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
+{
+ nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
+
+void nf_flow_table_free(struct nf_flowtable *flow_table)
+{
+ nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
+ WARN_ON(!nf_flow_offload_gc_step(flow_table));
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_free);
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 281209aeba8f..375a1881d93d 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -24,6 +24,7 @@ static struct nf_flowtable_type flowtable_inet = {
.family = NFPROTO_INET,
.params = &nf_flow_offload_rhash_params,
.gc = nf_flow_offload_work_gc,
+ .free = nf_flow_table_free,
.hook = nf_flow_offload_inet_hook,
.owner = THIS_MODULE,
};
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 0791813a1e7d..8b9fe30de0cd 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5006,13 +5006,13 @@ void nft_flow_table_iterate(struct net *net,
struct nft_flowtable *flowtable;
const struct nft_table *table;
- rcu_read_lock();
- list_for_each_entry_rcu(table, &net->nft.tables, list) {
- list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_for_each_entry(table, &net->nft.tables, list) {
+ list_for_each_entry(flowtable, &table->flowtables, list) {
iter(&flowtable->data, data);
}
}
- rcu_read_unlock();
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_flow_table_iterate);
@@ -5399,17 +5399,12 @@ err:
nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
-static void nft_flowtable_destroy(void *ptr, void *arg)
-{
- kfree(ptr);
-}
-
static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
{
cancel_delayed_work_sync(&flowtable->data.gc_work);
kfree(flowtable->name);
- rhashtable_free_and_destroy(&flowtable->data.rhashtable,
- nft_flowtable_destroy, NULL);
+ flowtable->data.type->free(&flowtable->data);
+ rhashtable_destroy(&flowtable->data.rhashtable);
module_put(flowtable->data.type->owner);
}
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 4503b8dcf9c0..b65829b2be22 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -194,22 +194,6 @@ static struct nft_expr_type nft_flow_offload_type __read_mostly = {
.owner = THIS_MODULE,
};
-static void flow_offload_iterate_cleanup(struct flow_offload *flow, void *data)
-{
- struct net_device *dev = data;
-
- if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
- return;
-
- flow_offload_dead(flow);
-}
-
-static void nft_flow_offload_iterate_cleanup(struct nf_flowtable *flowtable,
- void *data)
-{
- nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data);
-}
-
static int flow_offload_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -218,7 +202,7 @@ static int flow_offload_netdev_event(struct notifier_block *this,
if (event != NETDEV_DOWN)
return NOTIFY_DONE;
- nft_flow_table_iterate(dev_net(dev), nft_flow_offload_iterate_cleanup, dev);
+ nf_flow_table_cleanup(dev_net(dev), dev);
return NOTIFY_DONE;
}
@@ -246,14 +230,8 @@ register_expr:
static void __exit nft_flow_offload_module_exit(void)
{
- struct net *net;
-
nft_unregister_expr(&nft_flow_offload_type);
unregister_netdevice_notifier(&flow_offload_netdev_notifier);
- rtnl_lock();
- for_each_net(net)
- nft_flow_table_iterate(net, nft_flow_offload_iterate_cleanup, NULL);
- rtnl_unlock();
}
module_init(nft_flow_offload_module_init);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8fa4d37141a7..2f685ee1f9c8 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1008,7 +1008,12 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
if ((size >> PAGE_SHIFT) + 2 > totalram_pages)
return NULL;
- info = kvmalloc(sz, GFP_KERNEL);
+ /* __GFP_NORETRY is not fully supported by kvmalloc but it should
+ * work reasonably well if sz is too large and bail out rather
+ * than shoot all processes down before realizing there is nothing
+ * more to reclaim.
+ */
+ info = kvmalloc(sz, GFP_KERNEL | __GFP_NORETRY);
if (!info)
return NULL;
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 498b54fd04d7..141c295191f6 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -39,23 +39,31 @@ static void xt_rateest_hash_insert(struct xt_rateest *est)
hlist_add_head(&est->list, &rateest_hash[h]);
}
-struct xt_rateest *xt_rateest_lookup(const char *name)
+static struct xt_rateest *__xt_rateest_lookup(const char *name)
{
struct xt_rateest *est;
unsigned int h;
h = xt_rateest_hash(name);
- mutex_lock(&xt_rateest_mutex);
hlist_for_each_entry(est, &rateest_hash[h], list) {
if (strcmp(est->name, name) == 0) {
est->refcnt++;
- mutex_unlock(&xt_rateest_mutex);
return est;
}
}
- mutex_unlock(&xt_rateest_mutex);
+
return NULL;
}
+
+struct xt_rateest *xt_rateest_lookup(const char *name)
+{
+ struct xt_rateest *est;
+
+ mutex_lock(&xt_rateest_mutex);
+ est = __xt_rateest_lookup(name);
+ mutex_unlock(&xt_rateest_mutex);
+ return est;
+}
EXPORT_SYMBOL_GPL(xt_rateest_lookup);
void xt_rateest_put(struct xt_rateest *est)
@@ -100,8 +108,10 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
net_get_random_once(&jhash_rnd, sizeof(jhash_rnd));
- est = xt_rateest_lookup(info->name);
+ mutex_lock(&xt_rateest_mutex);
+ est = __xt_rateest_lookup(info->name);
if (est) {
+ mutex_unlock(&xt_rateest_mutex);
/*
* If estimator parameters are specified, they must match the
* existing estimator.
@@ -139,11 +149,13 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
info->est = est;
xt_rateest_hash_insert(est);
+ mutex_unlock(&xt_rateest_mutex);
return 0;
err2:
kfree(est);
err1:
+ mutex_unlock(&xt_rateest_mutex);
return ret;
}
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 1db1ce59079f..891f4e7e8ea7 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -52,6 +52,7 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par)
return -EINVAL;
}
+ info->priv = NULL;
if (info->has_path) {
cgrp = cgroup_get_from_path(info->path);
if (IS_ERR(cgrp)) {
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index d444daf1ac04..6f02499ef007 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1081,6 +1081,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
{
struct sk_buff *tmp;
struct net *net, *prev = NULL;
+ bool delivered = false;
int err;
for_each_net_rcu(net) {
@@ -1092,14 +1093,21 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
}
err = nlmsg_multicast(prev->genl_sock, tmp,
portid, group, flags);
- if (err)
+ if (!err)
+ delivered = true;
+ else if (err != -ESRCH)
goto error;
}
prev = net;
}
- return nlmsg_multicast(prev->genl_sock, skb, portid, group, flags);
+ err = nlmsg_multicast(prev->genl_sock, skb, portid, group, flags);
+ if (!err)
+ delivered = true;
+ else if (err != -ESRCH)
+ goto error;
+ return delivered ? 0 : -ESRCH;
error:
kfree_skb(skb);
return err;
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 985909f105eb..376040092142 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -543,7 +543,7 @@ static inline __poll_t llcp_accept_poll(struct sock *parent)
sk = &llcp_sock->sk;
if (sk->sk_state == LLCP_CONNECTED)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
}
return 0;
@@ -563,23 +563,23 @@ static __poll_t llcp_sock_poll(struct file *file, struct socket *sock,
return llcp_accept_poll(sk);
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR |
- (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+ mask |= EPOLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (sk->sk_state == LLCP_CLOSED)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+ mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED)
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
else
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 1d1483007e46..e0f3f4aeeb4f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4085,7 +4085,7 @@ static __poll_t packet_poll(struct file *file, struct socket *sock,
if (po->rx_ring.pg_vec) {
if (!packet_previous_rx_frame(po, &po->rx_ring,
TP_STATUS_KERNEL))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
if (po->pressure && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
po->pressure = 0;
@@ -4093,7 +4093,7 @@ static __poll_t packet_poll(struct file *file, struct socket *sock,
spin_lock_bh(&sk->sk_write_queue.lock);
if (po->tx_ring.pg_vec) {
if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
spin_unlock_bh(&sk->sk_write_queue.lock);
return mask;
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 08f6751d2030..fffcd69f63ff 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -351,18 +351,18 @@ static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_state == TCP_CLOSE)
- return POLLERR;
+ return EPOLLERR;
if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (!skb_queue_empty(&pn->ctrlreq_queue))
- mask |= POLLPRI;
+ mask |= EPOLLPRI;
if (!mask && sk->sk_state == TCP_CLOSE_WAIT)
- return POLLHUP;
+ return EPOLLHUP;
if (sk->sk_state == TCP_ESTABLISHED &&
refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf &&
atomic_read(&pn->tx_credits))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
return mask;
}
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 88aa8ad0f5b6..744c637c86b0 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -137,17 +137,17 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
/*
* RDS' poll is without a doubt the least intuitive part of the interface,
- * as POLLIN and POLLOUT do not behave entirely as you would expect from
+ * as EPOLLIN and EPOLLOUT do not behave entirely as you would expect from
* a network protocol.
*
- * POLLIN is asserted if
+ * EPOLLIN is asserted if
* - there is data on the receive queue.
* - to signal that a previously congested destination may have become
* uncongested
* - A notification has been queued to the socket (this can be a congestion
* update, or a RDMA completion).
*
- * POLLOUT is asserted if there is room on the send queue. This does not mean
+ * EPOLLOUT is asserted if there is room on the send queue. This does not mean
* however, that the next sendmsg() call will succeed. If the application tries
* to send to a congested destination, the system call may still fail (and
* return ENOBUFS).
@@ -167,22 +167,22 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
read_lock_irqsave(&rs->rs_recv_lock, flags);
if (!rs->rs_cong_monitor) {
- /* When a congestion map was updated, we signal POLLIN for
+ /* When a congestion map was updated, we signal EPOLLIN for
* "historical" reasons. Applications can also poll for
* WRBAND instead. */
if (rds_cong_updated_since(&rs->rs_cong_track))
- mask |= (POLLIN | POLLRDNORM | POLLWRBAND);
+ mask |= (EPOLLIN | EPOLLRDNORM | EPOLLWRBAND);
} else {
spin_lock(&rs->rs_lock);
if (rs->rs_cong_notify)
- mask |= (POLLIN | POLLRDNORM);
+ mask |= (EPOLLIN | EPOLLRDNORM);
spin_unlock(&rs->rs_lock);
}
if (!list_empty(&rs->rs_recv_queue) ||
!list_empty(&rs->rs_notify_queue))
- mask |= (POLLIN | POLLRDNORM);
+ mask |= (EPOLLIN | EPOLLRDNORM);
if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
- mask |= (POLLOUT | POLLWRNORM);
+ mask |= (EPOLLOUT | EPOLLWRNORM);
read_unlock_irqrestore(&rs->rs_recv_lock, flags);
/* clear state any time we wake a seen-congested socket */
diff --git a/net/rds/cong.c b/net/rds/cong.c
index 8d19fd25dce3..63da9d2f142d 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -223,7 +223,7 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
rcu_read_lock();
if (!test_and_set_bit(0, &conn->c_map_queued) &&
- !test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+ !rds_destroy_pending(cp->cp_conn)) {
rds_stats_inc(s_cong_update_queued);
/* We cannot inline the call to rds_send_xmit() here
* for two reasons (both pertaining to a TCP transport):
diff --git a/net/rds/connection.c b/net/rds/connection.c
index b10c0ef36d8d..94e190febfdd 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -220,8 +220,13 @@ static struct rds_connection *__rds_conn_create(struct net *net,
is_outgoing);
conn->c_path[i].cp_index = i;
}
- ret = trans->conn_alloc(conn, gfp);
+ rcu_read_lock();
+ if (rds_destroy_pending(conn))
+ ret = -ENETDOWN;
+ else
+ ret = trans->conn_alloc(conn, gfp);
if (ret) {
+ rcu_read_unlock();
kfree(conn->c_path);
kmem_cache_free(rds_conn_slab, conn);
conn = ERR_PTR(ret);
@@ -283,6 +288,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
}
}
spin_unlock_irqrestore(&rds_conn_lock, flags);
+ rcu_read_unlock();
out:
return conn;
@@ -382,13 +388,10 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
{
struct rds_message *rm, *rtmp;
- set_bit(RDS_DESTROY_PENDING, &cp->cp_flags);
-
if (!cp->cp_transport_data)
return;
/* make sure lingering queued work won't try to ref the conn */
- synchronize_rcu();
cancel_delayed_work_sync(&cp->cp_send_w);
cancel_delayed_work_sync(&cp->cp_recv_w);
@@ -691,7 +694,7 @@ void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy)
atomic_set(&cp->cp_state, RDS_CONN_ERROR);
rcu_read_lock();
- if (!destroy && test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+ if (!destroy && rds_destroy_pending(cp->cp_conn)) {
rcu_read_unlock();
return;
}
@@ -714,7 +717,7 @@ EXPORT_SYMBOL_GPL(rds_conn_drop);
void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
{
rcu_read_lock();
- if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+ if (rds_destroy_pending(cp->cp_conn)) {
rcu_read_unlock();
return;
}
diff --git a/net/rds/ib.c b/net/rds/ib.c
index ff0c98096af1..50a88f3e7e39 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -48,6 +48,7 @@
static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE;
static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE;
unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT;
+static atomic_t rds_ib_unloading;
module_param(rds_ib_mr_1m_pool_size, int, 0444);
MODULE_PARM_DESC(rds_ib_mr_1m_pool_size, " Max number of 1M mr per HCA");
@@ -378,8 +379,23 @@ static void rds_ib_unregister_client(void)
flush_workqueue(rds_wq);
}
+static void rds_ib_set_unloading(void)
+{
+ atomic_set(&rds_ib_unloading, 1);
+}
+
+static bool rds_ib_is_unloading(struct rds_connection *conn)
+{
+ struct rds_conn_path *cp = &conn->c_path[0];
+
+ return (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags) ||
+ atomic_read(&rds_ib_unloading) != 0);
+}
+
void rds_ib_exit(void)
{
+ rds_ib_set_unloading();
+ synchronize_rcu();
rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
rds_ib_unregister_client();
rds_ib_destroy_nodev_conns();
@@ -413,6 +429,7 @@ struct rds_transport rds_ib_transport = {
.flush_mrs = rds_ib_flush_mrs,
.t_owner = THIS_MODULE,
.t_name = "infiniband",
+ .t_unloading = rds_ib_is_unloading,
.t_type = RDS_TRANS_IB
};
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 80fb6f63e768..eea1d8611b20 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -117,6 +117,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
&conn->c_laddr, &conn->c_faddr,
RDS_PROTOCOL_MAJOR(conn->c_version),
RDS_PROTOCOL_MINOR(conn->c_version));
+ set_bit(RDS_DESTROY_PENDING, &conn->c_path[0].cp_flags);
rds_conn_destroy(conn);
return;
} else {
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 374ae83b60d4..7301b9b01890 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -518,6 +518,7 @@ struct rds_transport {
void (*sync_mr)(void *trans_private, int direction);
void (*free_mr)(void *trans_private, int invalidate);
void (*flush_mrs)(void);
+ bool (*t_unloading)(struct rds_connection *conn);
};
struct rds_sock {
@@ -862,6 +863,12 @@ static inline void rds_mr_put(struct rds_mr *mr)
__rds_put_mr_final(mr);
}
+static inline bool rds_destroy_pending(struct rds_connection *conn)
+{
+ return !check_net(rds_conn_net(conn)) ||
+ (conn->c_trans->t_unloading && conn->c_trans->t_unloading(conn));
+}
+
/* stats.c */
DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
#define rds_stats_inc_which(which, member) do { \
diff --git a/net/rds/send.c b/net/rds/send.c
index d3e32d1f3c7d..b1b0022b8370 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -162,7 +162,7 @@ restart:
goto out;
}
- if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+ if (rds_destroy_pending(cp->cp_conn)) {
release_in_xmit(cp);
ret = -ENETUNREACH; /* dont requeue send work */
goto out;
@@ -444,7 +444,7 @@ over_batch:
if (batch_count < send_batch_count)
goto restart;
rcu_read_lock();
- if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+ if (rds_destroy_pending(cp->cp_conn))
ret = -ENETUNREACH;
else
queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
@@ -1162,7 +1162,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
else
cpath = &conn->c_path[0];
- if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags)) {
+ if (rds_destroy_pending(conn)) {
ret = -EAGAIN;
goto out;
}
@@ -1209,7 +1209,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
if (ret == -ENOMEM || ret == -EAGAIN) {
ret = 0;
rcu_read_lock();
- if (test_bit(RDS_DESTROY_PENDING, &cpath->cp_flags))
+ if (rds_destroy_pending(cpath->cp_conn))
ret = -ENETUNREACH;
else
queue_delayed_work(rds_wq, &cpath->cp_send_w, 1);
@@ -1295,7 +1295,7 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport,
/* schedule the send work on rds_wq */
rcu_read_lock();
- if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+ if (!rds_destroy_pending(cp->cp_conn))
queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
rcu_read_unlock();
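
Every call site converted in send.c has the same reader-side shape: take rcu_read_lock(), test the destroy-pending predicate, and only then queue delayed work, so a teardown path that has already set the flag and called synchronize_rcu() cannot race with new queueing. A minimal sketch of that shape (names hypothetical):

#include <linux/rcupdate.h>
#include <linux/workqueue.h>

static void example_kick_send(struct workqueue_struct *wq,
			      struct delayed_work *work,
			      bool (*destroy_pending)(void))
{
	rcu_read_lock();
	if (!destroy_pending())
		queue_delayed_work(wq, work, 1);
	rcu_read_unlock();
}
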
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 9920d2f84eff..44c4652721af 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -49,6 +49,7 @@ static unsigned int rds_tcp_tc_count;
/* Track rds_tcp_connection structs so they can be cleaned up */
static DEFINE_SPINLOCK(rds_tcp_conn_lock);
static LIST_HEAD(rds_tcp_conn_list);
+static atomic_t rds_tcp_unloading = ATOMIC_INIT(0);
static struct kmem_cache *rds_tcp_conn_slab;
@@ -274,14 +275,13 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr)
static void rds_tcp_conn_free(void *arg)
{
struct rds_tcp_connection *tc = arg;
- unsigned long flags;
rdsdebug("freeing tc %p\n", tc);
- spin_lock_irqsave(&rds_tcp_conn_lock, flags);
+ spin_lock_bh(&rds_tcp_conn_lock);
if (!tc->t_tcp_node_detached)
list_del(&tc->t_tcp_node);
- spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
+ spin_unlock_bh(&rds_tcp_conn_lock);
kmem_cache_free(rds_tcp_conn_slab, tc);
}
@@ -296,7 +296,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
if (!tc) {
ret = -ENOMEM;
- break;
+ goto fail;
}
mutex_init(&tc->t_conn_path_lock);
tc->t_sock = NULL;
@@ -306,14 +306,19 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
conn->c_path[i].cp_transport_data = tc;
tc->t_cpath = &conn->c_path[i];
+ tc->t_tcp_node_detached = true;
- spin_lock_irq(&rds_tcp_conn_lock);
- tc->t_tcp_node_detached = false;
- list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
- spin_unlock_irq(&rds_tcp_conn_lock);
rdsdebug("rds_conn_path [%d] tc %p\n", i,
conn->c_path[i].cp_transport_data);
}
+ spin_lock_bh(&rds_tcp_conn_lock);
+ for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ tc = conn->c_path[i].cp_transport_data;
+ tc->t_tcp_node_detached = false;
+ list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
+ }
+ spin_unlock_bh(&rds_tcp_conn_lock);
+fail:
if (ret) {
for (j = 0; j < i; j++)
rds_tcp_conn_free(conn->c_path[j].cp_transport_data);
@@ -332,6 +337,16 @@ static bool list_has_conn(struct list_head *list, struct rds_connection *conn)
return false;
}
+static void rds_tcp_set_unloading(void)
+{
+ atomic_set(&rds_tcp_unloading, 1);
+}
+
+static bool rds_tcp_is_unloading(struct rds_connection *conn)
+{
+ return atomic_read(&rds_tcp_unloading) != 0;
+}
+
static void rds_tcp_destroy_conns(void)
{
struct rds_tcp_connection *tc, *_tc;
@@ -370,6 +385,7 @@ struct rds_transport rds_tcp_transport = {
.t_type = RDS_TRANS_TCP,
.t_prefer_loopback = 1,
.t_mp_capable = 1,
+ .t_unloading = rds_tcp_is_unloading,
};
static unsigned int rds_tcp_netid;
@@ -513,7 +529,7 @@ static void rds_tcp_kill_sock(struct net *net)
rtn->rds_tcp_listen_sock = NULL;
rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
- spin_lock_irq(&rds_tcp_conn_lock);
+ spin_lock_bh(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
@@ -526,7 +542,7 @@ static void rds_tcp_kill_sock(struct net *net)
tc->t_tcp_node_detached = true;
}
}
- spin_unlock_irq(&rds_tcp_conn_lock);
+ spin_unlock_bh(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
rds_conn_destroy(tc->t_cpath->cp_conn);
}
@@ -574,7 +590,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
{
struct rds_tcp_connection *tc, *_tc;
- spin_lock_irq(&rds_tcp_conn_lock);
+ spin_lock_bh(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
@@ -584,7 +600,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
/* reconnect with new parameters */
rds_conn_path_drop(tc->t_cpath, false);
}
- spin_unlock_irq(&rds_tcp_conn_lock);
+ spin_unlock_bh(&rds_tcp_conn_lock);
}
static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
@@ -607,6 +623,8 @@ static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
static void rds_tcp_exit(void)
{
+ rds_tcp_set_unloading();
+ synchronize_rcu();
rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
unregister_pernet_subsys(&rds_tcp_net_ops);
if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
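
The rds_tcp_conn_alloc() rework above changes the shape of the allocation loop: allocate every per-path structure first (each marked detached), then add them all to the global list inside a single spin_lock_bh() section, and unwind with kfree on failure; the switch from spin_lock_irq() to spin_lock_bh() is applied consistently everywhere rds_tcp_conn_list is walked. A sketch of the allocate-then-publish pattern, all names invented:

#include <linux/errno.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#define EXAMPLE_NPATHS	8

struct example_path {
	struct list_head node;
	bool detached;
};

static DEFINE_SPINLOCK(example_lock);
static LIST_HEAD(example_list);

static int example_alloc_paths(struct example_path *paths[EXAMPLE_NPATHS])
{
	int i, j;

	for (i = 0; i < EXAMPLE_NPATHS; i++) {
		paths[i] = kzalloc(sizeof(*paths[i]), GFP_KERNEL);
		if (!paths[i])
			goto fail;
		paths[i]->detached = true;	/* not published yet */
	}

	/* Publish everything in one BH-safe critical section. */
	spin_lock_bh(&example_lock);
	for (i = 0; i < EXAMPLE_NPATHS; i++) {
		paths[i]->detached = false;
		list_add_tail(&paths[i]->node, &example_list);
	}
	spin_unlock_bh(&example_lock);
	return 0;

fail:
	for (j = 0; j < i; j++)
		kfree(paths[j]);
	return -ENOMEM;
}
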
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 534c67aeb20f..d999e7075645 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -170,7 +170,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp)
cp->cp_conn, tc, sock);
if (sock) {
- if (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+ if (rds_destroy_pending(cp->cp_conn))
rds_tcp_set_linger(sock);
sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN);
lock_sock(sock->sk);
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index dd707b9e73e5..b9fbd2ee74ef 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -323,7 +323,7 @@ void rds_tcp_data_ready(struct sock *sk)
if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM) {
rcu_read_lock();
- if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+ if (!rds_destroy_pending(cp->cp_conn))
queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
rcu_read_unlock();
}
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 16f65744d984..7df869d37afd 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -204,7 +204,7 @@ void rds_tcp_write_space(struct sock *sk)
rcu_read_lock();
if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf &&
- !test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+ !rds_destroy_pending(cp->cp_conn))
queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
rcu_read_unlock();
diff --git a/net/rds/threads.c b/net/rds/threads.c
index eb76db1360b0..c52861d77a59 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -88,7 +88,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
cp->cp_reconnect_jiffies = 0;
set_bit(0, &cp->cp_conn->c_map_queued);
rcu_read_lock();
- if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags)) {
+ if (!rds_destroy_pending(cp->cp_conn)) {
queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
}
@@ -138,7 +138,7 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
if (cp->cp_reconnect_jiffies == 0) {
cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
rcu_read_lock();
- if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+ if (!rds_destroy_pending(cp->cp_conn))
queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
rcu_read_unlock();
return;
@@ -149,7 +149,7 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
conn, &conn->c_laddr, &conn->c_faddr);
rcu_read_lock();
- if (!test_bit(RDS_DESTROY_PENDING, &cp->cp_flags))
+ if (!rds_destroy_pending(cp->cp_conn))
queue_delayed_work(rds_wq, &cp->cp_conn_w,
rand % cp->cp_reconnect_jiffies);
rcu_read_unlock();
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 124c77e9d058..59d0eb960275 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1142,13 +1142,13 @@ static int rfkill_fop_open(struct inode *inode, struct file *file)
static __poll_t rfkill_fop_poll(struct file *file, poll_table *wait)
{
struct rfkill_data *data = file->private_data;
- __poll_t res = POLLOUT | POLLWRNORM;
+ __poll_t res = EPOLLOUT | EPOLLWRNORM;
poll_wait(file, &data->read_wait, wait);
mutex_lock(&data->mtx);
if (!list_empty(&data->events))
- res = POLLIN | POLLRDNORM;
+ res = EPOLLIN | EPOLLRDNORM;
mutex_unlock(&data->mtx);
return res;
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 21ad6a3a465c..0c9c18aa7c77 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -742,13 +742,13 @@ static __poll_t rxrpc_poll(struct file *file, struct socket *sock,
/* the socket is readable if there are any messages waiting on the Rx
* queue */
if (!list_empty(&rx->recvmsg_q))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/* the socket is writable if there is space to add new data to the
* socket; there is no guarantee that any particular call in progress
* on the socket may have space in the Tx ACK window */
if (rxrpc_writable(sk))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
return mask;
}
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 7f74ca3059f8..064175068059 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -834,7 +834,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
* can be skipped if we find a follow-on call. The first DATA packet
* of the follow on call will implicitly ACK this call.
*/
- if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+ if (call->completion == RXRPC_CALL_SUCCEEDED &&
+ test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
unsigned long final_ack_at = jiffies + 2;
WRITE_ONCE(chan->final_ack_at, final_ack_at);
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 4ca11be6be3c..b1dfae107431 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -460,6 +460,7 @@ void rxrpc_process_connection(struct work_struct *work)
case -EKEYEXPIRED:
case -EKEYREJECTED:
goto protocol_error;
+ case -ENOMEM:
case -EAGAIN:
goto requeue_and_leave;
case -ECONNABORTED:
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index c628351eb900..ccbac190add1 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -177,13 +177,21 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
* through the channel, whilst disposing of the actual call record.
*/
trace_rxrpc_disconnect_call(call);
- if (call->abort_code) {
- chan->last_abort = call->abort_code;
- chan->last_type = RXRPC_PACKET_TYPE_ABORT;
- } else {
+ switch (call->completion) {
+ case RXRPC_CALL_SUCCEEDED:
chan->last_seq = call->rx_hard_ack;
chan->last_type = RXRPC_PACKET_TYPE_ACK;
+ break;
+ case RXRPC_CALL_LOCALLY_ABORTED:
+ chan->last_abort = call->abort_code;
+ chan->last_type = RXRPC_PACKET_TYPE_ABORT;
+ break;
+ default:
+ chan->last_abort = RX_USER_ABORT;
+ chan->last_type = RXRPC_PACKET_TYPE_ABORT;
+ break;
}
+
/* Sync with rxrpc_conn_retransmit(). */
smp_wmb();
chan->last_call = chan->call_id;
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index c38b3a1de56c..77cb23c7bd0a 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -773,8 +773,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
{
const struct rxrpc_key_token *token;
struct rxkad_challenge challenge;
- struct rxkad_response resp
- __attribute__((aligned(8))); /* must be aligned for crypto */
+ struct rxkad_response *resp;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
const char *eproto;
u32 version, nonce, min_level, abort_code;
@@ -818,26 +817,29 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
token = conn->params.key->payload.data[0];
/* build the response packet */
- memset(&resp, 0, sizeof(resp));
-
- resp.version = htonl(RXKAD_VERSION);
- resp.encrypted.epoch = htonl(conn->proto.epoch);
- resp.encrypted.cid = htonl(conn->proto.cid);
- resp.encrypted.securityIndex = htonl(conn->security_ix);
- resp.encrypted.inc_nonce = htonl(nonce + 1);
- resp.encrypted.level = htonl(conn->params.security_level);
- resp.kvno = htonl(token->kad->kvno);
- resp.ticket_len = htonl(token->kad->ticket_len);
-
- resp.encrypted.call_id[0] = htonl(conn->channels[0].call_counter);
- resp.encrypted.call_id[1] = htonl(conn->channels[1].call_counter);
- resp.encrypted.call_id[2] = htonl(conn->channels[2].call_counter);
- resp.encrypted.call_id[3] = htonl(conn->channels[3].call_counter);
+ resp = kzalloc(sizeof(struct rxkad_response), GFP_NOFS);
+ if (!resp)
+ return -ENOMEM;
+
+ resp->version = htonl(RXKAD_VERSION);
+ resp->encrypted.epoch = htonl(conn->proto.epoch);
+ resp->encrypted.cid = htonl(conn->proto.cid);
+ resp->encrypted.securityIndex = htonl(conn->security_ix);
+ resp->encrypted.inc_nonce = htonl(nonce + 1);
+ resp->encrypted.level = htonl(conn->params.security_level);
+ resp->kvno = htonl(token->kad->kvno);
+ resp->ticket_len = htonl(token->kad->ticket_len);
+ resp->encrypted.call_id[0] = htonl(conn->channels[0].call_counter);
+ resp->encrypted.call_id[1] = htonl(conn->channels[1].call_counter);
+ resp->encrypted.call_id[2] = htonl(conn->channels[2].call_counter);
+ resp->encrypted.call_id[3] = htonl(conn->channels[3].call_counter);
/* calculate the response checksum and then do the encryption */
- rxkad_calc_response_checksum(&resp);
- rxkad_encrypt_response(conn, &resp, token->kad);
- return rxkad_send_response(conn, &sp->hdr, &resp, token->kad);
+ rxkad_calc_response_checksum(resp);
+ rxkad_encrypt_response(conn, resp, token->kad);
+ ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad);
+ kfree(resp);
+ return ret;
protocol_error:
trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
@@ -1048,8 +1050,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
struct sk_buff *skb,
u32 *_abort_code)
{
- struct rxkad_response response
- __attribute__((aligned(8))); /* must be aligned for crypto */
+ struct rxkad_response *response;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt session_key;
const char *eproto;
@@ -1061,17 +1062,22 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
_enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
+ ret = -ENOMEM;
+ response = kzalloc(sizeof(struct rxkad_response), GFP_NOFS);
+ if (!response)
+ goto temporary_error;
+
eproto = tracepoint_string("rxkad_rsp_short");
abort_code = RXKADPACKETSHORT;
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
- &response, sizeof(response)) < 0)
+ response, sizeof(*response)) < 0)
goto protocol_error;
- if (!pskb_pull(skb, sizeof(response)))
+ if (!pskb_pull(skb, sizeof(*response)))
BUG();
- version = ntohl(response.version);
- ticket_len = ntohl(response.ticket_len);
- kvno = ntohl(response.kvno);
+ version = ntohl(response->version);
+ ticket_len = ntohl(response->ticket_len);
+ kvno = ntohl(response->kvno);
_proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }",
sp->hdr.serial, version, kvno, ticket_len);
@@ -1105,31 +1111,31 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
ret = rxkad_decrypt_ticket(conn, skb, ticket, ticket_len, &session_key,
&expiry, _abort_code);
if (ret < 0)
- goto temporary_error_free;
+ goto temporary_error_free_resp;
/* use the session key from inside the ticket to decrypt the
* response */
- rxkad_decrypt_response(conn, &response, &session_key);
+ rxkad_decrypt_response(conn, response, &session_key);
eproto = tracepoint_string("rxkad_rsp_param");
abort_code = RXKADSEALEDINCON;
- if (ntohl(response.encrypted.epoch) != conn->proto.epoch)
+ if (ntohl(response->encrypted.epoch) != conn->proto.epoch)
goto protocol_error_free;
- if (ntohl(response.encrypted.cid) != conn->proto.cid)
+ if (ntohl(response->encrypted.cid) != conn->proto.cid)
goto protocol_error_free;
- if (ntohl(response.encrypted.securityIndex) != conn->security_ix)
+ if (ntohl(response->encrypted.securityIndex) != conn->security_ix)
goto protocol_error_free;
- csum = response.encrypted.checksum;
- response.encrypted.checksum = 0;
- rxkad_calc_response_checksum(&response);
+ csum = response->encrypted.checksum;
+ response->encrypted.checksum = 0;
+ rxkad_calc_response_checksum(response);
eproto = tracepoint_string("rxkad_rsp_csum");
- if (response.encrypted.checksum != csum)
+ if (response->encrypted.checksum != csum)
goto protocol_error_free;
spin_lock(&conn->channel_lock);
for (i = 0; i < RXRPC_MAXCALLS; i++) {
struct rxrpc_call *call;
- u32 call_id = ntohl(response.encrypted.call_id[i]);
+ u32 call_id = ntohl(response->encrypted.call_id[i]);
eproto = tracepoint_string("rxkad_rsp_callid");
if (call_id > INT_MAX)
@@ -1153,12 +1159,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
eproto = tracepoint_string("rxkad_rsp_seq");
abort_code = RXKADOUTOFSEQUENCE;
- if (ntohl(response.encrypted.inc_nonce) != conn->security_nonce + 1)
+ if (ntohl(response->encrypted.inc_nonce) != conn->security_nonce + 1)
goto protocol_error_free;
eproto = tracepoint_string("rxkad_rsp_level");
abort_code = RXKADLEVELFAIL;
- level = ntohl(response.encrypted.level);
+ level = ntohl(response->encrypted.level);
if (level > RXRPC_SECURITY_ENCRYPT)
goto protocol_error_free;
conn->params.security_level = level;
@@ -1168,9 +1174,10 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
* as for a client connection */
ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno);
if (ret < 0)
- goto temporary_error_free;
+ goto temporary_error_free_ticket;
kfree(ticket);
+ kfree(response);
_leave(" = 0");
return 0;
@@ -1179,12 +1186,15 @@ protocol_error_unlock:
protocol_error_free:
kfree(ticket);
protocol_error:
+ kfree(response);
trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
*_abort_code = abort_code;
return -EPROTO;
-temporary_error_free:
+temporary_error_free_ticket:
kfree(ticket);
+temporary_error_free_resp:
+ kfree(response);
temporary_error:
/* Ignore the response packet if we got a temporary error such as
* ENOMEM. We just want to send the challenge again. Note that we
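
The rxkad changes above replace two large, alignment-sensitive on-stack structures with kzalloc(..., GFP_NOFS) buffers and make sure every exit label frees them (the temporary_error and protocol_error paths gain matching kfree calls). A reduced sketch of the pattern, with a hypothetical structure standing in for the real response:

#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/types.h>

struct example_response {		/* hypothetical stand-in */
	u8 payload[256];		/* too large/strict for the stack */
};

static int example_build_response(void)
{
	struct example_response *resp;
	int ret;

	resp = kzalloc(sizeof(*resp), GFP_NOFS);
	if (!resp)
		return -ENOMEM;

	/* fill in, checksum, encrypt, transmit ... */
	ret = 0;			/* stand-in for the real send path */

	kfree(resp);			/* freed on every path out */
	return ret;
}
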
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index c07e8abc3d52..6c7601a530e3 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -947,7 +947,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
- if (n->flags != flags) {
+ if ((n->flags ^ flags) &
+ ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) {
NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags");
return -EINVAL;
}
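
The cls_u32 change compares the two flag words with XOR and then masks off the TCA_CLS_FLAGS_IN_HW / TCA_CLS_FLAGS_NOT_IN_HW status bits, which the kernel sets on its own and which userspace cannot be expected to echo back. A one-function sketch of that comparison:

#include <linux/pkt_cls.h>	/* TCA_CLS_FLAGS_* */
#include <linux/types.h>

static bool example_flags_differ(u32 stored, u32 requested)
{
	/* Ignore the kernel-maintained offload status bits. */
	return (stored ^ requested) &
	       ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW);
}
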
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 7bbc13b8ca47..7c179addebcd 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -327,7 +327,7 @@ static s64 tabledist(s64 mu, s32 sigma,
/* default uniform distribution */
if (dist == NULL)
- return (rnd % (2 * sigma)) - sigma + mu;
+ return ((rnd % (2 * sigma)) + mu) - sigma;
t = dist->table[rnd % dist->size];
x = (sigma % NETEM_DIST_SCALE) * t;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 793b05ec692b..d01475f5f710 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1380,9 +1380,14 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
struct sctp_chunk *retval;
struct sk_buff *skb;
struct sock *sk;
+ int chunklen;
+
+ chunklen = SCTP_PAD4(sizeof(*chunk_hdr) + paylen);
+ if (chunklen > SCTP_MAX_CHUNK_LEN)
+ goto nodata;
/* No need to allocate LL here, as this is only a chunk. */
- skb = alloc_skb(SCTP_PAD4(sizeof(*chunk_hdr) + paylen), gfp);
+ skb = alloc_skb(chunklen, gfp);
if (!skb)
goto nodata;
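
_sctp_make_chunk() now computes the padded chunk length up front and bails out when it exceeds SCTP_MAX_CHUNK_LEN, so an oversized paylen is rejected before it ever reaches alloc_skb(). A compact sketch of that check, with the error handling simplified to a NULL return:

#include <linux/skbuff.h>
#include <net/sctp/sctp.h>	/* SCTP_PAD4, SCTP_MAX_CHUNK_LEN */

static struct sk_buff *example_alloc_chunk(size_t hdrlen, size_t paylen,
					   gfp_t gfp)
{
	int chunklen = SCTP_PAD4(hdrlen + paylen);

	if (chunklen > SCTP_MAX_CHUNK_LEN)
		return NULL;		/* caller treats this as "nodata" */

	return alloc_skb(chunklen, gfp);
}
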
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ebb8cb9eb0bd..bf271f8c2dc9 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -7602,22 +7602,22 @@ __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
*/
if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
return (!list_empty(&sp->ep->asocs)) ?
- (POLLIN | POLLRDNORM) : 0;
+ (EPOLLIN | EPOLLRDNORM) : 0;
mask = 0;
/* Is there any exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR |
- (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+ mask |= EPOLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+ mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
/* Is it readable? Reconsider this code with TCP-style support. */
if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/* The association is either gone or not ready. */
if (!sctp_style(sk, UDP) && sctp_sstate(sk, CLOSED))
@@ -7625,7 +7625,7 @@ __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
/* Is it writable? */
if (sctp_writeable(sk)) {
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
} else {
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
/*
@@ -7637,7 +7637,7 @@ __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
* in the following code to cover it as well.
*/
if (sctp_writeable(sk))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
return mask;
}
@@ -8161,8 +8161,8 @@ void sctp_data_ready(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
- wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
- POLLRDNORM | POLLRDBAND);
+ wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN |
+ EPOLLRDNORM | EPOLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 3583c8ab1bae..da1a5cdefd13 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1141,11 +1141,11 @@ out:
static __poll_t smc_accept_poll(struct sock *parent)
{
struct smc_sock *isk = smc_sk(parent);
- int mask = 0;
+ __poll_t mask = 0;
spin_lock(&isk->accept_q_lock);
if (!list_empty(&isk->accept_q))
- mask = POLLIN | POLLRDNORM;
+ mask = EPOLLIN | EPOLLRDNORM;
spin_unlock(&isk->accept_q_lock);
return mask;
@@ -1160,7 +1160,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
int rc;
if (!sk)
- return POLLNVAL;
+ return EPOLLNVAL;
smc = smc_sk(sock->sk);
sock_hold(sk);
@@ -1171,16 +1171,16 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
/* if non-blocking connect finished ... */
lock_sock(sk);
- if ((sk->sk_state == SMC_INIT) && (mask & POLLOUT)) {
+ if ((sk->sk_state == SMC_INIT) && (mask & EPOLLOUT)) {
sk->sk_err = smc->clcsock->sk->sk_err;
if (sk->sk_err) {
- mask |= POLLERR;
+ mask |= EPOLLERR;
} else {
rc = smc_connect_rdma(smc);
if (rc < 0)
- mask |= POLLERR;
+ mask |= EPOLLERR;
/* success cases including fallback */
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
}
} else {
@@ -1190,27 +1190,27 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
lock_sock(sk);
}
if (sk->sk_err)
- mask |= POLLERR;
+ mask |= EPOLLERR;
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
(sk->sk_state == SMC_CLOSED))
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (sk->sk_state == SMC_LISTEN) {
/* woken up by sk_data_ready in smc_listen_work() */
mask = smc_accept_poll(sk);
} else {
if (atomic_read(&smc->conn.sndbuf_space) ||
sk->sk_shutdown & SEND_SHUTDOWN) {
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
} else {
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
}
if (atomic_read(&smc->conn.bytes_to_rcv))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLIN | POLLRDNORM | POLLRDHUP;
+ mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
if (sk->sk_state == SMC_APPCLOSEWAIT1)
- mask |= POLLIN;
+ mask |= EPOLLIN;
}
}
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 9dc392ca06bf..eff4e0d0bb31 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -35,8 +35,8 @@ static void smc_rx_data_ready(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
- wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
- POLLRDNORM | POLLRDBAND);
+ wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
+ EPOLLRDNORM | EPOLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
(sk->sk_state == SMC_CLOSED))
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 838bce20c361..72f004c9c9b1 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -46,8 +46,8 @@ static void smc_tx_write_space(struct sock *sk)
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
wake_up_interruptible_poll(&wq->wait,
- POLLOUT | POLLWRNORM |
- POLLWRBAND);
+ EPOLLOUT | EPOLLWRNORM |
+ EPOLLWRBAND);
if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index aa36dad32db1..8a7e1c774f9c 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -940,7 +940,7 @@ static __poll_t cache_poll(struct file *filp, poll_table *wait,
poll_wait(filp, &queue_wait, wait);
/* alway allow write */
- mask = POLLOUT | POLLWRNORM;
+ mask = EPOLLOUT | EPOLLWRNORM;
if (!rp)
return mask;
@@ -950,7 +950,7 @@ static __poll_t cache_poll(struct file *filp, poll_table *wait,
for (cq= &rp->q; &cq->list != &cd->queue;
cq = list_entry(cq->list.next, struct cache_queue, list))
if (!cq->reader) {
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
break;
}
spin_unlock(&queue_lock);
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 5c4330325787..fc97fc3ed637 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -345,15 +345,15 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
{
struct inode *inode = file_inode(filp);
struct rpc_inode *rpci = RPC_I(inode);
- __poll_t mask = POLLOUT | POLLWRNORM;
+ __poll_t mask = EPOLLOUT | EPOLLWRNORM;
poll_wait(filp, &rpci->waitq, wait);
inode_lock(inode);
if (rpci->pipe == NULL)
- mask |= POLLERR | POLLHUP;
+ mask |= EPOLLERR | EPOLLHUP;
else if (filp->private_data || !list_empty(&rpci->pipe->pipe))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
inode_unlock(inode);
return mask;
}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 896691afbb1a..d9db2eab3a8d 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -461,6 +461,18 @@ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct r
/*
* Wake up a task on a specific queue
*/
+void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
+ struct rpc_wait_queue *queue,
+ struct rpc_task *task)
+{
+ spin_lock_bh(&queue->lock);
+ rpc_wake_up_task_on_wq_queue_locked(wq, queue, task);
+ spin_unlock_bh(&queue->lock);
+}
+
+/*
+ * Wake up a task on a specific queue
+ */
void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task)
{
spin_lock_bh(&queue->lock);
@@ -1092,12 +1104,12 @@ static int rpciod_start(void)
* Create the rpciod thread and wait for it to start.
*/
dprintk("RPC: creating workqueue rpciod\n");
- wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0);
+ wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
if (!wq)
goto out_failed;
rpciod_workqueue = wq;
/* Note: highpri because network receive is latency sensitive */
- wq = alloc_workqueue("xprtiod", WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+ wq = alloc_workqueue("xprtiod", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_HIGHPRI, 0);
if (!wq)
goto free_rpciod;
xprtiod_workqueue = wq;
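
Both RPC workqueues gain WQ_UNBOUND here, so work items are no longer pinned to the CPU that queued them, while WQ_MEM_RECLAIM (plus WQ_HIGHPRI for xprtiod) is kept so the queues still make forward progress under memory pressure. A minimal sketch of creating such a queue, with a hypothetical name:

#include <linux/errno.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;

static int example_wq_init(void)
{
	example_wq = alloc_workqueue("example_rpc",
				     WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
	return example_wq ? 0 : -ENOMEM;
}
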
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 2436fd1125fc..8f0ad4f268da 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -517,7 +517,8 @@ void xprt_write_space(struct rpc_xprt *xprt)
if (xprt->snd_task) {
dprintk("RPC: write space: waking waiting task on "
"xprt %p\n", xprt);
- rpc_wake_up_queued_task(&xprt->pending, xprt->snd_task);
+ rpc_wake_up_queued_task_on_wq(xprtiod_workqueue,
+ &xprt->pending, xprt->snd_task);
}
spin_unlock_bh(&xprt->transport_lock);
}
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 162e5dd82466..f0855a959a27 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -143,7 +143,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
if (xdr->page_len) {
remaining = xdr->page_len;
offset = offset_in_page(xdr->page_base);
- count = 0;
+ count = RPCRDMA_MIN_SEND_SGES;
while (remaining) {
remaining -= min_t(unsigned int,
PAGE_SIZE - offset, remaining);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index f4eb63e8e689..e6f84a6434a0 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -505,7 +505,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
return -ENOMEM;
}
- ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES;
+ ia->ri_max_send_sges = max_sge;
if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
dprintk("RPC: %s: insufficient wqe's available\n",
@@ -1502,6 +1502,9 @@ __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
static void
rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
{
+ if (!rb)
+ return;
+
if (!rpcrdma_regbuf_is_mapped(rb))
return;
@@ -1517,9 +1520,6 @@ rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
void
rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb)
{
- if (!rb)
- return;
-
rpcrdma_dma_unmap_regbuf(rb);
kfree(rb);
}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 18803021f242..a6b8c1f8f92a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -807,13 +807,6 @@ static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
smp_mb__after_atomic();
}
-static void xs_sock_mark_closed(struct rpc_xprt *xprt)
-{
- xs_sock_reset_connection_flags(xprt);
- /* Mark transport as closed and wake up all pending tasks */
- xprt_disconnect_done(xprt);
-}
-
/**
* xs_error_report - callback to handle TCP socket state errors
* @sk: socket
@@ -833,9 +826,6 @@ static void xs_error_report(struct sock *sk)
err = -sk->sk_err;
if (err == 0)
goto out;
- /* Is this a reset event? */
- if (sk->sk_state == TCP_CLOSE)
- xs_sock_mark_closed(xprt);
dprintk("RPC: xs_error_report client %p, error=%d...\n",
xprt, -err);
trace_rpc_socket_error(xprt, sk->sk_socket, err);
@@ -1078,18 +1068,18 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
/* Suck it into the iovec, verify checksum if not done by hw. */
if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) {
- __UDPX_INC_STATS(sk, UDP_MIB_INERRORS);
spin_lock(&xprt->recv_lock);
+ __UDPX_INC_STATS(sk, UDP_MIB_INERRORS);
goto out_unpin;
}
- __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
spin_lock_bh(&xprt->transport_lock);
xprt_adjust_cwnd(xprt, task, copied);
spin_unlock_bh(&xprt->transport_lock);
spin_lock(&xprt->recv_lock);
xprt_complete_rqst(task, copied);
+ __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
out_unpin:
xprt_unpin_rqst(rovr);
out_unlock:
@@ -1655,9 +1645,11 @@ static void xs_tcp_state_change(struct sock *sk)
if (test_and_clear_bit(XPRT_SOCK_CONNECTING,
&transport->sock_state))
xprt_clear_connecting(xprt);
+ clear_bit(XPRT_CLOSING, &xprt->state);
if (sk->sk_err)
xprt_wake_pending_tasks(xprt, -sk->sk_err);
- xs_sock_mark_closed(xprt);
+ /* Trigger the socket release */
+ xs_tcp_force_close(xprt);
}
out:
read_unlock_bh(&sk->sk_callback_lock);
@@ -2265,14 +2257,19 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct socket *sock = transport->sock;
+ int skst = transport->inet ? transport->inet->sk_state : TCP_CLOSE;
if (sock == NULL)
return;
- if (xprt_connected(xprt)) {
+ switch (skst) {
+ default:
kernel_sock_shutdown(sock, SHUT_RDWR);
trace_rpc_socket_shutdown(xprt, sock);
- } else
+ break;
+ case TCP_CLOSE:
+ case TCP_TIME_WAIT:
xs_reset_transport(transport);
+ }
}
static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
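
The reworked xs_tcp_shutdown() above keys off the TCP socket state rather than the RPC-level connected flag: a graceful kernel_sock_shutdown(SHUT_RDWR) is attempted in the default case, and sockets already in TCP_CLOSE or TCP_TIME_WAIT are simply reset locally. A sketch of that dispatch, with the reset helper left as a hypothetical stub:

#include <linux/net.h>
#include <net/tcp_states.h>

static void example_reset_transport(void)
{
	/* hypothetical local cleanup: release the socket, clear state */
}

static void example_tcp_shutdown(struct socket *sock, int sk_state)
{
	switch (sk_state) {
	case TCP_CLOSE:
	case TCP_TIME_WAIT:
		/* nothing useful left to say on the wire */
		example_reset_transport();
		break;
	default:
		kernel_sock_shutdown(sock, SHUT_RDWR);
		break;
	}
}
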
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 55d8ba92291d..4e1c6f6450bb 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -208,8 +208,8 @@ bool tipc_msg_validate(struct sk_buff **_skb)
int msz, hsz;
/* Ensure that flow control ratio condition is satisfied */
- if (unlikely(skb->truesize / buf_roundup_len(skb) > 4)) {
- skb = skb_copy(skb, GFP_ATOMIC);
+ if (unlikely(skb->truesize / buf_roundup_len(skb) >= 4)) {
+ skb = skb_copy_expand(skb, BUF_HEADROOM, 0, GFP_ATOMIC);
if (!skb)
return false;
kfree_skb(*_skb);
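
tipc_msg_validate() now triggers the copy when the truesize-to-data ratio reaches 4 (>= instead of >) and uses skb_copy_expand() so the replacement buffer also reserves headroom for headers added later, rather than a bare skb_copy(). A sketch of the same replacement step, using skb->len where the original uses its buf_roundup_len() helper:

#include <linux/skbuff.h>

static bool example_tighten_skb(struct sk_buff **pskb, unsigned int headroom)
{
	struct sk_buff *skb = *pskb;

	if (skb->truesize / max(skb->len, 1U) >= 4) {
		struct sk_buff *copy = skb_copy_expand(skb, headroom, 0,
						       GFP_ATOMIC);
		if (!copy)
			return false;
		kfree_skb(skb);
		*pskb = copy;
	}
	return true;
}
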
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 163f3a547501..b0323ec7971e 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -721,31 +721,31 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock,
sock_poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_shutdown & RCV_SHUTDOWN)
- revents |= POLLRDHUP | POLLIN | POLLRDNORM;
+ revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- revents |= POLLHUP;
+ revents |= EPOLLHUP;
switch (sk->sk_state) {
case TIPC_ESTABLISHED:
case TIPC_CONNECTING:
if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
- revents |= POLLOUT;
+ revents |= EPOLLOUT;
/* fall thru' */
case TIPC_LISTEN:
if (!skb_queue_empty(&sk->sk_receive_queue))
- revents |= POLLIN | POLLRDNORM;
+ revents |= EPOLLIN | EPOLLRDNORM;
break;
case TIPC_OPEN:
if (tsk->group_is_open && !tsk->cong_link_cnt)
- revents |= POLLOUT;
+ revents |= EPOLLOUT;
if (!tipc_sk_type_connectionless(sk))
break;
if (skb_queue_empty(&sk->sk_receive_queue))
break;
- revents |= POLLIN | POLLRDNORM;
+ revents |= EPOLLIN | EPOLLRDNORM;
break;
case TIPC_DISCONNECTING:
- revents = POLLIN | POLLRDNORM | POLLHUP;
+ revents = EPOLLIN | EPOLLRDNORM | EPOLLHUP;
break;
}
return revents;
@@ -1897,8 +1897,8 @@ static void tipc_write_space(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
- wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
- POLLWRNORM | POLLWRBAND);
+ wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
+ EPOLLWRNORM | EPOLLWRBAND);
rcu_read_unlock();
}
@@ -1914,8 +1914,8 @@ static void tipc_data_ready(struct sock *sk)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
- wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
- POLLRDNORM | POLLRDBAND);
+ wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN |
+ EPOLLRDNORM | EPOLLRDBAND);
rcu_read_unlock();
}
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 736719c8314e..b0d5fcea47e7 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -484,6 +484,8 @@ out:
static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
.name = "tls",
+ .uid = TCP_ULP_TLS,
+ .user_visible = true,
.owner = THIS_MODULE,
.init = tls_init,
};
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0214acbd6bff..d545e1d0dea2 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -415,9 +415,9 @@ static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
{
unix_dgram_peer_wake_disconnect(sk, other);
wake_up_interruptible_poll(sk_sleep(sk),
- POLLOUT |
- POLLWRNORM |
- POLLWRBAND);
+ EPOLLOUT |
+ EPOLLWRNORM |
+ EPOLLWRBAND);
}
/* preconditions:
@@ -454,7 +454,7 @@ static void unix_write_space(struct sock *sk)
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait,
- POLLOUT | POLLWRNORM | POLLWRBAND);
+ EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
rcu_read_unlock();
@@ -2129,8 +2129,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
if (wq_has_sleeper(&u->peer_wait))
wake_up_interruptible_sync_poll(&u->peer_wait,
- POLLOUT | POLLWRNORM |
- POLLWRBAND);
+ EPOLLOUT | EPOLLWRNORM |
+ EPOLLWRBAND);
if (msg->msg_name)
unix_copy_addr(msg, skb->sk);
@@ -2650,27 +2650,27 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
/* exceptional events? */
if (sk->sk_err)
- mask |= POLLERR;
+ mask |= EPOLLERR;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+ mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
/* readable? */
if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
sk->sk_state == TCP_CLOSE)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
/*
* we set writable also when the other side has shut down the
* connection. This prevents stuck sockets.
*/
if (unix_writable(sk))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
return mask;
}
@@ -2687,29 +2687,29 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR |
- (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+ mask |= EPOLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+ mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
/* readable? */
if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
if (sk->sk_type == SOCK_SEQPACKET) {
if (sk->sk_state == TCP_CLOSE)
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
/* connection hasn't started yet? */
if (sk->sk_state == TCP_SYN_SENT)
return mask;
}
/* No write status requested, avoid expensive OUT tests. */
- if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
+ if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
return mask;
writable = unix_writable(sk);
@@ -2726,7 +2726,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
}
if (writable)
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
else
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 9d95e773f4c8..e0fc84daed94 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -865,20 +865,20 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
if (sk->sk_err)
/* Signify that there has been an error on this socket. */
- mask |= POLLERR;
+ mask |= EPOLLERR;
/* INET sockets treat local write shutdown and peer write shutdown as a
- * case of POLLHUP set.
+ * case of EPOLLHUP set.
*/
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
((sk->sk_shutdown & SEND_SHUTDOWN) &&
(vsk->peer_shutdown & SEND_SHUTDOWN))) {
- mask |= POLLHUP;
+ mask |= EPOLLHUP;
}
if (sk->sk_shutdown & RCV_SHUTDOWN ||
vsk->peer_shutdown & SEND_SHUTDOWN) {
- mask |= POLLRDHUP;
+ mask |= EPOLLRDHUP;
}
if (sock->type == SOCK_DGRAM) {
@@ -888,11 +888,11 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
*/
if (!skb_queue_empty(&sk->sk_receive_queue) ||
(sk->sk_shutdown & RCV_SHUTDOWN)) {
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
if (!(sk->sk_shutdown & SEND_SHUTDOWN))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
} else if (sock->type == SOCK_STREAM) {
lock_sock(sk);
@@ -902,7 +902,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
*/
if (sk->sk_state == TCP_LISTEN
&& !vsock_is_accept_queue_empty(sk))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/* If there is something in the queue then we can read. */
if (transport->stream_is_active(vsk) &&
@@ -911,10 +911,10 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
int ret = transport->notify_poll_in(
vsk, 1, &data_ready_now);
if (ret < 0) {
- mask |= POLLERR;
+ mask |= EPOLLERR;
} else {
if (data_ready_now)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
}
@@ -925,7 +925,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
*/
if (sk->sk_shutdown & RCV_SHUTDOWN ||
vsk->peer_shutdown & SEND_SHUTDOWN) {
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
}
/* Connected sockets that can produce data can be written. */
@@ -935,25 +935,25 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
int ret = transport->notify_poll_out(
vsk, 1, &space_avail_now);
if (ret < 0) {
- mask |= POLLERR;
+ mask |= EPOLLERR;
} else {
if (space_avail_now)
- /* Remove POLLWRBAND since INET
+ /* Remove EPOLLWRBAND since INET
* sockets are not setting it.
*/
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
}
}
/* Simulate INET socket poll behaviors, which sets
- * POLLOUT|POLLWRNORM when peer is closed and nothing to read,
+ * EPOLLOUT|EPOLLWRNORM when peer is closed and nothing to read,
* but local send is not shutdown.
*/
if (sk->sk_state == TCP_CLOSE || sk->sk_state == TCP_CLOSING) {
if (!(sk->sk_shutdown & SEND_SHUTDOWN))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}