summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/Kconfig1
-rw-r--r--net/bridge/br_netfilter_hooks.c2
-rw-r--r--net/bridge/netfilter/Kconfig2
-rw-r--r--net/bridge/netfilter/nf_tables_bridge.c74
-rw-r--r--net/caif/cfpkt_skbuff.c1
-rw-r--r--net/caif/chnl_net.c1
-rw-r--r--net/core/dev.c14
-rw-r--r--net/core/ethtool.c28
-rw-r--r--net/core/rtnetlink.c20
-rw-r--r--net/ipv4/netfilter.c62
-rw-r--r--net/ipv4/netfilter/Kconfig10
-rw-r--r--net/ipv4/netfilter/Makefile3
-rw-r--r--net/ipv4/netfilter/arp_tables.c26
-rw-r--r--net/ipv4/netfilter/ip_tables.c26
-rw-r--r--net/ipv4/netfilter/iptable_filter.c6
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c5
-rw-r--r--net/ipv4/netfilter/iptable_nat.c4
-rw-r--r--net/ipv4/netfilter/iptable_raw.c6
-rw-r--r--net/ipv4/netfilter/iptable_security.c6
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c7
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c4
-rw-r--r--net/ipv4/netfilter/nf_flow_table_ipv4.c284
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c10
-rw-r--r--net/ipv4/netfilter/nf_tables_arp.c13
-rw-r--r--net/ipv4/netfilter/nf_tables_ipv4.c36
-rw-r--r--net/ipv4/netfilter/nft_chain_nat_ipv4.c3
-rw-r--r--net/ipv4/netfilter/nft_chain_route_ipv4.c8
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_output.c3
-rw-r--r--net/ipv6/datagram.c3
-rw-r--r--net/ipv6/ip6_fib.c43
-rw-r--r--net/ipv6/ip6_output.c3
-rw-r--r--net/ipv6/ip6_tunnel.c15
-rw-r--r--net/ipv6/netfilter.c44
-rw-r--r--net/ipv6/netfilter/Kconfig8
-rw-r--r--net/ipv6/netfilter/Makefile3
-rw-r--r--net/ipv6/netfilter/ip6_tables.c26
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c8
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c4
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c7
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c4
-rw-r--r--net/ipv6/netfilter/nf_flow_table_ipv6.c278
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c8
-rw-r--r--net/ipv6/netfilter/nf_tables_ipv6.c35
-rw-r--r--net/ipv6/netfilter/nft_chain_nat_ipv6.c3
-rw-r--r--net/ipv6/netfilter/nft_chain_route_ipv6.c3
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c12
-rw-r--r--net/ipv6/route.c141
-rw-r--r--net/ipv6/seg6_local.c2
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/l2tp/l2tp_core.c7
-rw-r--r--net/mac80211/rx.c2
-rw-r--r--net/netfilter/Kconfig31
-rw-r--r--net/netfilter/Makefile9
-rw-r--r--net/netfilter/core.c263
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h10
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c8
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c8
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c8
-rw-r--r--net/netfilter/ipset/ip_set_core.c33
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h38
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c21
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c1
-rw-r--r--net/netfilter/nf_conncount.c373
-rw-r--r--net/netfilter/nf_conntrack_core.c20
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c40
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c77
-rw-r--r--net/netfilter/nf_conntrack_netlink.c17
-rw-r--r--net/netfilter/nf_conntrack_proto.c18
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c21
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c21
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c25
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c10
-rw-r--r--net/netfilter/nf_conntrack_standalone.c12
-rw-r--r--net/netfilter/nf_flow_table.c429
-rw-r--r--net/netfilter/nf_flow_table_inet.c48
-rw-r--r--net/netfilter/nf_internals.h2
-rw-r--r--net/netfilter/nf_queue.c98
-rw-r--r--net/netfilter/nf_tables_api.c876
-rw-r--r--net/netfilter/nf_tables_inet.c39
-rw-r--r--net/netfilter/nf_tables_netdev.c17
-rw-r--r--net/netfilter/nfnetlink_queue.c9
-rw-r--r--net/netfilter/nft_cmp.c2
-rw-r--r--net/netfilter/nft_compat.c8
-rw-r--r--net/netfilter/nft_dynset.c2
-rw-r--r--net/netfilter/nft_flow_offload.c264
-rw-r--r--net/netfilter/nft_meta.c43
-rw-r--r--net/netfilter/nft_rt.c15
-rw-r--r--net/netfilter/utils.c90
-rw-r--r--net/netfilter/x_tables.c38
-rw-r--r--net/netfilter/xt_TCPMSS.c5
-rw-r--r--net/netfilter/xt_addrtype.c15
-rw-r--r--net/netfilter/xt_bpf.c14
-rw-r--r--net/netfilter/xt_connlimit.c369
-rw-r--r--net/netfilter/xt_set.c119
-rw-r--r--net/openvswitch/vport-internal_dev.c1
-rw-r--r--net/rds/rdma.c4
-rw-r--r--net/sched/act_gact.c2
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/sch_red.c24
-rw-r--r--net/sctp/input.c28
-rw-r--r--net/sctp/stream.c22
-rw-r--r--net/sctp/transport.c29
-rw-r--r--net/tipc/core.h1
-rw-r--r--net/tipc/group.c345
-rw-r--r--net/tipc/group.h8
-rw-r--r--net/tipc/name_table.c55
-rw-r--r--net/tipc/name_table.h6
-rw-r--r--net/tipc/server.c6
-rw-r--r--net/tipc/server.h7
-rw-r--r--net/tipc/socket.c103
-rw-r--r--net/tipc/subscr.c20
-rw-r--r--net/tipc/subscr.h2
-rw-r--r--net/wireless/nl80211.c3
119 files changed, 3947 insertions, 1641 deletions
diff --git a/net/Kconfig b/net/Kconfig
index efe930db3c08..37ec8e67af57 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -182,6 +182,7 @@ config BRIDGE_NETFILTER
depends on BRIDGE
depends on NETFILTER && INET
depends on NETFILTER_ADVANCED
+ select NETFILTER_FAMILY_BRIDGE
default m
---help---
Enabling this option will let arptables resp. iptables see bridged
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index c2eea1b8737a..27f1d4f2114a 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -991,7 +991,7 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
unsigned int i;
int ret;
- e = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
+ e = rcu_dereference(net->nf.hooks_bridge[hook]);
if (!e)
return okfn(net, sk, skb);
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index e7ef1a1ef3a6..225d1668dfdd 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -4,6 +4,7 @@
#
menuconfig NF_TABLES_BRIDGE
depends on BRIDGE && NETFILTER && NF_TABLES
+ select NETFILTER_FAMILY_BRIDGE
tristate "Ethernet Bridge nf_tables support"
if NF_TABLES_BRIDGE
@@ -29,6 +30,7 @@ endif # NF_TABLES_BRIDGE
menuconfig BRIDGE_NF_EBTABLES
tristate "Ethernet Bridge tables (ebtables) support"
depends on BRIDGE && NETFILTER && NETFILTER_XTABLES
+ select NETFILTER_FAMILY_BRIDGE
help
ebtables is a general, extensible frame/packet identification
framework. Say 'Y' or 'M' here if you want to do Ethernet
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
index 97afdc0744e6..86774b5c3b73 100644
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -25,15 +25,17 @@ nft_do_chain_bridge(void *priv,
{
struct nft_pktinfo pkt;
+ nft_set_pktinfo(&pkt, skb, state);
+
switch (eth_hdr(skb)->h_proto) {
case htons(ETH_P_IP):
- nft_set_pktinfo_ipv4_validate(&pkt, skb, state);
+ nft_set_pktinfo_ipv4_validate(&pkt, skb);
break;
case htons(ETH_P_IPV6):
- nft_set_pktinfo_ipv6_validate(&pkt, skb, state);
+ nft_set_pktinfo_ipv6_validate(&pkt, skb);
break;
default:
- nft_set_pktinfo_unspec(&pkt, skb, state);
+ nft_set_pktinfo_unspec(&pkt, skb);
break;
}
@@ -44,14 +46,6 @@ static struct nft_af_info nft_af_bridge __read_mostly = {
.family = NFPROTO_BRIDGE,
.nhooks = NF_BR_NUMHOOKS,
.owner = THIS_MODULE,
- .nops = 1,
- .hooks = {
- [NF_BR_PRE_ROUTING] = nft_do_chain_bridge,
- [NF_BR_LOCAL_IN] = nft_do_chain_bridge,
- [NF_BR_FORWARD] = nft_do_chain_bridge,
- [NF_BR_LOCAL_OUT] = nft_do_chain_bridge,
- [NF_BR_POST_ROUTING] = nft_do_chain_bridge,
- },
};
static int nf_tables_bridge_init_net(struct net *net)
@@ -92,67 +86,32 @@ static const struct nf_chain_type filter_bridge = {
(1 << NF_BR_FORWARD) |
(1 << NF_BR_LOCAL_OUT) |
(1 << NF_BR_POST_ROUTING),
-};
-
-static void nf_br_saveroute(const struct sk_buff *skb,
- struct nf_queue_entry *entry)
-{
-}
-
-static int nf_br_reroute(struct net *net, struct sk_buff *skb,
- const struct nf_queue_entry *entry)
-{
- return 0;
-}
-
-static __sum16 nf_br_checksum(struct sk_buff *skb, unsigned int hook,
- unsigned int dataoff, u_int8_t protocol)
-{
- return 0;
-}
-
-static __sum16 nf_br_checksum_partial(struct sk_buff *skb, unsigned int hook,
- unsigned int dataoff, unsigned int len,
- u_int8_t protocol)
-{
- return 0;
-}
-
-static int nf_br_route(struct net *net, struct dst_entry **dst,
- struct flowi *fl, bool strict __always_unused)
-{
- return 0;
-}
-
-static const struct nf_afinfo nf_br_afinfo = {
- .family = AF_BRIDGE,
- .checksum = nf_br_checksum,
- .checksum_partial = nf_br_checksum_partial,
- .route = nf_br_route,
- .saveroute = nf_br_saveroute,
- .reroute = nf_br_reroute,
- .route_key_size = 0,
+ .hooks = {
+ [NF_BR_PRE_ROUTING] = nft_do_chain_bridge,
+ [NF_BR_LOCAL_IN] = nft_do_chain_bridge,
+ [NF_BR_FORWARD] = nft_do_chain_bridge,
+ [NF_BR_LOCAL_OUT] = nft_do_chain_bridge,
+ [NF_BR_POST_ROUTING] = nft_do_chain_bridge,
+ },
};
static int __init nf_tables_bridge_init(void)
{
int ret;
- nf_register_afinfo(&nf_br_afinfo);
ret = nft_register_chain_type(&filter_bridge);
if (ret < 0)
- goto err1;
+ return ret;
ret = register_pernet_subsys(&nf_tables_bridge_net_ops);
if (ret < 0)
- goto err2;
+ goto err_register_subsys;
return ret;
-err2:
+err_register_subsys:
nft_unregister_chain_type(&filter_bridge);
-err1:
- nf_unregister_afinfo(&nf_br_afinfo);
+
return ret;
}
@@ -160,7 +119,6 @@ static void __exit nf_tables_bridge_exit(void)
{
unregister_pernet_subsys(&nf_tables_bridge_net_ops);
nft_unregister_chain_type(&filter_bridge);
- nf_unregister_afinfo(&nf_br_afinfo);
}
module_init(nf_tables_bridge_init);
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 71b6ab240dea..38c2b7a890dd 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -8,7 +8,6 @@
#include <linux/string.h>
#include <linux/skbuff.h>
-#include <linux/hardirq.h>
#include <linux/export.h>
#include <net/caif/cfpkt.h>
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 922ac1d605b3..53ecda10b790 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -8,7 +8,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
#include <linux/fs.h>
-#include <linux/hardirq.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netdevice.h>
diff --git a/net/core/dev.c b/net/core/dev.c
index 74e1e5d31337..3d24d9a59086 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1146,7 +1146,19 @@ EXPORT_SYMBOL(dev_alloc_name);
int dev_get_valid_name(struct net *net, struct net_device *dev,
const char *name)
{
- return dev_alloc_name_ns(net, dev, name);
+ BUG_ON(!net);
+
+ if (!dev_valid_name(name))
+ return -EINVAL;
+
+ if (strchr(name, '%'))
+ return dev_alloc_name_ns(net, dev, name);
+ else if (__dev_get_by_name(net, name))
+ return -EEXIST;
+ else if (dev->name != name)
+ strlcpy(dev->name, name, IFNAMSIZ);
+
+ return 0;
}
EXPORT_SYMBOL(dev_get_valid_name);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 50a79203043b..107b122c8969 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -771,15 +771,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev,
return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
}
-static void
-warn_incomplete_ethtool_legacy_settings_conversion(const char *details)
-{
- char name[sizeof(current->comm)];
-
- pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n",
- get_task_comm(name, current), details);
-}
-
/* Query device for its ethtool_cmd settings.
*
* Backward compatibility note: for compatibility with legacy ethtool,
@@ -806,10 +797,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
&link_ksettings);
if (err < 0)
return err;
- if (!convert_link_ksettings_to_legacy_settings(&cmd,
- &link_ksettings))
- warn_incomplete_ethtool_legacy_settings_conversion(
- "link modes are only partially reported");
+ convert_link_ksettings_to_legacy_settings(&cmd,
+ &link_ksettings);
/* send a sensible cmd tag back to user */
cmd.cmd = ETHTOOL_GSET;
@@ -1704,14 +1693,23 @@ static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr)
static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr)
{
- struct ethtool_ringparam ringparam;
+ struct ethtool_ringparam ringparam, max = { .cmd = ETHTOOL_GRINGPARAM };
- if (!dev->ethtool_ops->set_ringparam)
+ if (!dev->ethtool_ops->set_ringparam || !dev->ethtool_ops->get_ringparam)
return -EOPNOTSUPP;
if (copy_from_user(&ringparam, useraddr, sizeof(ringparam)))
return -EFAULT;
+ dev->ethtool_ops->get_ringparam(dev, &max);
+
+ /* ensure new ring parameters are within the maximums */
+ if (ringparam.rx_pending > max.rx_max_pending ||
+ ringparam.rx_mini_pending > max.rx_mini_max_pending ||
+ ringparam.rx_jumbo_pending > max.rx_jumbo_max_pending ||
+ ringparam.tx_pending > max.tx_max_pending)
+ return -EINVAL;
+
return dev->ethtool_ops->set_ringparam(dev, &ringparam);
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c688dc564b11..16d644a4f974 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -904,6 +904,10 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
nla_total_size_64bit(sizeof(__u64)) +
/* IFLA_VF_STATS_MULTICAST */
nla_total_size_64bit(sizeof(__u64)) +
+ /* IFLA_VF_STATS_RX_DROPPED */
+ nla_total_size_64bit(sizeof(__u64)) +
+ /* IFLA_VF_STATS_TX_DROPPED */
+ nla_total_size_64bit(sizeof(__u64)) +
nla_total_size(sizeof(struct ifla_vf_trust)));
return size;
} else
@@ -1258,7 +1262,11 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST,
vf_stats.broadcast, IFLA_VF_STATS_PAD) ||
nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST,
- vf_stats.multicast, IFLA_VF_STATS_PAD)) {
+ vf_stats.multicast, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_DROPPED,
+ vf_stats.rx_dropped, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_DROPPED,
+ vf_stats.tx_dropped, IFLA_VF_STATS_PAD)) {
nla_nest_cancel(skb, vfstats);
goto nla_put_vf_failure;
}
@@ -1751,18 +1759,18 @@ static bool link_dump_filtered(struct net_device *dev,
return false;
}
-static struct net *get_target_net(struct sk_buff *skb, int netnsid)
+static struct net *get_target_net(struct sock *sk, int netnsid)
{
struct net *net;
- net = get_net_ns_by_id(sock_net(skb->sk), netnsid);
+ net = get_net_ns_by_id(sock_net(sk), netnsid);
if (!net)
return ERR_PTR(-EINVAL);
/* For now, the caller is required to have CAP_NET_ADMIN in
* the user namespace owning the target net ns.
*/
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+ if (!sk_ns_capable(sk, net->user_ns, CAP_NET_ADMIN)) {
put_net(net);
return ERR_PTR(-EACCES);
}
@@ -1803,7 +1811,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
ifla_policy, NULL) >= 0) {
if (tb[IFLA_IF_NETNSID]) {
netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
- tgt_net = get_target_net(skb, netnsid);
+ tgt_net = get_target_net(skb->sk, netnsid);
if (IS_ERR(tgt_net)) {
tgt_net = net;
netnsid = -1;
@@ -2985,7 +2993,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[IFLA_IF_NETNSID]) {
netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
- tgt_net = get_target_net(skb, netnsid);
+ tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
if (IS_ERR(tgt_net))
return PTR_ERR(tgt_net);
}
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index c0cc6aa8cfaa..e6774ccb7731 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -80,35 +80,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t
}
EXPORT_SYMBOL(ip_route_me_harder);
-/*
- * Extra routing may needed on local out, as the QUEUE target never
- * returns control to the table.
- */
-
-struct ip_rt_info {
- __be32 daddr;
- __be32 saddr;
- u_int8_t tos;
- u_int32_t mark;
-};
-
-static void nf_ip_saveroute(const struct sk_buff *skb,
- struct nf_queue_entry *entry)
-{
- struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
-
- if (entry->state.hook == NF_INET_LOCAL_OUT) {
- const struct iphdr *iph = ip_hdr(skb);
-
- rt_info->tos = iph->tos;
- rt_info->daddr = iph->daddr;
- rt_info->saddr = iph->saddr;
- rt_info->mark = skb->mark;
- }
-}
-
-static int nf_ip_reroute(struct net *net, struct sk_buff *skb,
- const struct nf_queue_entry *entry)
+int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
{
const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
@@ -119,10 +91,12 @@ static int nf_ip_reroute(struct net *net, struct sk_buff *skb,
skb->mark == rt_info->mark &&
iph->daddr == rt_info->daddr &&
iph->saddr == rt_info->saddr))
- return ip_route_me_harder(net, skb, RTN_UNSPEC);
+ return ip_route_me_harder(entry->state.net, skb,
+ RTN_UNSPEC);
}
return 0;
}
+EXPORT_SYMBOL_GPL(nf_ip_reroute);
__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol)
@@ -155,9 +129,9 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
}
EXPORT_SYMBOL(nf_ip_checksum);
-static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
- unsigned int dataoff, unsigned int len,
- u_int8_t protocol)
+__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u_int8_t protocol)
{
const struct iphdr *iph = ip_hdr(skb);
__sum16 csum = 0;
@@ -175,9 +149,10 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
}
return csum;
}
+EXPORT_SYMBOL_GPL(nf_ip_checksum_partial);
-static int nf_ip_route(struct net *net, struct dst_entry **dst,
- struct flowi *fl, bool strict __always_unused)
+int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
+ bool strict __always_unused)
{
struct rtable *rt = ip_route_output_key(net, &fl->u.ip4);
if (IS_ERR(rt))
@@ -185,19 +160,4 @@ static int nf_ip_route(struct net *net, struct dst_entry **dst,
*dst = &rt->dst;
return 0;
}
-
-static const struct nf_afinfo nf_ip_afinfo = {
- .family = AF_INET,
- .checksum = nf_ip_checksum,
- .checksum_partial = nf_ip_checksum_partial,
- .route = nf_ip_route,
- .saveroute = nf_ip_saveroute,
- .reroute = nf_ip_reroute,
- .route_key_size = sizeof(struct ip_rt_info),
-};
-
-static int __init ipv4_netfilter_init(void)
-{
- return nf_register_afinfo(&nf_ip_afinfo);
-}
-subsys_initcall(ipv4_netfilter_init);
+EXPORT_SYMBOL_GPL(nf_ip_route);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index c11eb1744ab1..7d5d444964aa 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -72,11 +72,20 @@ endif # NF_TABLES_IPV4
config NF_TABLES_ARP
tristate "ARP nf_tables support"
+ select NETFILTER_FAMILY_ARP
help
This option enables the ARP support for nf_tables.
endif # NF_TABLES
+config NF_FLOW_TABLE_IPV4
+ select NF_FLOW_TABLE
+ tristate "Netfilter flow table IPv4 module"
+ help
+ This option adds the flow table IPv4 support.
+
+ To compile it as a module, choose M here.
+
config NF_DUP_IPV4
tristate "Netfilter IPv4 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
@@ -392,6 +401,7 @@ endif # IP_NF_IPTABLES
config IP_NF_ARPTABLES
tristate "ARP tables support"
select NETFILTER_XTABLES
+ select NETFILTER_FAMILY_ARP
depends on NETFILTER_ADVANCED
help
arptables is a general, extensible packet identification framework.
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index adcdae358365..8bb1f0c7a375 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -43,6 +43,9 @@ obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
+# flow table support
+obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o
+
# generic IP tables
obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 0c3c944a7b72..bf8a5340f15e 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -810,9 +810,8 @@ static int get_info(struct net *net, void __user *user,
if (compat)
xt_compat_lock(NFPROTO_ARP);
#endif
- t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
- "arptable_%s", name);
- if (t) {
+ t = xt_request_find_table_lock(net, NFPROTO_ARP, name);
+ if (!IS_ERR(t)) {
struct arpt_getinfo info;
const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
@@ -841,7 +840,7 @@ static int get_info(struct net *net, void __user *user,
xt_table_unlock(t);
module_put(t->me);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
#ifdef CONFIG_COMPAT
if (compat)
xt_compat_unlock(NFPROTO_ARP);
@@ -866,7 +865,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
get.name[sizeof(get.name) - 1] = '\0';
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
- if (t) {
+ if (!IS_ERR(t)) {
const struct xt_table_info *private = t->private;
if (get.size == private->size)
@@ -878,7 +877,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
return ret;
}
@@ -903,10 +902,9 @@ static int __do_replace(struct net *net, const char *name,
goto out;
}
- t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
- "arptable_%s", name);
- if (!t) {
- ret = -ENOENT;
+ t = xt_request_find_table_lock(net, NFPROTO_ARP, name);
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
goto free_newinfo_counters_untrans;
}
@@ -1020,8 +1018,8 @@ static int do_add_counters(struct net *net, const void __user *user,
return PTR_ERR(paddc);
t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name);
- if (!t) {
- ret = -ENOENT;
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
goto free;
}
@@ -1408,7 +1406,7 @@ static int compat_get_entries(struct net *net,
xt_compat_lock(NFPROTO_ARP);
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
- if (t) {
+ if (!IS_ERR(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
@@ -1423,7 +1421,7 @@ static int compat_get_entries(struct net *net,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
xt_compat_unlock(NFPROTO_ARP);
return ret;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 2e0d339028bb..0b975aa2d363 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -973,9 +973,8 @@ static int get_info(struct net *net, void __user *user,
if (compat)
xt_compat_lock(AF_INET);
#endif
- t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
- "iptable_%s", name);
- if (t) {
+ t = xt_request_find_table_lock(net, AF_INET, name);
+ if (!IS_ERR(t)) {
struct ipt_getinfo info;
const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
@@ -1005,7 +1004,7 @@ static int get_info(struct net *net, void __user *user,
xt_table_unlock(t);
module_put(t->me);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
#ifdef CONFIG_COMPAT
if (compat)
xt_compat_unlock(AF_INET);
@@ -1030,7 +1029,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
get.name[sizeof(get.name) - 1] = '\0';
t = xt_find_table_lock(net, AF_INET, get.name);
- if (t) {
+ if (!IS_ERR(t)) {
const struct xt_table_info *private = t->private;
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
@@ -1041,7 +1040,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
return ret;
}
@@ -1064,10 +1063,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
goto out;
}
- t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
- "iptable_%s", name);
- if (!t) {
- ret = -ENOENT;
+ t = xt_request_find_table_lock(net, AF_INET, name);
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
goto free_newinfo_counters_untrans;
}
@@ -1181,8 +1179,8 @@ do_add_counters(struct net *net, const void __user *user,
return PTR_ERR(paddc);
t = xt_find_table_lock(net, AF_INET, tmp.name);
- if (!t) {
- ret = -ENOENT;
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
goto free;
}
@@ -1625,7 +1623,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
xt_compat_lock(AF_INET);
t = xt_find_table_lock(net, AF_INET, get.name);
- if (t) {
+ if (!IS_ERR(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
ret = compat_table_info(private, &info);
@@ -1639,7 +1637,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
xt_compat_unlock(AF_INET);
return ret;
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 7667f223d7f8..9ac92ea7b93c 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -38,12 +38,6 @@ static unsigned int
iptable_filter_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (state->hook == NF_INET_LOCAL_OUT &&
- (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr)))
- /* root is playing with raw sockets. */
- return NF_ACCEPT;
-
return ipt_do_table(skb, state, state->net->ipv4.iptable_filter);
}
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index aebdb337fd7e..dea138ca8925 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -49,11 +49,6 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
u_int32_t mark;
int err;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
/* Save things which could affect route */
mark = skb->mark;
iph = ip_hdr(skb);
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index a1a07b338ccf..0f7255cc65ee 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -72,6 +72,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
{
.hook = iptable_nat_ipv4_in,
.pf = NFPROTO_IPV4,
+ .nat_hook = true,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_NAT_DST,
},
@@ -79,6 +80,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
{
.hook = iptable_nat_ipv4_out,
.pf = NFPROTO_IPV4,
+ .nat_hook = true,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_NAT_SRC,
},
@@ -86,6 +88,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
{
.hook = iptable_nat_ipv4_local_fn,
.pf = NFPROTO_IPV4,
+ .nat_hook = true,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_NAT_DST,
},
@@ -93,6 +96,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
{
.hook = iptable_nat_ipv4_fn,
.pf = NFPROTO_IPV4,
+ .nat_hook = true,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC,
},
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 2642ecd2645c..a869d1fea7d9 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -26,12 +26,6 @@ static unsigned int
iptable_raw_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (state->hook == NF_INET_LOCAL_OUT &&
- (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr)))
- /* root is playing with raw sockets. */
- return NF_ACCEPT;
-
return ipt_do_table(skb, state, state->net->ipv4.iptable_raw);
}
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index ff226596e4b5..e5379fe57b64 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -43,12 +43,6 @@ static unsigned int
iptable_security_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (state->hook == NF_INET_LOCAL_OUT &&
- (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr)))
- /* Somebody is playing with raw sockets. */
- return NF_ACCEPT;
-
return ipt_do_table(skb, state, state->net->ipv4.iptable_security);
}
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 89af9d88ca21..de213a397ea8 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -154,11 +154,6 @@ static unsigned int ipv4_conntrack_local(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */
return NF_ACCEPT;
@@ -368,7 +363,7 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
MODULE_ALIAS("ip_conntrack");
MODULE_LICENSE("GPL");
-static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
+static const struct nf_conntrack_l4proto * const builtin_l4proto4[] = {
&nf_conntrack_l4proto_tcp4,
&nf_conntrack_l4proto_udp4,
&nf_conntrack_l4proto_icmp,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 1849fedd9b81..5c15beafa711 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -22,7 +22,7 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_log.h>
-static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ;
+static const unsigned int nf_ct_icmp_timeout = 30*HZ;
static inline struct nf_icmp_net *icmp_pernet(struct net *net)
{
@@ -351,7 +351,7 @@ static struct nf_proto_net *icmp_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.icmp.pn;
}
-struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_ICMP,
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
new file mode 100644
index 000000000000..b2d01eb25f2c
--- /dev/null
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -0,0 +1,284 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/ip.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/neighbour.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
+/* For layer 4 checksum field offset. */
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
+ __be32 addr, __be32 new_addr)
+{
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+ return -1;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
+
+ return 0;
+}
+
+static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
+ __be32 addr, __be32 new_addr)
+{
+ struct udphdr *udph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
+ return -1;
+
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace4(&udph->check, skb, addr,
+ new_addr, true);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+
+ return 0;
+}
+
+static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
+ unsigned int thoff, __be32 addr,
+ __be32 new_addr)
+{
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ case IPPROTO_UDP:
+ if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ }
+
+ return 0;
+}
+
+static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ struct iphdr *iph, unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ __be32 addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = iph->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
+ iph->saddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = iph->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
+ iph->daddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+ csum_replace4(&iph->check, addr, new_addr);
+
+ return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+}
+
+static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ struct iphdr *iph, unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ __be32 addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = iph->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
+ iph->daddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = iph->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
+ iph->saddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+}
+
+static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ enum flow_offload_tuple_dir dir)
+{
+ struct iphdr *iph = ip_hdr(skb);
+ unsigned int thoff = iph->ihl * 4;
+
+ if (flow->flags & FLOW_OFFLOAD_SNAT &&
+ (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+ nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
+ return -1;
+ if (flow->flags & FLOW_OFFLOAD_DNAT &&
+ (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
+ return -1;
+
+ return 0;
+}
+
+static bool ip_has_options(unsigned int thoff)
+{
+ return thoff != sizeof(struct iphdr);
+}
+
+static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
+ struct flow_offload_tuple *tuple)
+{
+ struct flow_ports *ports;
+ unsigned int thoff;
+ struct iphdr *iph;
+
+ if (!pskb_may_pull(skb, sizeof(*iph)))
+ return -1;
+
+ iph = ip_hdr(skb);
+ thoff = iph->ihl * 4;
+
+ if (ip_is_fragment(iph) ||
+ unlikely(ip_has_options(thoff)))
+ return -1;
+
+ if (iph->protocol != IPPROTO_TCP &&
+ iph->protocol != IPPROTO_UDP)
+ return -1;
+
+ thoff = iph->ihl * 4;
+ if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+ return -1;
+
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+ tuple->src_v4.s_addr = iph->saddr;
+ tuple->dst_v4.s_addr = iph->daddr;
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ tuple->l3proto = AF_INET;
+ tuple->l4proto = iph->protocol;
+ tuple->iifidx = dev->ifindex;
+
+ return 0;
+}
+
+/* Based on ip_exceeds_mtu(). */
+static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu)
+ return false;
+
+ if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
+ return false;
+
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ return false;
+
+ return true;
+}
+
+static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt)
+{
+ u32 mtu;
+
+ mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
+ if (__nf_flow_exceeds_mtu(skb, mtu))
+ return true;
+
+ return false;
+}
+
+unsigned int
+nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ struct flow_offload_tuple tuple = {};
+ enum flow_offload_tuple_dir dir;
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ const struct rtable *rt;
+ struct iphdr *iph;
+ __be32 nexthop;
+
+ if (skb->protocol != htons(ETH_P_IP))
+ return NF_ACCEPT;
+
+ if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
+ return NF_ACCEPT;
+
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
+ if (tuplehash == NULL)
+ return NF_ACCEPT;
+
+ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
+ if (!outdev)
+ return NF_ACCEPT;
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
+ rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+ if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
+ return NF_ACCEPT;
+
+ if (skb_try_make_writable(skb, sizeof(*iph)))
+ return NF_DROP;
+
+ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+ nf_flow_nat_ip(flow, skb, dir) < 0)
+ return NF_DROP;
+
+ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+ iph = ip_hdr(skb);
+ ip_decrease_ttl(iph);
+
+ skb->dev = outdev;
+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+
+ return NF_STOLEN;
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
+
+static struct nf_flowtable_type flowtable_ipv4 = {
+ .family = NFPROTO_IPV4,
+ .params = &nf_flow_offload_rhash_params,
+ .gc = nf_flow_offload_work_gc,
+ .hook = nf_flow_offload_ip_hook,
+ .owner = THIS_MODULE,
+};
+
+static int __init nf_flow_ipv4_module_init(void)
+{
+ nft_register_flowtable_type(&flowtable_ipv4);
+
+ return 0;
+}
+
+static void __exit nf_flow_ipv4_module_exit(void)
+{
+ nft_unregister_flowtable_type(&flowtable_ipv4);
+}
+
+module_init(nf_flow_ipv4_module_init);
+module_exit(nf_flow_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NF_FLOWTABLE(AF_INET);
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 0443ca4120b0..f7ff6a364d7b 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -356,11 +356,6 @@ nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
#endif
unsigned int ret;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
@@ -396,11 +391,6 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
unsigned int ret;
int err;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
if (ret != NF_DROP && ret != NF_STOLEN &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
index 4bbc273b45e8..f84c17763f6f 100644
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -21,7 +21,8 @@ nft_do_chain_arp(void *priv,
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_unspec(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_unspec(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
@@ -30,12 +31,6 @@ static struct nft_af_info nft_af_arp __read_mostly = {
.family = NFPROTO_ARP,
.nhooks = NF_ARP_NUMHOOKS,
.owner = THIS_MODULE,
- .nops = 1,
- .hooks = {
- [NF_ARP_IN] = nft_do_chain_arp,
- [NF_ARP_OUT] = nft_do_chain_arp,
- [NF_ARP_FORWARD] = nft_do_chain_arp,
- },
};
static int nf_tables_arp_init_net(struct net *net)
@@ -73,6 +68,10 @@ static const struct nf_chain_type filter_arp = {
.owner = THIS_MODULE,
.hook_mask = (1 << NF_ARP_IN) |
(1 << NF_ARP_OUT),
+ .hooks = {
+ [NF_ARP_IN] = nft_do_chain_arp,
+ [NF_ARP_OUT] = nft_do_chain_arp,
+ },
};
static int __init nf_tables_arp_init(void)
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
index 2840a29b2e04..f4675253f1e6 100644
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -24,40 +24,17 @@ static unsigned int nft_do_chain_ipv4(void *priv,
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv4(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv4(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
-static unsigned int nft_ipv4_output(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- if (unlikely(skb->len < sizeof(struct iphdr) ||
- ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
- if (net_ratelimit())
- pr_info("nf_tables_ipv4: ignoring short SOCK_RAW "
- "packet\n");
- return NF_ACCEPT;
- }
-
- return nft_do_chain_ipv4(priv, skb, state);
-}
-
-struct nft_af_info nft_af_ipv4 __read_mostly = {
+static struct nft_af_info nft_af_ipv4 __read_mostly = {
.family = NFPROTO_IPV4,
.nhooks = NF_INET_NUMHOOKS,
.owner = THIS_MODULE,
- .nops = 1,
- .hooks = {
- [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
- [NF_INET_LOCAL_OUT] = nft_ipv4_output,
- [NF_INET_FORWARD] = nft_do_chain_ipv4,
- [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
- [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
- },
};
-EXPORT_SYMBOL_GPL(nft_af_ipv4);
static int nf_tables_ipv4_init_net(struct net *net)
{
@@ -97,6 +74,13 @@ static const struct nf_chain_type filter_ipv4 = {
(1 << NF_INET_FORWARD) |
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
+ [NF_INET_LOCAL_OUT] = nft_do_chain_ipv4,
+ [NF_INET_FORWARD] = nft_do_chain_ipv4,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
+ },
};
static int __init nf_tables_ipv4_init(void)
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index f5c66a7a4bf2..f2a490981594 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -33,7 +33,8 @@ static unsigned int nft_nat_do_chain(void *priv,
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv4(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv4(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
index 30493beb611a..d965c225b9f6 100644
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -33,12 +33,8 @@ static unsigned int nf_route_table_hook(void *priv,
const struct iphdr *iph;
int err;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
- nft_set_pktinfo_ipv4(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv4(&pkt, skb);
mark = skb->mark;
iph = ip_hdr(skb);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f68cb33d50d1..d7cf861bf699 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1733,8 +1733,8 @@ static void tcp_update_recv_tstamps(struct sk_buff *skb,
}
/* Similar to __sock_recv_timestamp, but does not require an skb */
-void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
- struct scm_timestamping *tss)
+static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
+ struct scm_timestamping *tss)
{
struct timeval tv;
bool has_timestamping = false;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 04be9f833927..95461f02ac9a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1944,7 +1944,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
in_flight = tcp_packets_in_flight(tp);
- BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));
+ BUG_ON(tcp_skb_pcount(skb) <= 1);
+ BUG_ON(tp->snd_cwnd <= in_flight);
send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index a1f918713006..fbf08ce3f5ab 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -221,8 +221,7 @@ ipv4_connected:
if (__ipv6_addr_needs_scope_id(addr_type)) {
if (addr_len >= sizeof(struct sockaddr_in6) &&
usin->sin6_scope_id) {
- if (sk->sk_bound_dev_if &&
- sk->sk_bound_dev_if != usin->sin6_scope_id) {
+ if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) {
err = -EINVAL;
goto out;
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index edda5ad3b405..e31118f417b4 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -796,12 +796,6 @@ insert_above:
return ln;
}
-static bool rt6_qualify_for_ecmp(struct rt6_info *rt)
-{
- return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
- RTF_GATEWAY;
-}
-
static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc)
{
int i;
@@ -991,6 +985,7 @@ next_iter:
rt6i_nsiblings++;
}
BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings);
+ rt6_multipath_rebalance(temp_sibling);
}
/*
@@ -1243,23 +1238,28 @@ out:
* If fib6_add_1 has cleared the old leaf pointer in the
* super-tree leaf node we have to find a new one for it.
*/
- struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
- lockdep_is_held(&table->tb6_lock));
- if (pn != fn && pn_leaf == rt) {
- pn_leaf = NULL;
- RCU_INIT_POINTER(pn->leaf, NULL);
- atomic_dec(&rt->rt6i_ref);
- }
- if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
- pn_leaf = fib6_find_prefix(info->nl_net, table, pn);
-#if RT6_DEBUG >= 2
- if (!pn_leaf) {
- WARN_ON(!pn_leaf);
- pn_leaf = info->nl_net->ipv6.ip6_null_entry;
+ if (pn != fn) {
+ struct rt6_info *pn_leaf =
+ rcu_dereference_protected(pn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ if (pn_leaf == rt) {
+ pn_leaf = NULL;
+ RCU_INIT_POINTER(pn->leaf, NULL);
+ atomic_dec(&rt->rt6i_ref);
}
+ if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
+ pn_leaf = fib6_find_prefix(info->nl_net, table,
+ pn);
+#if RT6_DEBUG >= 2
+ if (!pn_leaf) {
+ WARN_ON(!pn_leaf);
+ pn_leaf =
+ info->nl_net->ipv6.ip6_null_entry;
+ }
#endif
- atomic_inc(&pn_leaf->rt6i_ref);
- rcu_assign_pointer(pn->leaf, pn_leaf);
+ atomic_inc(&pn_leaf->rt6i_ref);
+ rcu_assign_pointer(pn->leaf, pn_leaf);
+ }
}
#endif
goto failure;
@@ -1667,6 +1667,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
sibling->rt6i_nsiblings--;
rt->rt6i_nsiblings = 0;
list_del_init(&rt->rt6i_siblings);
+ rt6_multipath_rebalance(next_sibling);
}
/* Adjust walkers */
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index bcdb615aed6e..19adad6d90bc 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -378,7 +378,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
return dst_output(net, sk, skb);
}
-static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
+unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
unsigned int mtu;
struct inet6_dev *idev;
@@ -398,6 +398,7 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
return mtu;
}
+EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 8a4610e84e58..8071f42cd8a0 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1077,10 +1077,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
neigh_release(neigh);
}
- } else if (!(t->parms.flags &
- (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
- /* enable the cache only only if the routing decision does
- * not depend on the current inner header value
+ } else if (t->parms.proto != 0 && !(t->parms.flags &
+ (IP6_TNL_F_USE_ORIG_TCLASS |
+ IP6_TNL_F_USE_ORIG_FWMARK))) {
+ /* enable the cache only if neither the outer protocol nor the
+ * routing decision depends on the current inner header value
*/
use_cache = true;
}
@@ -1679,11 +1680,11 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
{
struct ip6_tnl *tnl = netdev_priv(dev);
- if (tnl->parms.proto == IPPROTO_IPIP) {
- if (new_mtu < ETH_MIN_MTU)
+ if (tnl->parms.proto == IPPROTO_IPV6) {
+ if (new_mtu < IPV6_MIN_MTU)
return -EINVAL;
} else {
- if (new_mtu < IPV6_MIN_MTU)
+ if (new_mtu < ETH_MIN_MTU)
return -EINVAL;
}
if (new_mtu > 0xFFF8 - dev->hard_header_len)
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 39970e212ad5..d95ceca7ff8f 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -68,32 +68,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
}
EXPORT_SYMBOL(ip6_route_me_harder);
-/*
- * Extra routing may needed on local out, as the QUEUE target never
- * returns control to the table.
- */
-
-struct ip6_rt_info {
- struct in6_addr daddr;
- struct in6_addr saddr;
- u_int32_t mark;
-};
-
-static void nf_ip6_saveroute(const struct sk_buff *skb,
- struct nf_queue_entry *entry)
-{
- struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
-
- if (entry->state.hook == NF_INET_LOCAL_OUT) {
- const struct ipv6hdr *iph = ipv6_hdr(skb);
-
- rt_info->daddr = iph->daddr;
- rt_info->saddr = iph->saddr;
- rt_info->mark = skb->mark;
- }
-}
-
-static int nf_ip6_reroute(struct net *net, struct sk_buff *skb,
+static int nf_ip6_reroute(struct sk_buff *skb,
const struct nf_queue_entry *entry)
{
struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
@@ -103,7 +78,7 @@ static int nf_ip6_reroute(struct net *net, struct sk_buff *skb,
if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
!ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
skb->mark != rt_info->mark)
- return ip6_route_me_harder(net, skb);
+ return ip6_route_me_harder(entry->state.net, skb);
}
return 0;
}
@@ -190,25 +165,19 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
};
static const struct nf_ipv6_ops ipv6ops = {
- .chk_addr = ipv6_chk_addr,
- .route_input = ip6_route_input,
- .fragment = ip6_fragment
-};
-
-static const struct nf_afinfo nf_ip6_afinfo = {
- .family = AF_INET6,
+ .chk_addr = ipv6_chk_addr,
+ .route_input = ip6_route_input,
+ .fragment = ip6_fragment,
.checksum = nf_ip6_checksum,
.checksum_partial = nf_ip6_checksum_partial,
.route = nf_ip6_route,
- .saveroute = nf_ip6_saveroute,
.reroute = nf_ip6_reroute,
- .route_key_size = sizeof(struct ip6_rt_info),
};
int __init ipv6_netfilter_init(void)
{
RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops);
- return nf_register_afinfo(&nf_ip6_afinfo);
+ return 0;
}
/* This can be called from inet6_init() on errors, so it cannot
@@ -217,5 +186,4 @@ int __init ipv6_netfilter_init(void)
void ipv6_netfilter_fini(void)
{
RCU_INIT_POINTER(nf_ipv6_ops, NULL);
- nf_unregister_afinfo(&nf_ip6_afinfo);
}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 6acb2eecd986..806e95375ec8 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -71,6 +71,14 @@ config NFT_FIB_IPV6
endif # NF_TABLES_IPV6
endif # NF_TABLES
+config NF_FLOW_TABLE_IPV6
+ select NF_FLOW_TABLE
+ tristate "Netfilter flow table IPv6 module"
+ help
+ This option adds the flow table IPv6 support.
+
+ To compile it as a module, choose M here.
+
config NF_DUP_IPV6
tristate "Netfilter IPv6 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index c6ee0cdd0ba9..95611c4b39b0 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -45,6 +45,9 @@ obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
+# flow table support
+obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o
+
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 1d7ae9366335..6ebbef2dfb60 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -991,9 +991,8 @@ static int get_info(struct net *net, void __user *user,
if (compat)
xt_compat_lock(AF_INET6);
#endif
- t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
- "ip6table_%s", name);
- if (t) {
+ t = xt_request_find_table_lock(net, AF_INET6, name);
+ if (!IS_ERR(t)) {
struct ip6t_getinfo info;
const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
@@ -1023,7 +1022,7 @@ static int get_info(struct net *net, void __user *user,
xt_table_unlock(t);
module_put(t->me);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
#ifdef CONFIG_COMPAT
if (compat)
xt_compat_unlock(AF_INET6);
@@ -1049,7 +1048,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
get.name[sizeof(get.name) - 1] = '\0';
t = xt_find_table_lock(net, AF_INET6, get.name);
- if (t) {
+ if (!IS_ERR(t)) {
struct xt_table_info *private = t->private;
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
@@ -1060,7 +1059,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
return ret;
}
@@ -1083,10 +1082,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
goto out;
}
- t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
- "ip6table_%s", name);
- if (!t) {
- ret = -ENOENT;
+ t = xt_request_find_table_lock(net, AF_INET6, name);
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
goto free_newinfo_counters_untrans;
}
@@ -1199,8 +1197,8 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
if (IS_ERR(paddc))
return PTR_ERR(paddc);
t = xt_find_table_lock(net, AF_INET6, tmp.name);
- if (!t) {
- ret = -ENOENT;
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
goto free;
}
@@ -1636,7 +1634,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
xt_compat_lock(AF_INET6);
t = xt_find_table_lock(net, AF_INET6, get.name);
- if (t) {
+ if (!IS_ERR(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
ret = compat_table_info(private, &info);
@@ -1650,7 +1648,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
xt_compat_unlock(AF_INET6);
return ret;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 2b1a9dcdbcb3..b0524b18c4fb 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -42,14 +42,6 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
u_int8_t hop_limit;
u_int32_t flowlabel, mark;
int err;
-#if 0
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr)) {
- net_warn_ratelimited("ip6t_hook: happy cracking\n");
- return NF_ACCEPT;
- }
-#endif
/* save source/dest address, mark, hoplimit, flowlabel, priority, */
memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 991512576c8c..47306e45a80a 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -74,6 +74,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
{
.hook = ip6table_nat_in,
.pf = NFPROTO_IPV6,
+ .nat_hook = true,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_NAT_DST,
},
@@ -81,6 +82,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
{
.hook = ip6table_nat_out,
.pf = NFPROTO_IPV6,
+ .nat_hook = true,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_NAT_SRC,
},
@@ -88,12 +90,14 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
{
.hook = ip6table_nat_local_fn,
.pf = NFPROTO_IPV6,
+ .nat_hook = true,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST,
},
/* After packet filtering, change source */
{
.hook = ip6table_nat_fn,
+ .nat_hook = true,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC,
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 3b80a38f62b8..11a313fd9273 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -176,11 +176,6 @@ static unsigned int ipv6_conntrack_local(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct ipv6hdr)) {
- net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
- return NF_ACCEPT;
- }
return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
}
@@ -368,7 +363,7 @@ static struct nf_sockopt_ops so_getorigdst6 = {
.owner = THIS_MODULE,
};
-static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
+static const struct nf_conntrack_l4proto * const builtin_l4proto6[] = {
&nf_conntrack_l4proto_tcp6,
&nf_conntrack_l4proto_udp6,
&nf_conntrack_l4proto_icmpv6,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 3ac0d826afc4..2548e2c8aedd 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -27,7 +27,7 @@
#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
#include <net/netfilter/nf_log.h>
-static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
+static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
{
@@ -352,7 +352,7 @@ static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.icmpv6.pn;
}
-struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_ICMPV6,
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
new file mode 100644
index 000000000000..0c3b9d32f64f
--- /dev/null
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -0,0 +1,278 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/ipv6.h>
+#include <linux/netdevice.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/neighbour.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
+/* For layer 4 checksum field offset. */
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
+ struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+ return -1;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
+ new_addr->s6_addr32, true);
+
+ return 0;
+}
+
+static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+ struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ struct udphdr *udph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
+ return -1;
+
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
+ new_addr->s6_addr32, true);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+
+ return 0;
+}
+
+static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff, struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ switch (ip6h->nexthdr) {
+ case IPPROTO_TCP:
+ if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ case IPPROTO_UDP:
+ if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ }
+
+ return 0;
+}
+
+static int nf_flow_snat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ struct in6_addr addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = ip6h->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
+ ip6h->saddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = ip6h->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
+ ip6h->daddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ struct in6_addr addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = ip6h->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
+ ip6h->daddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = ip6h->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
+ ip6h->saddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+static int nf_flow_nat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb,
+ enum flow_offload_tuple_dir dir)
+{
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ unsigned int thoff = sizeof(*ip6h);
+
+ if (flow->flags & FLOW_OFFLOAD_SNAT &&
+ (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+ return -1;
+ if (flow->flags & FLOW_OFFLOAD_DNAT &&
+ (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+ return -1;
+
+ return 0;
+}
+
+static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
+ struct flow_offload_tuple *tuple)
+{
+ struct flow_ports *ports;
+ struct ipv6hdr *ip6h;
+ unsigned int thoff;
+
+ if (!pskb_may_pull(skb, sizeof(*ip6h)))
+ return -1;
+
+ ip6h = ipv6_hdr(skb);
+
+ if (ip6h->nexthdr != IPPROTO_TCP &&
+ ip6h->nexthdr != IPPROTO_UDP)
+ return -1;
+
+ thoff = sizeof(*ip6h);
+ if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+ return -1;
+
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+ tuple->src_v6 = ip6h->saddr;
+ tuple->dst_v6 = ip6h->daddr;
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ tuple->l3proto = AF_INET6;
+ tuple->l4proto = ip6h->nexthdr;
+ tuple->iifidx = dev->ifindex;
+
+ return 0;
+}
+
+/* Based on ip_exceeds_mtu(). */
+static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu)
+ return false;
+
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ return false;
+
+ return true;
+}
+
+static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
+{
+ u32 mtu;
+
+ mtu = ip6_dst_mtu_forward(&rt->dst);
+ if (__nf_flow_exceeds_mtu(skb, mtu))
+ return true;
+
+ return false;
+}
+
+unsigned int
+nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ struct flow_offload_tuple tuple = {};
+ enum flow_offload_tuple_dir dir;
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ struct in6_addr *nexthop;
+ struct ipv6hdr *ip6h;
+ struct rt6_info *rt;
+
+ if (skb->protocol != htons(ETH_P_IPV6))
+ return NF_ACCEPT;
+
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
+ return NF_ACCEPT;
+
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
+ if (tuplehash == NULL)
+ return NF_ACCEPT;
+
+ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
+ if (!outdev)
+ return NF_ACCEPT;
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
+ rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
+ if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
+ return NF_ACCEPT;
+
+ if (skb_try_make_writable(skb, sizeof(*ip6h)))
+ return NF_DROP;
+
+ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+ nf_flow_nat_ipv6(flow, skb, dir) < 0)
+ return NF_DROP;
+
+ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+ ip6h = ipv6_hdr(skb);
+ ip6h->hop_limit--;
+
+ skb->dev = outdev;
+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+
+ return NF_STOLEN;
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
+
+static struct nf_flowtable_type flowtable_ipv6 = {
+ .family = NFPROTO_IPV6,
+ .params = &nf_flow_offload_rhash_params,
+ .gc = nf_flow_offload_work_gc,
+ .hook = nf_flow_offload_ipv6_hook,
+ .owner = THIS_MODULE,
+};
+
+static int __init nf_flow_ipv6_module_init(void)
+{
+ nft_register_flowtable_type(&flowtable_ipv6);
+
+ return 0;
+}
+
+static void __exit nf_flow_ipv6_module_exit(void)
+{
+ nft_unregister_flowtable_type(&flowtable_ipv6);
+}
+
+module_init(nf_flow_ipv6_module_init);
+module_exit(nf_flow_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NF_FLOWTABLE(AF_INET6);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 1d2fb9267d6f..bed57ee65f7b 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -369,10 +369,6 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
#endif
unsigned int ret;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct ipv6hdr))
- return NF_ACCEPT;
-
ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
@@ -408,10 +404,6 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
unsigned int ret;
int err;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct ipv6hdr))
- return NF_ACCEPT;
-
ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
if (ret != NF_DROP && ret != NF_STOLEN &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
index d6e4ba5de916..9cd45b964123 100644
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -22,39 +22,17 @@ static unsigned int nft_do_chain_ipv6(void *priv,
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv6(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv6(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
-static unsigned int nft_ipv6_output(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
- if (net_ratelimit())
- pr_info("nf_tables_ipv6: ignoring short SOCK_RAW "
- "packet\n");
- return NF_ACCEPT;
- }
-
- return nft_do_chain_ipv6(priv, skb, state);
-}
-
-struct nft_af_info nft_af_ipv6 __read_mostly = {
+static struct nft_af_info nft_af_ipv6 __read_mostly = {
.family = NFPROTO_IPV6,
.nhooks = NF_INET_NUMHOOKS,
.owner = THIS_MODULE,
- .nops = 1,
- .hooks = {
- [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
- [NF_INET_LOCAL_OUT] = nft_ipv6_output,
- [NF_INET_FORWARD] = nft_do_chain_ipv6,
- [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
- [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
- },
};
-EXPORT_SYMBOL_GPL(nft_af_ipv6);
static int nf_tables_ipv6_init_net(struct net *net)
{
@@ -94,6 +72,13 @@ static const struct nf_chain_type filter_ipv6 = {
(1 << NF_INET_FORWARD) |
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
+ [NF_INET_LOCAL_OUT] = nft_do_chain_ipv6,
+ [NF_INET_FORWARD] = nft_do_chain_ipv6,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
+ },
};
static int __init nf_tables_ipv6_init(void)
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 443cd306c0b0..73fe2bd13fcf 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -31,7 +31,8 @@ static unsigned int nft_nat_do_chain(void *priv,
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv6(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv6(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index f2727475895e..11d3c3b9aa18 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -33,7 +33,8 @@ static unsigned int nf_route_table_hook(void *priv,
u32 mark, flowlabel;
int err;
- nft_set_pktinfo_ipv6(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv6(&pkt, skb);
/* save source/dest address, mark, hoplimit, flowlabel, priority */
memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 54b5899543ef..cc5174c7254c 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -60,7 +60,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
{
const struct net_device *dev = NULL;
const struct nf_ipv6_ops *v6ops;
- const struct nf_afinfo *afinfo;
int route_err, addrtype;
struct rt6_info *rt;
struct flowi6 fl6 = {
@@ -69,8 +68,8 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
};
u32 ret = 0;
- afinfo = nf_get_afinfo(NFPROTO_IPV6);
- if (!afinfo)
+ v6ops = nf_get_ipv6_ops();
+ if (!v6ops)
return RTN_UNREACHABLE;
if (priv->flags & NFTA_FIB_F_IIF)
@@ -80,12 +79,11 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
- v6ops = nf_get_ipv6_ops();
- if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
+ if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
ret = RTN_LOCAL;
- route_err = afinfo->route(nft_net(pkt), (struct dst_entry **)&rt,
- flowi6_to_flowi(&fl6), false);
+ route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt,
+ flowi6_to_flowi(&fl6), false);
if (route_err)
goto err;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1054b059747f..1076ae0ea9d5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -455,7 +455,6 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
int strict)
{
struct rt6_info *sibling, *next_sibling;
- int route_choosen;
/* We might have already computed the hash for ICMPv6 errors. In such
* case it will always be non-zero. Otherwise now is the time to do it.
@@ -463,28 +462,19 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
if (!fl6->mp_hash)
fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
- route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
- /* Don't change the route, if route_choosen == 0
- * (siblings does not include ourself)
- */
- if (route_choosen)
- list_for_each_entry_safe(sibling, next_sibling,
- &match->rt6i_siblings, rt6i_siblings) {
- route_choosen--;
- if (route_choosen == 0) {
- struct inet6_dev *idev = sibling->rt6i_idev;
-
- if (sibling->rt6i_nh_flags & RTNH_F_DEAD)
- break;
- if (sibling->rt6i_nh_flags & RTNH_F_LINKDOWN &&
- idev->cnf.ignore_routes_with_linkdown)
- break;
- if (rt6_score_route(sibling, oif, strict) < 0)
- break;
- match = sibling;
- break;
- }
- }
+ if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
+ return match;
+
+ list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
+ rt6i_siblings) {
+ if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound))
+ continue;
+ if (rt6_score_route(sibling, oif, strict) < 0)
+ break;
+ match = sibling;
+ break;
+ }
+
return match;
}
@@ -1833,10 +1823,10 @@ u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
if (skb) {
ip6_multipath_l3_keys(skb, &hash_keys);
- return flow_hash_from_keys(&hash_keys);
+ return flow_hash_from_keys(&hash_keys) >> 1;
}
- return get_hash_from_flowi6(fl6);
+ return get_hash_from_flowi6(fl6) >> 1;
}
void ip6_route_input(struct sk_buff *skb)
@@ -2604,6 +2594,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
#endif
rt->rt6i_metric = cfg->fc_metric;
+ rt->rt6i_nh_weight = 1;
/* We cannot add true routes via loopback here,
they would result in kernel looping; promote them to reject routes
@@ -3481,6 +3472,99 @@ struct arg_netdev_event {
};
};
+static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
+{
+ struct rt6_info *iter;
+ struct fib6_node *fn;
+
+ fn = rcu_dereference_protected(rt->rt6i_node,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ iter = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ while (iter) {
+ if (iter->rt6i_metric == rt->rt6i_metric &&
+ rt6_qualify_for_ecmp(iter))
+ return iter;
+ iter = rcu_dereference_protected(iter->rt6_next,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ }
+
+ return NULL;
+}
+
+static bool rt6_is_dead(const struct rt6_info *rt)
+{
+ if (rt->rt6i_nh_flags & RTNH_F_DEAD ||
+ (rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
+ rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
+ return true;
+
+ return false;
+}
+
+static int rt6_multipath_total_weight(const struct rt6_info *rt)
+{
+ struct rt6_info *iter;
+ int total = 0;
+
+ if (!rt6_is_dead(rt))
+ total += rt->rt6i_nh_weight;
+
+ list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
+ if (!rt6_is_dead(iter))
+ total += iter->rt6i_nh_weight;
+ }
+
+ return total;
+}
+
+static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
+{
+ int upper_bound = -1;
+
+ if (!rt6_is_dead(rt)) {
+ *weight += rt->rt6i_nh_weight;
+ upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
+ total) - 1;
+ }
+ atomic_set(&rt->rt6i_nh_upper_bound, upper_bound);
+}
+
+static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
+{
+ struct rt6_info *iter;
+ int weight = 0;
+
+ rt6_upper_bound_set(rt, &weight, total);
+
+ list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+ rt6_upper_bound_set(iter, &weight, total);
+}
+
+void rt6_multipath_rebalance(struct rt6_info *rt)
+{
+ struct rt6_info *first;
+ int total;
+
+ /* In case the entire multipath route was marked for flushing,
+ * then there is no need to rebalance upon the removal of every
+ * sibling route.
+ */
+ if (!rt->rt6i_nsiblings || rt->should_flush)
+ return;
+
+ /* During lookup routes are evaluated in order, so we need to
+ * make sure upper bounds are assigned from the first sibling
+ * onwards.
+ */
+ first = rt6_multipath_first_sibling(rt);
+ if (WARN_ON_ONCE(!first))
+ return;
+
+ total = rt6_multipath_total_weight(first);
+ rt6_multipath_upper_bound_set(first, total);
+}
+
static int fib6_ifup(struct rt6_info *rt, void *p_arg)
{
const struct arg_netdev_event *arg = p_arg;
@@ -3489,6 +3573,7 @@ static int fib6_ifup(struct rt6_info *rt, void *p_arg)
if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
rt->rt6i_nh_flags &= ~arg->nh_flags;
fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
+ rt6_multipath_rebalance(rt);
}
return 0;
@@ -3588,6 +3673,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
RTNH_F_LINKDOWN);
fib6_update_sernum(rt);
+ rt6_multipath_rebalance(rt);
}
return -2;
case NETDEV_CHANGE:
@@ -3595,6 +3681,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
break;
rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
+ rt6_multipath_rebalance(rt);
break;
}
@@ -3938,6 +4025,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
goto cleanup;
}
+ rt->rt6i_nh_weight = rtnh->rtnh_hops + 1;
+
err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
if (err) {
dst_release_immediate(&rt->dst);
@@ -4160,7 +4249,7 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
if (!rtnh)
goto nla_put_failure;
- rtnh->rtnh_hops = 0;
+ rtnh->rtnh_hops = rt->rt6i_nh_weight - 1;
rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 825b8e01f947..ba3767ef5e93 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -501,7 +501,7 @@ static struct seg6_action_desc *__get_action_desc(int action)
struct seg6_action_desc *desc;
int i, count;
- count = sizeof(seg6_action_table) / sizeof(struct seg6_action_desc);
+ count = ARRAY_SIZE(seg6_action_table);
for (i = 0; i < count; i++) {
desc = &seg6_action_table[i];
if (desc->action == action)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index aa12a26a96c6..c0f7e69f2e6c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -176,8 +176,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
/* If interface is set while binding, indices
* must coincide.
*/
- if (sk->sk_bound_dev_if &&
- sk->sk_bound_dev_if != usin->sin6_scope_id)
+ if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
return -EINVAL;
sk->sk_bound_dev_if = usin->sin6_scope_id;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 786cd7f6a5e8..62285fc6eb59 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -662,10 +662,9 @@ discard:
* |x|S|x|x|x|x|x|x| Sequence Number |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
- * Cookie value, sublayer format and offset (pad) are negotiated with
- * the peer when the session is set up. Unlike L2TPv2, we do not need
- * to parse the packet header to determine if optional fields are
- * present.
+ * Cookie value and sublayer format are negotiated with the peer when
+ * the session is set up. Unlike L2TPv2, we do not need to parse the
+ * packet header to determine if optional fields are present.
*
* Caller must already have parsed the frame and determined that it is
* a data (not control) frame before coming here. Fields up to the
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index b3cff69bfd66..fd580614085b 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3625,6 +3625,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
}
return true;
case NL80211_IFTYPE_MESH_POINT:
+ if (ether_addr_equal(sdata->vif.addr, hdr->addr2))
+ return false;
if (multicast)
return true;
return ether_addr_equal(sdata->vif.addr, hdr->addr1);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e4a13cc8a2e7..0ee0fcf3abbf 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -12,6 +12,12 @@ config NETFILTER_INGRESS
config NETFILTER_NETLINK
tristate
+config NETFILTER_FAMILY_BRIDGE
+ bool
+
+config NETFILTER_FAMILY_ARP
+ bool
+
config NETFILTER_NETLINK_ACCT
tristate "Netfilter NFACCT over NFNETLINK interface"
depends on NETFILTER_ADVANCED
@@ -62,6 +68,8 @@ config NF_LOG_NETDEV
select NF_LOG_COMMON
if NF_CONNTRACK
+config NETFILTER_CONNCOUNT
+ tristate
config NF_CONNTRACK_MARK
bool 'Connection mark tracking support'
@@ -497,6 +505,13 @@ config NFT_CT
This option adds the "ct" expression that you can use to match
connection tracking information such as the flow state.
+config NFT_FLOW_OFFLOAD
+ depends on NF_CONNTRACK
+ tristate "Netfilter nf_tables hardware flow offload module"
+ help
+ This option adds the "flow_offload" expression that you can use to
+ choose what flows are placed into the hardware.
+
config NFT_SET_RBTREE
tristate "Netfilter nf_tables rbtree set module"
help
@@ -649,6 +664,21 @@ endif # NF_TABLES_NETDEV
endif # NF_TABLES
+config NF_FLOW_TABLE_INET
+ select NF_FLOW_TABLE
+ tristate "Netfilter flow table mixed IPv4/IPv6 module"
+ help
+ This option adds the flow table mixed IPv4/IPv6 support.
+
+ To compile it as a module, choose M here.
+
+config NF_FLOW_TABLE
+ tristate "Netfilter flow table module"
+ help
+ This option adds the flow table core infrastructure.
+
+ To compile it as a module, choose M here.
+
config NETFILTER_XTABLES
tristate "Netfilter Xtables support (required for ip_tables)"
default m if NETFILTER_ADVANCED=n
@@ -1120,6 +1150,7 @@ config NETFILTER_XT_MATCH_CONNLIMIT
tristate '"connlimit" match support'
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
+ select NETFILTER_CONNCOUNT
---help---
This match allows you to match against the number of parallel
connections to a server per client IP address (or address block).
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index f78ed2470831..5d9b8b959e58 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
+netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o utils.o
nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
@@ -67,6 +67,8 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
# SYNPROXY
obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
+obj-$(CONFIG_NETFILTER_CONNCOUNT) += nf_conncount.o
+
# generic packet duplication from netdev family
obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o
@@ -84,6 +86,7 @@ obj-$(CONFIG_NFT_META) += nft_meta.o
obj-$(CONFIG_NFT_RT) += nft_rt.o
obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
obj-$(CONFIG_NFT_CT) += nft_ct.o
+obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o
obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
obj-$(CONFIG_NFT_NAT) += nft_nat.o
obj-$(CONFIG_NFT_OBJREF) += nft_objref.o
@@ -107,6 +110,10 @@ obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
+# flow table infrastructure
+obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
+obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
+
# generic X tables
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 52cd2901a097..997dd387d259 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -4,8 +4,7 @@
* Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
* way.
*
- * Rusty Russell (C)2000 -- This code is GPL.
- * Patrick McHardy (c) 2006-2012
+ * This code is GPL.
*/
#include <linux/kernel.h>
#include <linux/netfilter.h>
@@ -28,34 +27,12 @@
#include "nf_internals.h"
-static DEFINE_MUTEX(afinfo_mutex);
-
-const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
-EXPORT_SYMBOL(nf_afinfo);
const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_ipv6_ops);
DEFINE_PER_CPU(bool, nf_skb_duplicated);
EXPORT_SYMBOL_GPL(nf_skb_duplicated);
-int nf_register_afinfo(const struct nf_afinfo *afinfo)
-{
- mutex_lock(&afinfo_mutex);
- RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo);
- mutex_unlock(&afinfo_mutex);
- return 0;
-}
-EXPORT_SYMBOL_GPL(nf_register_afinfo);
-
-void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
-{
- mutex_lock(&afinfo_mutex);
- RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL);
- mutex_unlock(&afinfo_mutex);
- synchronize_rcu();
-}
-EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
-
#ifdef HAVE_JUMP_LABEL
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
EXPORT_SYMBOL(nf_hooks_needed);
@@ -74,7 +51,8 @@ static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
struct nf_hook_entries *e;
size_t alloc = sizeof(*e) +
sizeof(struct nf_hook_entry) * num +
- sizeof(struct nf_hook_ops *) * num;
+ sizeof(struct nf_hook_ops *) * num +
+ sizeof(struct nf_hook_entries_rcu_head);
if (num == 0)
return NULL;
@@ -85,6 +63,30 @@ static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
return e;
}
+static void __nf_hook_entries_free(struct rcu_head *h)
+{
+ struct nf_hook_entries_rcu_head *head;
+
+ head = container_of(h, struct nf_hook_entries_rcu_head, head);
+ kvfree(head->allocation);
+}
+
+static void nf_hook_entries_free(struct nf_hook_entries *e)
+{
+ struct nf_hook_entries_rcu_head *head;
+ struct nf_hook_ops **ops;
+ unsigned int num;
+
+ if (!e)
+ return;
+
+ num = e->num_hook_entries;
+ ops = nf_hook_entries_get_hook_ops(e);
+ head = (void *)&ops[num];
+ head->allocation = e;
+ call_rcu(&head->head, __nf_hook_entries_free);
+}
+
static unsigned int accept_all(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -135,6 +137,12 @@ nf_hook_entries_grow(const struct nf_hook_entries *old,
++i;
continue;
}
+
+ if (reg->nat_hook && orig_ops[i]->nat_hook) {
+ kvfree(new);
+ return ERR_PTR(-EEXIST);
+ }
+
if (inserted || reg->priority > orig_ops[i]->priority) {
new_ops[nhooks] = (void *)orig_ops[i];
new->hooks[nhooks] = old->hooks[i];
@@ -237,27 +245,61 @@ out_assign:
return old;
}
-static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
+static struct nf_hook_entries __rcu **
+nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
+ struct net_device *dev)
{
- if (reg->pf != NFPROTO_NETDEV)
- return net->nf.hooks[reg->pf]+reg->hooknum;
+ switch (pf) {
+ case NFPROTO_NETDEV:
+ break;
+#ifdef CONFIG_NETFILTER_FAMILY_ARP
+ case NFPROTO_ARP:
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= hooknum))
+ return NULL;
+ return net->nf.hooks_arp + hooknum;
+#endif
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ case NFPROTO_BRIDGE:
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= hooknum))
+ return NULL;
+ return net->nf.hooks_bridge + hooknum;
+#endif
+ case NFPROTO_IPV4:
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= hooknum))
+ return NULL;
+ return net->nf.hooks_ipv4 + hooknum;
+ case NFPROTO_IPV6:
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum))
+ return NULL;
+ return net->nf.hooks_ipv6 + hooknum;
+#if IS_ENABLED(CONFIG_DECNET)
+ case NFPROTO_DECNET:
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= hooknum))
+ return NULL;
+ return net->nf.hooks_decnet + hooknum;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
#ifdef CONFIG_NETFILTER_INGRESS
- if (reg->hooknum == NF_NETDEV_INGRESS) {
- if (reg->dev && dev_net(reg->dev) == net)
- return &reg->dev->nf_hooks_ingress;
+ if (hooknum == NF_NETDEV_INGRESS) {
+ if (dev && dev_net(dev) == net)
+ return &dev->nf_hooks_ingress;
}
#endif
WARN_ON_ONCE(1);
return NULL;
}
-int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
+static int __nf_register_net_hook(struct net *net, int pf,
+ const struct nf_hook_ops *reg)
{
struct nf_hook_entries *p, *new_hooks;
struct nf_hook_entries __rcu **pp;
- if (reg->pf == NFPROTO_NETDEV) {
+ if (pf == NFPROTO_NETDEV) {
#ifndef CONFIG_NETFILTER_INGRESS
if (reg->hooknum == NF_NETDEV_INGRESS)
return -EOPNOTSUPP;
@@ -267,7 +309,7 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
return -EINVAL;
}
- pp = nf_hook_entry_head(net, reg);
+ pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
if (!pp)
return -EINVAL;
@@ -285,21 +327,19 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
hooks_validate(new_hooks);
#ifdef CONFIG_NETFILTER_INGRESS
- if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
+ if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
net_inc_ingress_queue();
#endif
#ifdef HAVE_JUMP_LABEL
- static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
+ static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]);
#endif
- synchronize_net();
BUG_ON(p == new_hooks);
- kvfree(p);
+ nf_hook_entries_free(p);
return 0;
}
-EXPORT_SYMBOL(nf_register_net_hook);
/*
- * __nf_unregister_net_hook - remove a hook from blob
+ * nf_remove_net_hook - remove a hook from blob
*
* @oldp: current address of hook blob
* @unreg: hook to unregister
@@ -307,8 +347,8 @@ EXPORT_SYMBOL(nf_register_net_hook);
* This cannot fail, hook unregistration must always succeed.
* Therefore replace the to-be-removed hook with a dummy hook.
*/
-static void __nf_unregister_net_hook(struct nf_hook_entries *old,
- const struct nf_hook_ops *unreg)
+static void nf_remove_net_hook(struct nf_hook_entries *old,
+ const struct nf_hook_ops *unreg, int pf)
{
struct nf_hook_ops **orig_ops;
bool found = false;
@@ -326,24 +366,24 @@ static void __nf_unregister_net_hook(struct nf_hook_entries *old,
if (found) {
#ifdef CONFIG_NETFILTER_INGRESS
- if (unreg->pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
+ if (pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
net_dec_ingress_queue();
#endif
#ifdef HAVE_JUMP_LABEL
- static_key_slow_dec(&nf_hooks_needed[unreg->pf][unreg->hooknum]);
+ static_key_slow_dec(&nf_hooks_needed[pf][unreg->hooknum]);
#endif
} else {
- WARN_ONCE(1, "hook not found, pf %d num %d", unreg->pf, unreg->hooknum);
+ WARN_ONCE(1, "hook not found, pf %d num %d", pf, unreg->hooknum);
}
}
-void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
+void __nf_unregister_net_hook(struct net *net, int pf,
+ const struct nf_hook_ops *reg)
{
struct nf_hook_entries __rcu **pp;
struct nf_hook_entries *p;
- unsigned int nfq;
- pp = nf_hook_entry_head(net, reg);
+ pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
if (!pp)
return;
@@ -355,23 +395,52 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
return;
}
- __nf_unregister_net_hook(p, reg);
+ nf_remove_net_hook(p, reg, pf);
p = __nf_hook_entries_try_shrink(pp);
mutex_unlock(&nf_hook_mutex);
if (!p)
return;
- synchronize_net();
+ nf_queue_nf_hook_drop(net);
+ nf_hook_entries_free(p);
+}
- /* other cpu might still process nfqueue verdict that used reg */
- nfq = nf_queue_nf_hook_drop(net);
- if (nfq)
- synchronize_net();
- kvfree(p);
+void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
+{
+ if (reg->pf == NFPROTO_INET) {
+ __nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
+ __nf_unregister_net_hook(net, NFPROTO_IPV6, reg);
+ } else {
+ __nf_unregister_net_hook(net, reg->pf, reg);
+ }
}
EXPORT_SYMBOL(nf_unregister_net_hook);
+int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
+{
+ int err;
+
+ if (reg->pf == NFPROTO_INET) {
+ err = __nf_register_net_hook(net, NFPROTO_IPV4, reg);
+ if (err < 0)
+ return err;
+
+ err = __nf_register_net_hook(net, NFPROTO_IPV6, reg);
+ if (err < 0) {
+ __nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
+ return err;
+ }
+ } else {
+ err = __nf_register_net_hook(net, reg->pf, reg);
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(nf_register_net_hook);
+
int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
unsigned int n)
{
@@ -395,63 +464,10 @@ EXPORT_SYMBOL(nf_register_net_hooks);
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
unsigned int hookcount)
{
- struct nf_hook_entries *to_free[16], *p;
- struct nf_hook_entries __rcu **pp;
- unsigned int i, j, n;
-
- mutex_lock(&nf_hook_mutex);
- for (i = 0; i < hookcount; i++) {
- pp = nf_hook_entry_head(net, &reg[i]);
- if (!pp)
- continue;
-
- p = nf_entry_dereference(*pp);
- if (WARN_ON_ONCE(!p))
- continue;
- __nf_unregister_net_hook(p, &reg[i]);
- }
- mutex_unlock(&nf_hook_mutex);
-
- do {
- n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
-
- mutex_lock(&nf_hook_mutex);
-
- for (i = 0, j = 0; i < hookcount && j < n; i++) {
- pp = nf_hook_entry_head(net, &reg[i]);
- if (!pp)
- continue;
-
- p = nf_entry_dereference(*pp);
- if (!p)
- continue;
-
- to_free[j] = __nf_hook_entries_try_shrink(pp);
- if (to_free[j])
- ++j;
- }
-
- mutex_unlock(&nf_hook_mutex);
-
- if (j) {
- unsigned int nfq;
-
- synchronize_net();
-
- /* need 2nd synchronize_net() if nfqueue is used, skb
- * can get reinjected right before nf_queue_hook_drop()
- */
- nfq = nf_queue_nf_hook_drop(net);
- if (nfq)
- synchronize_net();
-
- for (i = 0; i < j; i++)
- kvfree(to_free[i]);
- }
+ unsigned int i;
- reg += n;
- hookcount -= n;
- } while (hookcount > 0);
+ for (i = 0; i < hookcount; i++)
+ nf_unregister_net_hook(net, &reg[i]);
}
EXPORT_SYMBOL(nf_unregister_net_hooks);
@@ -569,14 +585,27 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
EXPORT_SYMBOL(nf_nat_decode_session_hook);
#endif
-static int __net_init netfilter_net_init(struct net *net)
+static void __net_init __netfilter_net_init(struct nf_hook_entries **e, int max)
{
- int i, h;
+ int h;
- for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
- for (h = 0; h < NF_MAX_HOOKS; h++)
- RCU_INIT_POINTER(net->nf.hooks[i][h], NULL);
- }
+ for (h = 0; h < max; h++)
+ RCU_INIT_POINTER(e[h], NULL);
+}
+
+static int __net_init netfilter_net_init(struct net *net)
+{
+ __netfilter_net_init(net->nf.hooks_ipv4, ARRAY_SIZE(net->nf.hooks_ipv4));
+ __netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6));
+#ifdef CONFIG_NETFILTER_FAMILY_ARP
+ __netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp));
+#endif
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge));
+#endif
+#if IS_ENABLED(CONFIG_DECNET)
+ __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet));
+#endif
#ifdef CONFIG_PROC_FS
net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 5ca18f07683b..257ca393e6f2 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -127,14 +127,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (ret <= 0)
return ret;
- if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(x, set)))
- return 0;
- if (SET_WITH_COUNTER(set))
- ip_set_update_counter(ext_counter(x, set), ext, mext, flags);
- if (SET_WITH_SKBINFO(set))
- ip_set_get_skbinfo(ext_skbinfo(x, set), ext, mext, flags);
- return 1;
+ return ip_set_match_extensions(set, ext, mext, flags, x);
}
static int
@@ -227,6 +220,7 @@ mtype_list(const struct ip_set *set,
rcu_read_lock();
for (; cb->args[IPSET_CB_ARG0] < map->elements;
cb->args[IPSET_CB_ARG0]++) {
+ cond_resched_rcu();
id = cb->args[IPSET_CB_ARG0];
x = get_ext(set, map, id);
if (!test_bit(id, map->members) ||
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index d8975a0b4282..488d6d05c65c 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -263,12 +263,8 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip);
if (ret)
return ret;
- if (first_ip > last_ip) {
- u32 tmp = first_ip;
-
- first_ip = last_ip;
- last_ip = tmp;
- }
+ if (first_ip > last_ip)
+ swap(first_ip, last_ip);
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 4c279fbd2d5d..c00b6a2e8e3c 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -337,12 +337,8 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip);
if (ret)
return ret;
- if (first_ip > last_ip) {
- u32 tmp = first_ip;
-
- first_ip = last_ip;
- last_ip = tmp;
- }
+ if (first_ip > last_ip)
+ swap(first_ip, last_ip);
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 7f9bbd7c98b5..b561ca8b3659 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -238,12 +238,8 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
first_port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
last_port = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
- if (first_port > last_port) {
- u16 tmp = first_port;
-
- first_port = last_port;
- last_port = tmp;
- }
+ if (first_port > last_port)
+ swap(first_port, last_port);
elements = last_port - first_port + 1;
set->dsize = ip_set_elem_len(set, tb, 0, 0);
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index cf84f7b37cd9..728bf31bb386 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -57,7 +57,7 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
/* When the nfnl mutex is held: */
#define ip_set_dereference(p) \
- rcu_dereference_protected(p, 1)
+ rcu_dereference_protected(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
#define ip_set(inst, id) \
ip_set_dereference((inst)->ip_set_list)[id]
@@ -472,6 +472,31 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
}
EXPORT_SYMBOL_GPL(ip_set_put_extensions);
+bool
+ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags, void *data)
+{
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(data, set)))
+ return false;
+ if (SET_WITH_COUNTER(set)) {
+ struct ip_set_counter *counter = ext_counter(data, set);
+
+ if (flags & IPSET_FLAG_MATCH_COUNTERS &&
+ !(ip_set_match_counter(ip_set_get_packets(counter),
+ mext->packets, mext->packets_op) &&
+ ip_set_match_counter(ip_set_get_bytes(counter),
+ mext->bytes, mext->bytes_op)))
+ return false;
+ ip_set_update_counter(counter, ext, flags);
+ }
+ if (SET_WITH_SKBINFO(set))
+ ip_set_get_skbinfo(ext_skbinfo(data, set),
+ ext, mext, flags);
+ return true;
+}
+EXPORT_SYMBOL_GPL(ip_set_match_extensions);
+
/* Creating/destroying/renaming/swapping affect the existence and
* the properties of a set. All of these can be executed from userspace
* only and serialized by the nfnl mutex indirectly from nfnetlink.
@@ -1386,11 +1411,9 @@ dump_last:
goto next_set;
if (set->variant->uref)
set->variant->uref(set, cb, true);
- /* Fall through and add elements */
+ /* fall through */
default:
- rcu_read_lock_bh();
ret = set->variant->list(set, skb, cb);
- rcu_read_unlock_bh();
if (!cb->args[IPSET_CB_ARG0])
/* Set is done, proceed with next one */
goto next_set;
@@ -2055,6 +2078,7 @@ ip_set_net_exit(struct net *net)
inst->is_deleted = true; /* flag for ip_set_nfnl_put */
+ nfnl_lock(NFNL_SUBSYS_IPSET);
for (i = 0; i < inst->ip_set_max; i++) {
set = ip_set(inst, i);
if (set) {
@@ -2062,6 +2086,7 @@ ip_set_net_exit(struct net *net)
ip_set_destroy_set(set);
}
}
+ nfnl_unlock(NFNL_SUBSYS_IPSET);
kfree(rcu_dereference_protected(inst->ip_set_list, 1));
}
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index efffc8eabafe..bbad940c0137 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -917,12 +917,9 @@ static inline int
mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
struct ip_set_ext *mext, struct ip_set *set, u32 flags)
{
- if (SET_WITH_COUNTER(set))
- ip_set_update_counter(ext_counter(data, set),
- ext, mext, flags);
- if (SET_WITH_SKBINFO(set))
- ip_set_get_skbinfo(ext_skbinfo(data, set),
- ext, mext, flags);
+ if (!ip_set_match_extensions(set, ext, mext, flags, data))
+ return 0;
+ /* nomatch entries return -ENOTEMPTY */
return mtype_do_data_match(data);
}
@@ -941,9 +938,9 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
struct mtype_elem *data;
#if IPSET_NET_COUNT == 2
struct mtype_elem orig = *d;
- int i, j = 0, k;
+ int ret, i, j = 0, k;
#else
- int i, j = 0;
+ int ret, i, j = 0;
#endif
u32 key, multi = 0;
@@ -969,18 +966,13 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
data = ahash_data(n, i, set->dsize);
if (!mtype_data_equal(data, d, &multi))
continue;
- if (SET_WITH_TIMEOUT(set)) {
- if (!ip_set_timeout_expired(
- ext_timeout(data, set)))
- return mtype_data_match(data, ext,
- mext, set,
- flags);
+ ret = mtype_data_match(data, ext, mext, set, flags);
+ if (ret != 0)
+ return ret;
#ifdef IP_SET_HASH_WITH_MULTI
- multi = 0;
+ /* No match, reset multiple match flag */
+ multi = 0;
#endif
- } else
- return mtype_data_match(data, ext,
- mext, set, flags);
}
#if IPSET_NET_COUNT == 2
}
@@ -1027,12 +1019,11 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (!test_bit(i, n->used))
continue;
data = ahash_data(n, i, set->dsize);
- if (mtype_data_equal(data, d, &multi) &&
- !(SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(data, set)))) {
- ret = mtype_data_match(data, ext, mext, set, flags);
+ if (!mtype_data_equal(data, d, &multi))
+ continue;
+ ret = mtype_data_match(data, ext, mext, set, flags);
+ if (ret != 0)
goto out;
- }
}
out:
return ret;
@@ -1143,6 +1134,7 @@ mtype_list(const struct ip_set *set,
rcu_read_lock();
for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
cb->args[IPSET_CB_ARG0]++) {
+ cond_resched_rcu();
incomplete = skb_tail_pointer(skb);
n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0]));
pr_debug("cb->arg bucket: %lu, t %p n %p\n",
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index e864681b8dc5..072a658fde04 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -55,8 +55,9 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
struct ip_set_adt_opt *opt, const struct ip_set_ext *ext)
{
struct list_set *map = set->data;
+ struct ip_set_ext *mext = &opt->ext;
struct set_elem *e;
- u32 cmdflags = opt->cmdflags;
+ u32 flags = opt->cmdflags;
int ret;
/* Don't lookup sub-counters at all */
@@ -64,21 +65,11 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE)
opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE;
list_for_each_entry_rcu(e, &map->members, list) {
- if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, set)))
- continue;
ret = ip_set_test(e->id, skb, par, opt);
- if (ret > 0) {
- if (SET_WITH_COUNTER(set))
- ip_set_update_counter(ext_counter(e, set),
- ext, &opt->ext,
- cmdflags);
- if (SET_WITH_SKBINFO(set))
- ip_set_get_skbinfo(ext_skbinfo(e, set),
- ext, &opt->ext,
- cmdflags);
- return ret;
- }
+ if (ret <= 0)
+ continue;
+ if (ip_set_match_extensions(set, ext, mext, flags, e))
+ return 1;
}
return 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 3e053cb30070..f489b8db2406 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -322,7 +322,7 @@ ip_vs_conn_fill_param_proto(struct netns_ipvs *ipvs,
{
__be16 _ports[2], *pptr;
- pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
+ pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports);
if (pptr == NULL)
return 1;
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 5cb7cac9177d..5f6f73cf2174 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -433,7 +433,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
/*
* IPv6 frags, only the first hit here.
*/
- pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
+ pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports);
if (pptr == NULL)
return NULL;
@@ -566,7 +566,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct netns_ipvs *ipvs = svc->ipvs;
struct net *net = ipvs->net;
- pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
+ pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports);
if (!pptr)
return NF_DROP;
dport = likely(!ip_vs_iph_inverse(iph)) ? pptr[1] : pptr[0];
@@ -982,7 +982,7 @@ static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
unsigned int offset;
*related = 1;
- ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh);
+ ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph);
if (ic == NULL)
return NF_DROP;
@@ -1214,7 +1214,7 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
return NULL;
pptr = frag_safe_skb_hp(skb, iph->len,
- sizeof(_ports), _ports, iph);
+ sizeof(_ports), _ports);
if (!pptr)
return NULL;
@@ -1407,7 +1407,7 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
__be16 _ports[2], *pptr;
pptr = frag_safe_skb_hp(skb, iph.len,
- sizeof(_ports), _ports, &iph);
+ sizeof(_ports), _ports);
if (pptr == NULL)
return NF_ACCEPT; /* Not for me */
if (ip_vs_has_real_service(ipvs, af, iph.protocol, &iph.saddr,
@@ -1741,7 +1741,7 @@ static int ip_vs_in_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
*related = 1;
- ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph, iph);
+ ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph);
if (ic == NULL)
return NF_DROP;
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 121a321b91be..bcd9b7bde4ee 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -315,6 +315,7 @@ tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
switch (skb->ip_summed) {
case CHECKSUM_NONE:
skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
+ /* fall through */
case CHECKSUM_COMPLETE:
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 30e11cd6aa8a..c15ef7c2a1fa 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -319,6 +319,7 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
case CHECKSUM_NONE:
skb->csum = skb_checksum(skb, udphoff,
skb->len - udphoff, 0);
+ /* fall through */
case CHECKSUM_COMPLETE:
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
new file mode 100644
index 000000000000..a95518261168
--- /dev/null
+++ b/net/netfilter/nf_conncount.c
@@ -0,0 +1,373 @@
+/*
+ * count the number of connections matching an arbitrary key.
+ *
+ * (C) 2017 Red Hat GmbH
+ * Author: Florian Westphal <fw@strlen.de>
+ *
+ * split from xt_connlimit.c:
+ * (c) 2000 Gerd Knorr <kraxel@bytesex.org>
+ * Nov 2002: Martin Bene <martin.bene@icomedias.com>:
+ * only ignore TIME_WAIT or gone connections
+ * (C) CC Computer Consultants GmbH, 2007
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/netfilter/nf_conntrack_tcp.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_count.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+
+#define CONNCOUNT_SLOTS 256U
+
+#ifdef CONFIG_LOCKDEP
+#define CONNCOUNT_LOCK_SLOTS 8U
+#else
+#define CONNCOUNT_LOCK_SLOTS 256U
+#endif
+
+#define CONNCOUNT_GC_MAX_NODES 8
+#define MAX_KEYLEN 5
+
+/* we will save the tuples of all connections we care about */
+struct nf_conncount_tuple {
+ struct hlist_node node;
+ struct nf_conntrack_tuple tuple;
+};
+
+struct nf_conncount_rb {
+ struct rb_node node;
+ struct hlist_head hhead; /* connections/hosts in same subnet */
+ u32 key[MAX_KEYLEN];
+};
+
+static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp;
+
+struct nf_conncount_data {
+ unsigned int keylen;
+ struct rb_root root[CONNCOUNT_SLOTS];
+};
+
+static u_int32_t conncount_rnd __read_mostly;
+static struct kmem_cache *conncount_rb_cachep __read_mostly;
+static struct kmem_cache *conncount_conn_cachep __read_mostly;
+
+static inline bool already_closed(const struct nf_conn *conn)
+{
+ if (nf_ct_protonum(conn) == IPPROTO_TCP)
+ return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT ||
+ conn->proto.tcp.state == TCP_CONNTRACK_CLOSE;
+ else
+ return 0;
+}
+
+static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
+{
+ return memcmp(a, b, klen * sizeof(u32));
+}
+
+static bool add_hlist(struct hlist_head *head,
+ const struct nf_conntrack_tuple *tuple)
+{
+ struct nf_conncount_tuple *conn;
+
+ conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
+ if (conn == NULL)
+ return false;
+ conn->tuple = *tuple;
+ hlist_add_head(&conn->node, head);
+ return true;
+}
+
+static unsigned int check_hlist(struct net *net,
+ struct hlist_head *head,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone,
+ bool *addit)
+{
+ const struct nf_conntrack_tuple_hash *found;
+ struct nf_conncount_tuple *conn;
+ struct hlist_node *n;
+ struct nf_conn *found_ct;
+ unsigned int length = 0;
+
+ *addit = true;
+
+ /* check the saved connections */
+ hlist_for_each_entry_safe(conn, n, head, node) {
+ found = nf_conntrack_find_get(net, zone, &conn->tuple);
+ if (found == NULL) {
+ hlist_del(&conn->node);
+ kmem_cache_free(conncount_conn_cachep, conn);
+ continue;
+ }
+
+ found_ct = nf_ct_tuplehash_to_ctrack(found);
+
+ if (nf_ct_tuple_equal(&conn->tuple, tuple)) {
+ /*
+ * Just to be sure we have it only once in the list.
+ * We should not see tuples twice unless someone hooks
+ * this into a table without "-p tcp --syn".
+ */
+ *addit = false;
+ } else if (already_closed(found_ct)) {
+ /*
+ * we do not care about connections which are
+ * closed already -> ditch it
+ */
+ nf_ct_put(found_ct);
+ hlist_del(&conn->node);
+ kmem_cache_free(conncount_conn_cachep, conn);
+ continue;
+ }
+
+ nf_ct_put(found_ct);
+ length++;
+ }
+
+ return length;
+}
+
+static void tree_nodes_free(struct rb_root *root,
+ struct nf_conncount_rb *gc_nodes[],
+ unsigned int gc_count)
+{
+ struct nf_conncount_rb *rbconn;
+
+ while (gc_count) {
+ rbconn = gc_nodes[--gc_count];
+ rb_erase(&rbconn->node, root);
+ kmem_cache_free(conncount_rb_cachep, rbconn);
+ }
+}
+
+static unsigned int
+count_tree(struct net *net, struct rb_root *root,
+ const u32 *key, u8 keylen,
+ u8 family,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
+{
+ struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
+ struct rb_node **rbnode, *parent;
+ struct nf_conncount_rb *rbconn;
+ struct nf_conncount_tuple *conn;
+ unsigned int gc_count;
+ bool no_gc = false;
+
+ restart:
+ gc_count = 0;
+ parent = NULL;
+ rbnode = &(root->rb_node);
+ while (*rbnode) {
+ int diff;
+ bool addit;
+
+ rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node);
+
+ parent = *rbnode;
+ diff = key_diff(key, rbconn->key, keylen);
+ if (diff < 0) {
+ rbnode = &((*rbnode)->rb_left);
+ } else if (diff > 0) {
+ rbnode = &((*rbnode)->rb_right);
+ } else {
+ /* same source network -> be counted! */
+ unsigned int count;
+ count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
+
+ tree_nodes_free(root, gc_nodes, gc_count);
+ if (!addit)
+ return count;
+
+ if (!add_hlist(&rbconn->hhead, tuple))
+ return 0; /* hotdrop */
+
+ return count + 1;
+ }
+
+ if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
+ continue;
+
+ /* only used for GC on hhead, retval and 'addit' ignored */
+ check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
+ if (hlist_empty(&rbconn->hhead))
+ gc_nodes[gc_count++] = rbconn;
+ }
+
+ if (gc_count) {
+ no_gc = true;
+ tree_nodes_free(root, gc_nodes, gc_count);
+ /* tree_node_free before new allocation permits
+ * allocator to re-use newly free'd object.
+ *
+ * This is a rare event; in most cases we will find
+ * existing node to re-use. (or gc_count is 0).
+ */
+ goto restart;
+ }
+
+ /* no match, need to insert new node */
+ rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
+ if (rbconn == NULL)
+ return 0;
+
+ conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
+ if (conn == NULL) {
+ kmem_cache_free(conncount_rb_cachep, rbconn);
+ return 0;
+ }
+
+ conn->tuple = *tuple;
+ memcpy(rbconn->key, key, sizeof(u32) * keylen);
+
+ INIT_HLIST_HEAD(&rbconn->hhead);
+ hlist_add_head(&conn->node, &rbconn->hhead);
+
+ rb_link_node(&rbconn->node, parent, rbnode);
+ rb_insert_color(&rbconn->node, root);
+ return 1;
+}
+
+unsigned int nf_conncount_count(struct net *net,
+ struct nf_conncount_data *data,
+ const u32 *key,
+ unsigned int family,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
+{
+ struct rb_root *root;
+ int count;
+ u32 hash;
+
+ hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS;
+ root = &data->root[hash];
+
+ spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
+
+ count = count_tree(net, root, key, data->keylen, family, tuple, zone);
+
+ spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
+
+ return count;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_count);
+
+struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
+ unsigned int keylen)
+{
+ struct nf_conncount_data *data;
+ int ret, i;
+
+ if (keylen % sizeof(u32) ||
+ keylen / sizeof(u32) > MAX_KEYLEN ||
+ keylen == 0)
+ return ERR_PTR(-EINVAL);
+
+ net_get_random_once(&conncount_rnd, sizeof(conncount_rnd));
+
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+
+ ret = nf_ct_netns_get(net, family);
+ if (ret < 0) {
+ kfree(data);
+ return ERR_PTR(ret);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(data->root); ++i)
+ data->root[i] = RB_ROOT;
+
+ data->keylen = keylen / sizeof(u32);
+
+ return data;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_init);
+
+static void destroy_tree(struct rb_root *r)
+{
+ struct nf_conncount_tuple *conn;
+ struct nf_conncount_rb *rbconn;
+ struct hlist_node *n;
+ struct rb_node *node;
+
+ while ((node = rb_first(r)) != NULL) {
+ rbconn = rb_entry(node, struct nf_conncount_rb, node);
+
+ rb_erase(node, r);
+
+ hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node)
+ kmem_cache_free(conncount_conn_cachep, conn);
+
+ kmem_cache_free(conncount_rb_cachep, rbconn);
+ }
+}
+
+void nf_conncount_destroy(struct net *net, unsigned int family,
+ struct nf_conncount_data *data)
+{
+ unsigned int i;
+
+ nf_ct_netns_put(net, family);
+
+ for (i = 0; i < ARRAY_SIZE(data->root); ++i)
+ destroy_tree(&data->root[i]);
+
+ kfree(data);
+}
+EXPORT_SYMBOL_GPL(nf_conncount_destroy);
+
+static int __init nf_conncount_modinit(void)
+{
+ int i;
+
+ BUILD_BUG_ON(CONNCOUNT_LOCK_SLOTS > CONNCOUNT_SLOTS);
+ BUILD_BUG_ON((CONNCOUNT_SLOTS % CONNCOUNT_LOCK_SLOTS) != 0);
+
+ for (i = 0; i < CONNCOUNT_LOCK_SLOTS; ++i)
+ spin_lock_init(&nf_conncount_locks[i]);
+
+ conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple",
+ sizeof(struct nf_conncount_tuple),
+ 0, 0, NULL);
+ if (!conncount_conn_cachep)
+ return -ENOMEM;
+
+ conncount_rb_cachep = kmem_cache_create("nf_conncount_rb",
+ sizeof(struct nf_conncount_rb),
+ 0, 0, NULL);
+ if (!conncount_rb_cachep) {
+ kmem_cache_destroy(conncount_conn_cachep);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void __exit nf_conncount_modexit(void)
+{
+ kmem_cache_destroy(conncount_conn_cachep);
+ kmem_cache_destroy(conncount_rb_cachep);
+}
+
+module_init(nf_conncount_modinit);
+module_exit(nf_conncount_modexit);
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_DESCRIPTION("netfilter: count number of connections matching a key");
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 85f643c1e227..6a64d528d076 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -901,6 +901,9 @@ static unsigned int early_drop_list(struct net *net,
hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
+ continue;
+
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
continue;
@@ -975,6 +978,18 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
return false;
}
+#define DAY (86400 * HZ)
+
+/* Set an arbitrary timeout large enough not to ever expire, this save
+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
+ * nf_ct_is_expired().
+ */
+static void nf_ct_offload_timeout(struct nf_conn *ct)
+{
+ if (nf_ct_expires(ct) < DAY / 2)
+ ct->timeout = nfct_time_stamp + DAY;
+}
+
static void gc_worker(struct work_struct *work)
{
unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
@@ -1011,6 +1026,11 @@ static void gc_worker(struct work_struct *work)
tmp = nf_ct_tuplehash_to_ctrack(h);
scanned++;
+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
+ nf_ct_offload_timeout(tmp);
+ continue;
+ }
+
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
expired_count++;
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index dc6347342e34..1601275efe2d 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -1,4 +1,4 @@
-/****************************************************************************
+/*
* ip_conntrack_helper_h323_asn1.c - BER and PER decoding library for H.323
* conntrack/NAT module.
*
@@ -8,7 +8,7 @@
*
* See ip_conntrack_helper_h323_asn1.h for details.
*
- ****************************************************************************/
+ */
#ifdef __KERNEL__
#include <linux/kernel.h>
@@ -140,14 +140,15 @@ static const decoder_t Decoders[] = {
decode_choice,
};
-/****************************************************************************
+/*
* H.323 Types
- ****************************************************************************/
+ */
#include "nf_conntrack_h323_types.c"
-/****************************************************************************
+/*
* Functions
- ****************************************************************************/
+ */
+
/* Assume bs is aligned && v < 16384 */
static unsigned int get_len(struct bitstr *bs)
{
@@ -177,7 +178,6 @@ static int nf_h323_error_boundary(struct bitstr *bs, size_t bytes, size_t bits)
return 0;
}
-/****************************************************************************/
static unsigned int get_bit(struct bitstr *bs)
{
unsigned int b = (*bs->cur) & (0x80 >> bs->bit);
@@ -187,7 +187,6 @@ static unsigned int get_bit(struct bitstr *bs)
return b;
}
-/****************************************************************************/
/* Assume b <= 8 */
static unsigned int get_bits(struct bitstr *bs, unsigned int b)
{
@@ -213,7 +212,6 @@ static unsigned int get_bits(struct bitstr *bs, unsigned int b)
return v;
}
-/****************************************************************************/
/* Assume b <= 32 */
static unsigned int get_bitmap(struct bitstr *bs, unsigned int b)
{
@@ -251,9 +249,9 @@ static unsigned int get_bitmap(struct bitstr *bs, unsigned int b)
return v;
}
-/****************************************************************************
+/*
* Assume bs is aligned and sizeof(unsigned int) == 4
- ****************************************************************************/
+ */
static unsigned int get_uint(struct bitstr *bs, int b)
{
unsigned int v = 0;
@@ -262,12 +260,15 @@ static unsigned int get_uint(struct bitstr *bs, int b)
case 4:
v |= *bs->cur++;
v <<= 8;
+ /* fall through */
case 3:
v |= *bs->cur++;
v <<= 8;
+ /* fall through */
case 2:
v |= *bs->cur++;
v <<= 8;
+ /* fall through */
case 1:
v |= *bs->cur++;
break;
@@ -275,7 +276,6 @@ static unsigned int get_uint(struct bitstr *bs, int b)
return v;
}
-/****************************************************************************/
static int decode_nul(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -284,7 +284,6 @@ static int decode_nul(struct bitstr *bs, const struct field_t *f,
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_bool(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -296,7 +295,6 @@ static int decode_bool(struct bitstr *bs, const struct field_t *f,
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_oid(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -316,7 +314,6 @@ static int decode_oid(struct bitstr *bs, const struct field_t *f,
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_int(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -364,7 +361,6 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_enum(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -381,7 +377,6 @@ static int decode_enum(struct bitstr *bs, const struct field_t *f,
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -418,7 +413,6 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_numstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -439,7 +433,6 @@ static int decode_numstr(struct bitstr *bs, const struct field_t *f,
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_octstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -493,7 +486,6 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f,
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -523,7 +515,6 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_seq(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -653,7 +644,6 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
return H323_ERROR_NONE;
}
-/****************************************************************************/
static int decode_seqof(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -750,8 +740,6 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f,
return H323_ERROR_NONE;
}
-
-/****************************************************************************/
static int decode_choice(struct bitstr *bs, const struct field_t *f,
char *base, int level)
{
@@ -833,7 +821,6 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
return H323_ERROR_NONE;
}
-/****************************************************************************/
int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras)
{
static const struct field_t ras_message = {
@@ -849,7 +836,6 @@ int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras)
return decode_choice(&bs, &ras_message, (char *) ras, 0);
}
-/****************************************************************************/
static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg,
size_t sz, H323_UserInformation *uuie)
{
@@ -867,7 +853,6 @@ static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg,
return decode_seq(&bs, &h323_userinformation, (char *) uuie, 0);
}
-/****************************************************************************/
int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz,
MultimediaSystemControlMessage *
mscm)
@@ -886,7 +871,6 @@ int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz,
(char *) mscm, 0);
}
-/****************************************************************************/
int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931)
{
unsigned char *p = buf;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index f71f0d2558fd..005589c6d0f6 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -24,6 +24,7 @@
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/ip6_route.h>
+#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -115,7 +116,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_h245;
static struct nf_conntrack_helper nf_conntrack_helper_q931[];
static struct nf_conntrack_helper nf_conntrack_helper_ras[];
-/****************************************************************************/
static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo,
unsigned char **data, int *datalen, int *dataoff)
@@ -219,7 +219,6 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
return 0;
}
-/****************************************************************************/
static int get_h245_addr(struct nf_conn *ct, const unsigned char *data,
H245_TransportAddress *taddr,
union nf_inet_addr *addr, __be16 *port)
@@ -254,7 +253,6 @@ static int get_h245_addr(struct nf_conn *ct, const unsigned char *data,
return 1;
}
-/****************************************************************************/
static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -328,7 +326,6 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
return ret;
}
-/****************************************************************************/
static int expect_t120(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
@@ -380,7 +377,6 @@ static int expect_t120(struct sk_buff *skb,
return ret;
}
-/****************************************************************************/
static int process_h245_channel(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
@@ -410,7 +406,6 @@ static int process_h245_channel(struct sk_buff *skb,
return 0;
}
-/****************************************************************************/
static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -472,7 +467,6 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff, unsigned char **data, int dataoff,
@@ -542,7 +536,6 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_h245(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff, unsigned char **data, int dataoff,
@@ -578,7 +571,6 @@ static int process_h245(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int h245_help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
@@ -628,7 +620,6 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff,
return NF_DROP;
}
-/****************************************************************************/
static const struct nf_conntrack_expect_policy h245_exp_policy = {
.max_expected = H323_RTP_CHANNEL_MAX * 4 + 2 /* T.120 */,
.timeout = 240,
@@ -643,7 +634,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = {
.expect_policy = &h245_exp_policy,
};
-/****************************************************************************/
int get_h225_addr(struct nf_conn *ct, unsigned char *data,
TransportAddress *taddr,
union nf_inet_addr *addr, __be16 *port)
@@ -675,7 +665,6 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data,
return 1;
}
-/****************************************************************************/
static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff, unsigned char **data, int dataoff,
@@ -726,20 +715,15 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
}
/* If the calling party is on the same side of the forward-to party,
- * we don't need to track the second call */
+ * we don't need to track the second call
+ */
static int callforward_do_filter(struct net *net,
const union nf_inet_addr *src,
const union nf_inet_addr *dst,
u_int8_t family)
{
- const struct nf_afinfo *afinfo;
int ret = 0;
- /* rcu_read_lock()ed by nf_hook_thresh */
- afinfo = nf_get_afinfo(family);
- if (!afinfo)
- return 0;
-
switch (family) {
case AF_INET: {
struct flowi4 fl1, fl2;
@@ -750,10 +734,10 @@ static int callforward_do_filter(struct net *net,
memset(&fl2, 0, sizeof(fl2));
fl2.daddr = dst->ip;
- if (!afinfo->route(net, (struct dst_entry **)&rt1,
- flowi4_to_flowi(&fl1), false)) {
- if (!afinfo->route(net, (struct dst_entry **)&rt2,
- flowi4_to_flowi(&fl2), false)) {
+ if (!nf_ip_route(net, (struct dst_entry **)&rt1,
+ flowi4_to_flowi(&fl1), false)) {
+ if (!nf_ip_route(net, (struct dst_entry **)&rt2,
+ flowi4_to_flowi(&fl2), false)) {
if (rt_nexthop(rt1, fl1.daddr) ==
rt_nexthop(rt2, fl2.daddr) &&
rt1->dst.dev == rt2->dst.dev)
@@ -766,18 +750,23 @@ static int callforward_do_filter(struct net *net,
}
#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
case AF_INET6: {
- struct flowi6 fl1, fl2;
+ const struct nf_ipv6_ops *v6ops;
struct rt6_info *rt1, *rt2;
+ struct flowi6 fl1, fl2;
+
+ v6ops = nf_get_ipv6_ops();
+ if (!v6ops)
+ return 0;
memset(&fl1, 0, sizeof(fl1));
fl1.daddr = src->in6;
memset(&fl2, 0, sizeof(fl2));
fl2.daddr = dst->in6;
- if (!afinfo->route(net, (struct dst_entry **)&rt1,
- flowi6_to_flowi(&fl1), false)) {
- if (!afinfo->route(net, (struct dst_entry **)&rt2,
- flowi6_to_flowi(&fl2), false)) {
+ if (!v6ops->route(net, (struct dst_entry **)&rt1,
+ flowi6_to_flowi(&fl1), false)) {
+ if (!v6ops->route(net, (struct dst_entry **)&rt2,
+ flowi6_to_flowi(&fl2), false)) {
if (ipv6_addr_equal(rt6_nexthop(rt1, &fl1.daddr),
rt6_nexthop(rt2, &fl2.daddr)) &&
rt1->dst.dev == rt2->dst.dev)
@@ -794,7 +783,6 @@ static int callforward_do_filter(struct net *net,
}
-/****************************************************************************/
static int expect_callforwarding(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
@@ -815,7 +803,8 @@ static int expect_callforwarding(struct sk_buff *skb,
return 0;
/* If the calling party is on the same side of the forward-to party,
- * we don't need to track the second call */
+ * we don't need to track the second call
+ */
if (callforward_filter &&
callforward_do_filter(net, &addr, &ct->tuplehash[!dir].tuple.src.u3,
nf_ct_l3num(ct))) {
@@ -854,7 +843,6 @@ static int expect_callforwarding(struct sk_buff *skb,
return ret;
}
-/****************************************************************************/
static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -925,7 +913,6 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_callproceeding(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
@@ -958,7 +945,6 @@ static int process_callproceeding(struct sk_buff *skb,
return 0;
}
-/****************************************************************************/
static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -990,7 +976,6 @@ static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1022,7 +1007,6 @@ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1063,7 +1047,6 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1095,7 +1078,6 @@ static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_q931(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff, unsigned char **data, int dataoff,
@@ -1154,7 +1136,6 @@ static int process_q931(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int q931_help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
@@ -1203,7 +1184,6 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff,
return NF_DROP;
}
-/****************************************************************************/
static const struct nf_conntrack_expect_policy q931_exp_policy = {
/* T.120 and H.245 */
.max_expected = H323_RTP_CHANNEL_MAX * 4 + 4,
@@ -1231,7 +1211,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = {
},
};
-/****************************************************************************/
static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff,
int *datalen)
{
@@ -1249,7 +1228,6 @@ static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff,
return skb_header_pointer(skb, dataoff, *datalen, h323_buffer);
}
-/****************************************************************************/
static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
union nf_inet_addr *addr,
__be16 port)
@@ -1270,7 +1248,6 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
return NULL;
}
-/****************************************************************************/
static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff, unsigned char **data,
@@ -1328,7 +1305,6 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
return ret;
}
-/****************************************************************************/
static int process_grq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1346,7 +1322,6 @@ static int process_grq(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1391,7 +1366,6 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
return ret;
}
-/****************************************************************************/
static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1428,7 +1402,6 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1480,7 +1453,6 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_urq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1514,7 +1486,6 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1559,7 +1530,6 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1608,7 +1578,6 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
return ret;
}
-/****************************************************************************/
static int process_lrq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1626,7 +1595,6 @@ static int process_lrq(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1666,7 +1634,6 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
return ret;
}
-/****************************************************************************/
static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1700,7 +1667,6 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int process_ras(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff,
@@ -1745,7 +1711,6 @@ static int process_ras(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
-/****************************************************************************/
static int ras_help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
@@ -1788,7 +1753,6 @@ static int ras_help(struct sk_buff *skb, unsigned int protoff,
return NF_DROP;
}
-/****************************************************************************/
static const struct nf_conntrack_expect_policy ras_exp_policy = {
.max_expected = 32,
.timeout = 240,
@@ -1849,7 +1813,6 @@ static void __exit h323_helper_exit(void)
nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
}
-/****************************************************************************/
static void __exit nf_conntrack_h323_fini(void)
{
h323_helper_exit();
@@ -1857,7 +1820,6 @@ static void __exit nf_conntrack_h323_fini(void)
pr_debug("nf_ct_h323: fini\n");
}
-/****************************************************************************/
static int __init nf_conntrack_h323_init(void)
{
int ret;
@@ -1877,7 +1839,6 @@ err1:
return ret;
}
-/****************************************************************************/
module_init(nf_conntrack_h323_init);
module_exit(nf_conntrack_h323_fini);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 382d49792f42..7c7921a53b13 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -544,7 +544,7 @@ static size_t ctnetlink_proto_size(const struct nf_conn *ct)
len *= 3u; /* ORIG, REPLY, MASTER */
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
- len += l4proto->nla_size;
+ len += l4proto->nlattr_size;
if (l4proto->nlattr_tuple_size) {
len4 = l4proto->nlattr_tuple_size();
len4 *= 3u; /* ORIG, REPLY, MASTER */
@@ -1110,6 +1110,14 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
.len = NF_CT_LABELS_MAX_SIZE },
};
+static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
+{
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+ return 0;
+
+ return ctnetlink_filter_match(ct, data);
+}
+
static int ctnetlink_flush_conntrack(struct net *net,
const struct nlattr * const cda[],
u32 portid, int report)
@@ -1122,7 +1130,7 @@ static int ctnetlink_flush_conntrack(struct net *net,
return PTR_ERR(filter);
}
- nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter,
+ nf_ct_iterate_cleanup_net(net, ctnetlink_flush_iterate, filter,
portid, report);
kfree(filter);
@@ -1168,6 +1176,11 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
ct = nf_ct_tuplehash_to_ctrack(h);
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) {
+ nf_ct_put(ct);
+ return -EBUSY;
+ }
+
if (cda[CTA_ID]) {
u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID]));
if (id != (u32)(unsigned long)ct) {
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index c8e9c9503a08..afdeca53e88b 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -385,14 +385,14 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net,
/* FIXME: Allow NULL functions and sub in pointers to generic for
them. --RR */
-int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto)
+int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
if (l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos))
return -EBUSY;
- if ((l4proto->to_nlattr && !l4proto->nlattr_size) ||
+ if ((l4proto->to_nlattr && l4proto->nlattr_size == 0) ||
(l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
return -EINVAL;
@@ -428,10 +428,6 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto)
goto out_unlock;
}
- l4proto->nla_size = 0;
- if (l4proto->nlattr_size)
- l4proto->nla_size += l4proto->nlattr_size();
-
rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
l4proto);
out_unlock:
@@ -502,7 +498,7 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net,
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
-int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
+int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
unsigned int num_proto)
{
int ret = -EINVAL, ver;
@@ -524,7 +520,7 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
int nf_ct_l4proto_pernet_register(struct net *net,
- struct nf_conntrack_l4proto *const l4proto[],
+ const struct nf_conntrack_l4proto *const l4proto[],
unsigned int num_proto)
{
int ret = -EINVAL;
@@ -545,7 +541,7 @@ int nf_ct_l4proto_pernet_register(struct net *net,
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
-void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
+void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
unsigned int num_proto)
{
mutex_lock(&nf_ct_proto_mutex);
@@ -555,12 +551,12 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
synchronize_net();
/* Remove all contrack entries for this protocol */
- nf_ct_iterate_destroy(kill_l4proto, l4proto);
+ nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
void nf_ct_l4proto_pernet_unregister(struct net *net,
- struct nf_conntrack_l4proto *const l4proto[],
+ const struct nf_conntrack_l4proto *const l4proto[],
unsigned int num_proto)
{
while (num_proto-- != 0)
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 2a446f4a554c..abe647d5b8c6 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -654,6 +654,12 @@ static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = {
[CTA_PROTOINFO_DCCP_PAD] = { .type = NLA_UNSPEC },
};
+#define DCCP_NLATTR_SIZE ( \
+ NLA_ALIGN(NLA_HDRLEN + 1) + \
+ NLA_ALIGN(NLA_HDRLEN + 1) + \
+ NLA_ALIGN(NLA_HDRLEN + sizeof(u64)) + \
+ NLA_ALIGN(NLA_HDRLEN + 0))
+
static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
{
struct nlattr *attr = cda[CTA_PROTOINFO_DCCP];
@@ -691,13 +697,6 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
spin_unlock_bh(&ct->lock);
return 0;
}
-
-static int dccp_nlattr_size(void)
-{
- return nla_total_size(0) /* CTA_PROTOINFO_DCCP */
- + nla_policy_len(dccp_nla_policy, CTA_PROTOINFO_DCCP_MAX + 1);
-}
-
#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
@@ -862,7 +861,7 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.dccp.pn;
}
-struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = {
.l3proto = AF_INET,
.l4proto = IPPROTO_DCCP,
.pkt_to_tuple = dccp_pkt_to_tuple,
@@ -876,8 +875,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
.print_conntrack = dccp_print_conntrack,
#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .nlattr_size = DCCP_NLATTR_SIZE,
.to_nlattr = dccp_to_nlattr,
- .nlattr_size = dccp_nlattr_size,
.from_nlattr = nlattr_to_dccp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
@@ -898,7 +897,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
-struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
.l3proto = AF_INET6,
.l4proto = IPPROTO_DCCP,
.pkt_to_tuple = dccp_pkt_to_tuple,
@@ -912,8 +911,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
.print_conntrack = dccp_print_conntrack,
#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .nlattr_size = DCCP_NLATTR_SIZE,
.to_nlattr = dccp_to_nlattr,
- .nlattr_size = dccp_nlattr_size,
.from_nlattr = nlattr_to_dccp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 1f86ddf6649a..6c6896d21cd7 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -12,7 +12,7 @@
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
-static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
+static const unsigned int nf_ct_generic_timeout = 600*HZ;
static bool nf_generic_should_process(u8 proto)
{
@@ -163,7 +163,7 @@ static struct nf_proto_net *generic_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.generic.pn;
}
-struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
{
.l3proto = PF_UNSPEC,
.l4proto = 255,
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index a2503005d80b..d049ea5a3770 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -48,7 +48,7 @@ enum grep_conntrack {
GRE_CT_MAX
};
-static unsigned int gre_timeouts[GRE_CT_MAX] = {
+static const unsigned int gre_timeouts[GRE_CT_MAX] = {
[GRE_CT_UNREPLIED] = 30*HZ,
[GRE_CT_REPLIED] = 180*HZ,
};
@@ -352,7 +352,7 @@ static int gre_init_net(struct net *net, u_int16_t proto)
}
/* protocol helper struct */
-static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
+static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
.l3proto = AF_INET,
.l4proto = IPPROTO_GRE,
.pkt_to_tuple = gre_pkt_to_tuple,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 80faf04ddf15..fb9a35d16069 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -52,7 +52,7 @@ static const char *const sctp_conntrack_names[] = {
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
-static unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] __read_mostly = {
+static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
[SCTP_CONNTRACK_CLOSED] = 10 SECS,
[SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
[SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
@@ -578,6 +578,11 @@ static const struct nla_policy sctp_nla_policy[CTA_PROTOINFO_SCTP_MAX+1] = {
[CTA_PROTOINFO_SCTP_VTAG_REPLY] = { .type = NLA_U32 },
};
+#define SCTP_NLATTR_SIZE ( \
+ NLA_ALIGN(NLA_HDRLEN + 1) + \
+ NLA_ALIGN(NLA_HDRLEN + 4) + \
+ NLA_ALIGN(NLA_HDRLEN + 4))
+
static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
{
struct nlattr *attr = cda[CTA_PROTOINFO_SCTP];
@@ -608,12 +613,6 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
return 0;
}
-
-static int sctp_nlattr_size(void)
-{
- return nla_total_size(0) /* CTA_PROTOINFO_SCTP */
- + nla_policy_len(sctp_nla_policy, CTA_PROTOINFO_SCTP_MAX + 1);
-}
#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
@@ -778,7 +777,7 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.sctp.pn;
}
-struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = {
.l3proto = PF_INET,
.l4proto = IPPROTO_SCTP,
.pkt_to_tuple = sctp_pkt_to_tuple,
@@ -793,8 +792,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
.can_early_drop = sctp_can_early_drop,
.me = THIS_MODULE,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .nlattr_size = SCTP_NLATTR_SIZE,
.to_nlattr = sctp_to_nlattr,
- .nlattr_size = sctp_nlattr_size,
.from_nlattr = nlattr_to_sctp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
@@ -815,7 +814,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
-struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
.l3proto = PF_INET6,
.l4proto = IPPROTO_SCTP,
.pkt_to_tuple = sctp_pkt_to_tuple,
@@ -830,8 +829,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
.can_early_drop = sctp_can_early_drop,
.me = THIS_MODULE,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .nlattr_size = SCTP_NLATTR_SIZE,
.to_nlattr = sctp_to_nlattr,
- .nlattr_size = sctp_nlattr_size,
.from_nlattr = nlattr_to_sctp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 37ef35b861f2..e97cdc1cf98c 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -68,7 +68,7 @@ static const char *const tcp_conntrack_names[] = {
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
-static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
+static const unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] = {
[TCP_CONNTRACK_SYN_SENT] = 2 MINS,
[TCP_CONNTRACK_SYN_RECV] = 60 SECS,
[TCP_CONNTRACK_ESTABLISHED] = 5 DAYS,
@@ -305,6 +305,9 @@ static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
/* Print out the private part of the conntrack. */
static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+ return;
+
seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
}
#endif
@@ -1222,6 +1225,12 @@ static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
[CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
};
+#define TCP_NLATTR_SIZE ( \
+ NLA_ALIGN(NLA_HDRLEN + 1) + \
+ NLA_ALIGN(NLA_HDRLEN + 1) + \
+ NLA_ALIGN(NLA_HDRLEN + sizeof(sizeof(struct nf_ct_tcp_flags))) + \
+ NLA_ALIGN(NLA_HDRLEN + sizeof(sizeof(struct nf_ct_tcp_flags))))
+
static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
{
struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
@@ -1274,12 +1283,6 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
return 0;
}
-static int tcp_nlattr_size(void)
-{
- return nla_total_size(0) /* CTA_PROTOINFO_TCP */
- + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
-}
-
static unsigned int tcp_nlattr_tuple_size(void)
{
static unsigned int size __read_mostly;
@@ -1541,7 +1544,7 @@ static struct nf_proto_net *tcp_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.tcp.pn;
}
-struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_TCP,
@@ -1557,11 +1560,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
.can_early_drop = tcp_can_early_drop,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = tcp_to_nlattr,
- .nlattr_size = tcp_nlattr_size,
.from_nlattr = nlattr_to_tcp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
.nlattr_tuple_size = tcp_nlattr_tuple_size,
+ .nlattr_size = TCP_NLATTR_SIZE,
.nla_policy = nf_ct_port_nla_policy,
#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
@@ -1579,7 +1582,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
-struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_TCP,
@@ -1594,8 +1597,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
.error = tcp_error,
.can_early_drop = tcp_can_early_drop,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .nlattr_size = TCP_NLATTR_SIZE,
.to_nlattr = tcp_to_nlattr,
- .nlattr_size = tcp_nlattr_size,
.from_nlattr = nlattr_to_tcp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 3a5f727103af..fe7243970aa4 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -26,7 +26,7 @@
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
-static unsigned int udp_timeouts[UDP_CT_MAX] = {
+static const unsigned int udp_timeouts[UDP_CT_MAX] = {
[UDP_CT_UNREPLIED] = 30*HZ,
[UDP_CT_REPLIED] = 180*HZ,
};
@@ -296,7 +296,7 @@ static struct nf_proto_net *udp_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.udp.pn;
}
-struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_UDP,
@@ -328,7 +328,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4);
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_UDPLITE,
@@ -360,7 +360,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4);
#endif
-struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDP,
@@ -392,7 +392,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6);
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDPLITE,
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 5a101caa3e12..46d32baad095 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -309,10 +309,12 @@ static int ct_seq_show(struct seq_file *s, void *v)
WARN_ON(!l4proto);
ret = -ENOSPC;
- seq_printf(s, "%-8s %u %-8s %u %ld ",
+ seq_printf(s, "%-8s %u %-8s %u ",
l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
- l4proto_name(l4proto->l4proto), nf_ct_protonum(ct),
- nf_ct_expires(ct) / HZ);
+ l4proto_name(l4proto->l4proto), nf_ct_protonum(ct));
+
+ if (!test_bit(IPS_OFFLOAD_BIT, &ct->status))
+ seq_printf(s, "%ld ", nf_ct_expires(ct) / HZ);
if (l4proto->print_conntrack)
l4proto->print_conntrack(s, ct);
@@ -339,7 +341,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
goto release;
- if (test_bit(IPS_ASSURED_BIT, &ct->status))
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+ seq_puts(s, "[OFFLOAD] ");
+ else if (test_bit(IPS_ASSURED_BIT, &ct->status))
seq_puts(s, "[ASSURED] ");
if (seq_has_overflowed(s))
diff --git a/net/netfilter/nf_flow_table.c b/net/netfilter/nf_flow_table.c
new file mode 100644
index 000000000000..2f5099cb85b8
--- /dev/null
+++ b/net/netfilter/nf_flow_table.c
@@ -0,0 +1,429 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+
+struct flow_offload_entry {
+ struct flow_offload flow;
+ struct nf_conn *ct;
+ struct rcu_head rcu_head;
+};
+
+struct flow_offload *
+flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
+{
+ struct flow_offload_entry *entry;
+ struct flow_offload *flow;
+
+ if (unlikely(nf_ct_is_dying(ct) ||
+ !atomic_inc_not_zero(&ct->ct_general.use)))
+ return NULL;
+
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ if (!entry)
+ goto err_ct_refcnt;
+
+ flow = &entry->flow;
+
+ if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
+ goto err_dst_cache_original;
+
+ if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
+ goto err_dst_cache_reply;
+
+ entry->ct = ct;
+
+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) {
+ case NFPROTO_IPV4:
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in;
+ break;
+ case NFPROTO_IPV6:
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6;
+ break;
+ }
+
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
+
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache =
+ route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache =
+ route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst;
+
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
+
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir =
+ FLOW_OFFLOAD_DIR_ORIGINAL;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir =
+ FLOW_OFFLOAD_DIR_REPLY;
+
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
+ route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx =
+ route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
+ route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx =
+ route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
+
+ if (ct->status & IPS_SRC_NAT)
+ flow->flags |= FLOW_OFFLOAD_SNAT;
+ else if (ct->status & IPS_DST_NAT)
+ flow->flags |= FLOW_OFFLOAD_DNAT;
+
+ return flow;
+
+err_dst_cache_reply:
+ dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
+err_dst_cache_original:
+ kfree(entry);
+err_ct_refcnt:
+ nf_ct_put(ct);
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(flow_offload_alloc);
+
+void flow_offload_free(struct flow_offload *flow)
+{
+ struct flow_offload_entry *e;
+
+ dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
+ dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
+ e = container_of(flow, struct flow_offload_entry, flow);
+ kfree(e);
+}
+EXPORT_SYMBOL_GPL(flow_offload_free);
+
+void flow_offload_dead(struct flow_offload *flow)
+{
+ flow->flags |= FLOW_OFFLOAD_DYING;
+}
+EXPORT_SYMBOL_GPL(flow_offload_dead);
+
+int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
+{
+ flow->timeout = (u32)jiffies;
+
+ rhashtable_insert_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+ *flow_table->type->params);
+ rhashtable_insert_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+ *flow_table->type->params);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(flow_offload_add);
+
+void flow_offload_del(struct nf_flowtable *flow_table,
+ struct flow_offload *flow)
+{
+ struct flow_offload_entry *e;
+
+ rhashtable_remove_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+ *flow_table->type->params);
+ rhashtable_remove_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+ *flow_table->type->params);
+
+ e = container_of(flow, struct flow_offload_entry, flow);
+ kfree_rcu(e, rcu_head);
+}
+EXPORT_SYMBOL_GPL(flow_offload_del);
+
+struct flow_offload_tuple_rhash *
+flow_offload_lookup(struct nf_flowtable *flow_table,
+ struct flow_offload_tuple *tuple)
+{
+ return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
+ *flow_table->type->params);
+}
+EXPORT_SYMBOL_GPL(flow_offload_lookup);
+
+static void nf_flow_release_ct(const struct flow_offload *flow)
+{
+ struct flow_offload_entry *e;
+
+ e = container_of(flow, struct flow_offload_entry, flow);
+ nf_ct_delete(e->ct, 0, 0);
+ nf_ct_put(e->ct);
+}
+
+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
+ void (*iter)(struct flow_offload *flow, void *data),
+ void *data)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct rhashtable_iter hti;
+ struct flow_offload *flow;
+ int err;
+
+ err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
+ if (err)
+ return err;
+
+ rhashtable_walk_start(&hti);
+
+ while ((tuplehash = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(tuplehash)) {
+ err = PTR_ERR(tuplehash);
+ if (err != -EAGAIN)
+ goto out;
+
+ continue;
+ }
+ if (tuplehash->tuple.dir)
+ continue;
+
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
+
+ iter(flow, data);
+ }
+out:
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
+
+static inline bool nf_flow_has_expired(const struct flow_offload *flow)
+{
+ return (__s32)(flow->timeout - (u32)jiffies) <= 0;
+}
+
+static inline bool nf_flow_is_dying(const struct flow_offload *flow)
+{
+ return flow->flags & FLOW_OFFLOAD_DYING;
+}
+
+void nf_flow_offload_work_gc(struct work_struct *work)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table;
+ struct rhashtable_iter hti;
+ struct flow_offload *flow;
+ int err;
+
+ flow_table = container_of(work, struct nf_flowtable, gc_work.work);
+
+ err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
+ if (err)
+ goto schedule;
+
+ rhashtable_walk_start(&hti);
+
+ while ((tuplehash = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(tuplehash)) {
+ err = PTR_ERR(tuplehash);
+ if (err != -EAGAIN)
+ goto out;
+
+ continue;
+ }
+ if (tuplehash->tuple.dir)
+ continue;
+
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
+
+ if (nf_flow_has_expired(flow) ||
+ nf_flow_is_dying(flow)) {
+ flow_offload_del(flow_table, flow);
+ nf_flow_release_ct(flow);
+ }
+ }
+out:
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+schedule:
+ queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
+
+static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
+{
+ const struct flow_offload_tuple *tuple = data;
+
+ return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
+}
+
+static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
+{
+ const struct flow_offload_tuple_rhash *tuplehash = data;
+
+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
+}
+
+static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct flow_offload_tuple *tuple = arg->key;
+ const struct flow_offload_tuple_rhash *x = ptr;
+
+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
+ return 1;
+
+ return 0;
+}
+
+const struct rhashtable_params nf_flow_offload_rhash_params = {
+ .head_offset = offsetof(struct flow_offload_tuple_rhash, node),
+ .hashfn = flow_offload_hash,
+ .obj_hashfn = flow_offload_hash_obj,
+ .obj_cmpfn = flow_offload_hash_cmp,
+ .automatic_shrinking = true,
+};
+EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
+
+static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
+ __be16 port, __be16 new_port)
+{
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+ return -1;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);
+
+ return 0;
+}
+
+static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
+ __be16 port, __be16 new_port)
+{
+ struct udphdr *udph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
+ return -1;
+
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace2(&udph->check, skb, port,
+ new_port, true);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+
+ return 0;
+}
+
+static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, __be16 port, __be16 new_port)
+{
+ switch (protocol) {
+ case IPPROTO_TCP:
+ if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
+ return NF_DROP;
+ break;
+ case IPPROTO_UDP:
+ if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
+ return NF_DROP;
+ break;
+ }
+
+ return 0;
+}
+
+int nf_flow_snat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir)
+{
+ struct flow_ports *hdr;
+ __be16 port, new_port;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*hdr)))
+ return -1;
+
+ hdr = (void *)(skb_network_header(skb) + thoff);
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ port = hdr->source;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
+ hdr->source = new_port;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ port = hdr->dest;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
+ hdr->dest = new_port;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+}
+EXPORT_SYMBOL_GPL(nf_flow_snat_port);
+
+int nf_flow_dnat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir)
+{
+ struct flow_ports *hdr;
+ __be16 port, new_port;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*hdr)))
+ return -1;
+
+ hdr = (void *)(skb_network_header(skb) + thoff);
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ port = hdr->dest;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
+ hdr->dest = new_port;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ port = hdr->source;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
+ hdr->source = new_port;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+}
+EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
new file mode 100644
index 000000000000..281209aeba8f
--- /dev/null
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -0,0 +1,48 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
+
+static unsigned int
+nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ return nf_flow_offload_ip_hook(priv, skb, state);
+ case htons(ETH_P_IPV6):
+ return nf_flow_offload_ipv6_hook(priv, skb, state);
+ }
+
+ return NF_ACCEPT;
+}
+
+static struct nf_flowtable_type flowtable_inet = {
+ .family = NFPROTO_INET,
+ .params = &nf_flow_offload_rhash_params,
+ .gc = nf_flow_offload_work_gc,
+ .hook = nf_flow_offload_inet_hook,
+ .owner = THIS_MODULE,
+};
+
+static int __init nf_flow_inet_module_init(void)
+{
+ nft_register_flowtable_type(&flowtable_inet);
+
+ return 0;
+}
+
+static void __exit nf_flow_inet_module_exit(void)
+{
+ nft_unregister_flowtable_type(&flowtable_inet);
+}
+
+module_init(nf_flow_inet_module_init);
+module_exit(nf_flow_inet_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NF_FLOWTABLE(1); /* NFPROTO_INET */
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 44284cd2528d..18f6d7ae995b 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -10,7 +10,7 @@
int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
const struct nf_hook_entries *entries, unsigned int index,
unsigned int verdict);
-unsigned int nf_queue_nf_hook_drop(struct net *net);
+void nf_queue_nf_hook_drop(struct net *net);
/* nf_log.c */
int __init netfilter_log_init(void);
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index f7e21953b1de..7f55af5f3d1a 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -10,9 +10,13 @@
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
#include <net/protocol.h>
#include <net/netfilter/nf_queue.h>
#include <net/dst.h>
@@ -96,30 +100,56 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
-unsigned int nf_queue_nf_hook_drop(struct net *net)
+void nf_queue_nf_hook_drop(struct net *net)
{
const struct nf_queue_handler *qh;
- unsigned int count = 0;
rcu_read_lock();
qh = rcu_dereference(net->nf.queue_handler);
if (qh)
- count = qh->nf_hook_drop(net);
+ qh->nf_hook_drop(net);
rcu_read_unlock();
-
- return count;
}
EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop);
+static void nf_ip_saveroute(const struct sk_buff *skb,
+ struct nf_queue_entry *entry)
+{
+ struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
+ if (entry->state.hook == NF_INET_LOCAL_OUT) {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ rt_info->tos = iph->tos;
+ rt_info->daddr = iph->daddr;
+ rt_info->saddr = iph->saddr;
+ rt_info->mark = skb->mark;
+ }
+}
+
+static void nf_ip6_saveroute(const struct sk_buff *skb,
+ struct nf_queue_entry *entry)
+{
+ struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
+ if (entry->state.hook == NF_INET_LOCAL_OUT) {
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ rt_info->daddr = iph->daddr;
+ rt_info->saddr = iph->saddr;
+ rt_info->mark = skb->mark;
+ }
+}
+
static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
const struct nf_hook_entries *entries,
unsigned int index, unsigned int queuenum)
{
int status = -ENOENT;
struct nf_queue_entry *entry = NULL;
- const struct nf_afinfo *afinfo;
const struct nf_queue_handler *qh;
struct net *net = state->net;
+ unsigned int route_key_size;
/* QUEUE == DROP if no one is waiting, to be safe. */
qh = rcu_dereference(net->nf.queue_handler);
@@ -128,11 +158,19 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
goto err;
}
- afinfo = nf_get_afinfo(state->pf);
- if (!afinfo)
- goto err;
+ switch (state->pf) {
+ case AF_INET:
+ route_key_size = sizeof(struct ip_rt_info);
+ break;
+ case AF_INET6:
+ route_key_size = sizeof(struct ip6_rt_info);
+ break;
+ default:
+ route_key_size = 0;
+ break;
+ }
- entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
+ entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC);
if (!entry) {
status = -ENOMEM;
goto err;
@@ -142,12 +180,21 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
.skb = skb,
.state = *state,
.hook_index = index,
- .size = sizeof(*entry) + afinfo->route_key_size,
+ .size = sizeof(*entry) + route_key_size,
};
nf_queue_entry_get_refs(entry);
skb_dst_force(skb);
- afinfo->saveroute(skb, entry);
+
+ switch (entry->state.pf) {
+ case AF_INET:
+ nf_ip_saveroute(skb, entry);
+ break;
+ case AF_INET6:
+ nf_ip6_saveroute(skb, entry);
+ break;
+ }
+
status = qh->outfn(entry, queuenum);
if (status < 0) {
@@ -204,13 +251,31 @@ repeat:
return NF_ACCEPT;
}
+static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum)
+{
+ switch (pf) {
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ case NFPROTO_BRIDGE:
+ return rcu_dereference(net->nf.hooks_bridge[hooknum]);
+#endif
+ case NFPROTO_IPV4:
+ return rcu_dereference(net->nf.hooks_ipv4[hooknum]);
+ case NFPROTO_IPV6:
+ return rcu_dereference(net->nf.hooks_ipv6[hooknum]);
+ default:
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+
+ return NULL;
+}
+
/* Caller must hold rcu read-side lock */
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
const struct nf_hook_entry *hook_entry;
const struct nf_hook_entries *hooks;
struct sk_buff *skb = entry->skb;
- const struct nf_afinfo *afinfo;
const struct net *net;
unsigned int i;
int err;
@@ -219,12 +284,12 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
net = entry->state.net;
pf = entry->state.pf;
- hooks = rcu_dereference(net->nf.hooks[pf][entry->state.hook]);
+ hooks = nf_hook_entries_head(net, pf, entry->state.hook);
nf_queue_entry_release_refs(entry);
i = entry->hook_index;
- if (WARN_ON_ONCE(i >= hooks->num_hook_entries)) {
+ if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) {
kfree_skb(skb);
kfree(entry);
return;
@@ -237,8 +302,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
if (verdict == NF_ACCEPT) {
- afinfo = nf_get_afinfo(entry->state.pf);
- if (!afinfo || afinfo->reroute(entry->state.net, skb, entry) < 0)
+ if (nf_reroute(skb, entry) < 0)
verdict = NF_DROP;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 10798b357481..336b81689ac9 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -17,6 +17,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
#include <net/net_namespace.h>
@@ -24,6 +25,7 @@
static LIST_HEAD(nf_tables_expressions);
static LIST_HEAD(nf_tables_objects);
+static LIST_HEAD(nf_tables_flowtables);
/**
* nft_register_afinfo - register nf_tables address family info
@@ -139,29 +141,26 @@ static void nft_trans_destroy(struct nft_trans *trans)
kfree(trans);
}
-static int nf_tables_register_hooks(struct net *net,
- const struct nft_table *table,
- struct nft_chain *chain,
- unsigned int hook_nops)
+static int nf_tables_register_hook(struct net *net,
+ const struct nft_table *table,
+ struct nft_chain *chain)
{
if (table->flags & NFT_TABLE_F_DORMANT ||
!nft_is_base_chain(chain))
return 0;
- return nf_register_net_hooks(net, nft_base_chain(chain)->ops,
- hook_nops);
+ return nf_register_net_hook(net, &nft_base_chain(chain)->ops);
}
-static void nf_tables_unregister_hooks(struct net *net,
- const struct nft_table *table,
- struct nft_chain *chain,
- unsigned int hook_nops)
+static void nf_tables_unregister_hook(struct net *net,
+ const struct nft_table *table,
+ struct nft_chain *chain)
{
if (table->flags & NFT_TABLE_F_DORMANT ||
!nft_is_base_chain(chain))
return;
- nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, hook_nops);
+ nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
}
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
@@ -348,6 +347,40 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
return err;
}
+static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_flowtable *flowtable)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type,
+ sizeof(struct nft_trans_flowtable));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWFLOWTABLE)
+ nft_activate_next(ctx->net, flowtable);
+
+ nft_trans_flowtable(trans) = flowtable;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+ return 0;
+}
+
+static int nft_delflowtable(struct nft_ctx *ctx,
+ struct nft_flowtable *flowtable)
+{
+ int err;
+
+ err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
+ if (err < 0)
+ return err;
+
+ nft_deactivate_next(ctx->net, flowtable);
+ ctx->table->use--;
+
+ return err;
+}
+
/*
* Tables
*/
@@ -595,8 +628,7 @@ static void _nf_tables_table_disable(struct net *net,
if (cnt && i++ == cnt)
break;
- nf_unregister_net_hooks(net, nft_base_chain(chain)->ops,
- afi->nops);
+ nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
}
}
@@ -613,8 +645,7 @@ static int nf_tables_table_enable(struct net *net,
if (!nft_is_base_chain(chain))
continue;
- err = nf_register_net_hooks(net, nft_base_chain(chain)->ops,
- afi->nops);
+ err = nf_register_net_hook(net, &nft_base_chain(chain)->ops);
if (err < 0)
goto err;
@@ -733,6 +764,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
INIT_LIST_HEAD(&table->chains);
INIT_LIST_HEAD(&table->sets);
INIT_LIST_HEAD(&table->objects);
+ INIT_LIST_HEAD(&table->flowtables);
table->flags = flags;
nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
@@ -754,10 +786,11 @@ err1:
static int nft_flush_table(struct nft_ctx *ctx)
{
- int err;
+ struct nft_flowtable *flowtable, *nft;
struct nft_chain *chain, *nc;
struct nft_object *obj, *ne;
struct nft_set *set, *ns;
+ int err;
list_for_each_entry(chain, &ctx->table->chains, list) {
if (!nft_is_active_next(ctx->net, chain))
@@ -774,7 +807,7 @@ static int nft_flush_table(struct nft_ctx *ctx)
if (!nft_is_active_next(ctx->net, set))
continue;
- if (set->flags & NFT_SET_ANONYMOUS &&
+ if (nft_set_is_anonymous(set) &&
!list_empty(&set->bindings))
continue;
@@ -783,6 +816,12 @@ static int nft_flush_table(struct nft_ctx *ctx)
goto out;
}
+ list_for_each_entry_safe(flowtable, nft, &ctx->table->flowtables, list) {
+ err = nft_delflowtable(ctx, flowtable);
+ if (err < 0)
+ goto out;
+ }
+
list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) {
err = nft_delobj(ctx, obj);
if (err < 0)
@@ -1026,7 +1065,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
if (nft_is_base_chain(chain)) {
const struct nft_base_chain *basechain = nft_base_chain(chain);
- const struct nf_hook_ops *ops = &basechain->ops[0];
+ const struct nf_hook_ops *ops = &basechain->ops;
struct nlattr *nest;
nest = nla_nest_start(skb, NFTA_CHAIN_HOOK);
@@ -1227,13 +1266,13 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
static void nft_chain_stats_replace(struct nft_base_chain *chain,
struct nft_stats __percpu *newstats)
{
+ struct nft_stats __percpu *oldstats;
+
if (newstats == NULL)
return;
if (chain->stats) {
- struct nft_stats __percpu *oldstats =
- nft_dereference(chain->stats);
-
+ oldstats = nfnl_dereference(chain->stats, NFNL_SUBSYS_NFTABLES);
rcu_assign_pointer(chain->stats, newstats);
synchronize_rcu();
free_percpu(oldstats);
@@ -1252,8 +1291,8 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
free_percpu(basechain->stats);
if (basechain->stats)
static_branch_dec(&nft_counters_enabled);
- if (basechain->ops[0].dev != NULL)
- dev_put(basechain->ops[0].dev);
+ if (basechain->ops.dev != NULL)
+ dev_put(basechain->ops.dev);
kfree(chain->name);
kfree(basechain);
} else {
@@ -1264,7 +1303,7 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
struct nft_chain_hook {
u32 num;
- u32 priority;
+ s32 priority;
const struct nf_chain_type *type;
struct net_device *dev;
};
@@ -1303,6 +1342,11 @@ static int nft_chain_parse_hook(struct net *net,
}
if (!(type->hook_mask & (1 << hook->num)))
return -EOPNOTSUPP;
+
+ if (type->type == NFT_CHAIN_T_NAT &&
+ hook->priority <= NF_IP_PRI_CONNTRACK)
+ return -EOPNOTSUPP;
+
if (!try_module_get(type->owner))
return -ENOENT;
@@ -1349,7 +1393,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
struct nft_stats __percpu *stats;
struct net *net = ctx->net;
struct nft_chain *chain;
- unsigned int i;
int err;
if (table->use == UINT_MAX)
@@ -1358,7 +1401,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (nla[NFTA_CHAIN_HOOK]) {
struct nft_chain_hook hook;
struct nf_hook_ops *ops;
- nf_hookfn *hookfn;
err = nft_chain_parse_hook(net, nla, afi, &hook, create);
if (err < 0)
@@ -1384,23 +1426,19 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
static_branch_inc(&nft_counters_enabled);
}
- hookfn = hook.type->hooks[hook.num];
basechain->type = hook.type;
chain = &basechain->chain;
- for (i = 0; i < afi->nops; i++) {
- ops = &basechain->ops[i];
- ops->pf = family;
- ops->hooknum = hook.num;
- ops->priority = hook.priority;
- ops->priv = chain;
- ops->hook = afi->hooks[ops->hooknum];
- ops->dev = hook.dev;
- if (hookfn)
- ops->hook = hookfn;
- if (afi->hook_ops_init)
- afi->hook_ops_init(ops, i);
- }
+ ops = &basechain->ops;
+ ops->pf = family;
+ ops->hooknum = hook.num;
+ ops->priority = hook.priority;
+ ops->priv = chain;
+ ops->hook = hook.type->hooks[ops->hooknum];
+ ops->dev = hook.dev;
+
+ if (basechain->type->type == NFT_CHAIN_T_NAT)
+ ops->nat_hook = true;
chain->flags |= NFT_BASE_CHAIN;
basechain->policy = policy;
@@ -1418,7 +1456,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
goto err1;
}
- err = nf_tables_register_hooks(net, table, chain, afi->nops);
+ err = nf_tables_register_hook(net, table, chain);
if (err < 0)
goto err1;
@@ -1432,7 +1470,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
return 0;
err2:
- nf_tables_unregister_hooks(net, table, chain, afi->nops);
+ nf_tables_unregister_hook(net, table, chain);
err1:
nf_tables_chain_destroy(chain);
@@ -1445,14 +1483,13 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
const struct nlattr * const *nla = ctx->nla;
struct nft_table *table = ctx->table;
struct nft_chain *chain = ctx->chain;
- struct nft_af_info *afi = ctx->afi;
struct nft_base_chain *basechain;
struct nft_stats *stats = NULL;
struct nft_chain_hook hook;
const struct nlattr *name;
struct nf_hook_ops *ops;
struct nft_trans *trans;
- int err, i;
+ int err;
if (nla[NFTA_CHAIN_HOOK]) {
if (!nft_is_base_chain(chain))
@@ -1469,14 +1506,12 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
return -EBUSY;
}
- for (i = 0; i < afi->nops; i++) {
- ops = &basechain->ops[i];
- if (ops->hooknum != hook.num ||
- ops->priority != hook.priority ||
- ops->dev != hook.dev) {
- nft_chain_release_hook(&hook);
- return -EBUSY;
- }
+ ops = &basechain->ops;
+ if (ops->hooknum != hook.num ||
+ ops->priority != hook.priority ||
+ ops->dev != hook.dev) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
}
nft_chain_release_hook(&hook);
}
@@ -2072,7 +2107,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
continue;
list_for_each_entry_rcu(chain, &table->chains, list) {
- if (ctx && ctx->chain[0] &&
+ if (ctx && ctx->chain &&
strcmp(ctx->chain, chain->name) != 0)
continue;
@@ -3277,7 +3312,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *i;
struct nft_set_iter iter;
- if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
+ if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
return -EBUSY;
if (binding->flags & NFT_SET_MAP) {
@@ -3312,7 +3347,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
{
list_del_rcu(&binding->list);
- if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
+ if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
nft_is_active(ctx->net, set))
nf_tables_set_destroy(ctx, set);
}
@@ -4665,8 +4700,10 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
{
struct nft_obj_filter *filter = cb->data;
- kfree(filter->table);
- kfree(filter);
+ if (filter) {
+ kfree(filter->table);
+ kfree(filter);
+ }
return 0;
}
@@ -4848,6 +4885,605 @@ static void nf_tables_obj_notify(const struct nft_ctx *ctx,
ctx->afi->family, ctx->report, GFP_KERNEL);
}
+/*
+ * Flow tables
+ */
+void nft_register_flowtable_type(struct nf_flowtable_type *type)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_add_tail_rcu(&type->list, &nf_tables_flowtables);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_register_flowtable_type);
+
+void nft_unregister_flowtable_type(struct nf_flowtable_type *type)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_del_rcu(&type->list);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_flowtable_type);
+
+static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = {
+ [NFTA_FLOWTABLE_TABLE] = { .type = NLA_STRING,
+ .len = NFT_NAME_MAXLEN - 1 },
+ [NFTA_FLOWTABLE_NAME] = { .type = NLA_STRING,
+ .len = NFT_NAME_MAXLEN - 1 },
+ [NFTA_FLOWTABLE_HOOK] = { .type = NLA_NESTED },
+};
+
+struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
+ const struct nlattr *nla,
+ u8 genmask)
+{
+ struct nft_flowtable *flowtable;
+
+ list_for_each_entry(flowtable, &table->flowtables, list) {
+ if (!nla_strcmp(nla, flowtable->name) &&
+ nft_active_genmask(flowtable, genmask))
+ return flowtable;
+ }
+ return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup);
+
+#define NFT_FLOWTABLE_DEVICE_MAX 8
+
+static int nf_tables_parse_devices(const struct nft_ctx *ctx,
+ const struct nlattr *attr,
+ struct net_device *dev_array[], int *len)
+{
+ const struct nlattr *tmp;
+ struct net_device *dev;
+ char ifname[IFNAMSIZ];
+ int rem, n = 0, err;
+
+ nla_for_each_nested(tmp, attr, rem) {
+ if (nla_type(tmp) != NFTA_DEVICE_NAME) {
+ err = -EINVAL;
+ goto err1;
+ }
+
+ nla_strlcpy(ifname, tmp, IFNAMSIZ);
+ dev = dev_get_by_name(ctx->net, ifname);
+ if (!dev) {
+ err = -ENOENT;
+ goto err1;
+ }
+
+ dev_array[n++] = dev;
+ if (n == NFT_FLOWTABLE_DEVICE_MAX) {
+ err = -EFBIG;
+ goto err1;
+ }
+ }
+ if (!len)
+ return -EINVAL;
+
+ err = 0;
+err1:
+ *len = n;
+ return err;
+}
+
+static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = {
+ [NFTA_FLOWTABLE_HOOK_NUM] = { .type = NLA_U32 },
+ [NFTA_FLOWTABLE_HOOK_PRIORITY] = { .type = NLA_U32 },
+ [NFTA_FLOWTABLE_HOOK_DEVS] = { .type = NLA_NESTED },
+};
+
+static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
+ const struct nlattr *attr,
+ struct nft_flowtable *flowtable)
+{
+ struct net_device *dev_array[NFT_FLOWTABLE_DEVICE_MAX];
+ struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1];
+ struct nf_hook_ops *ops;
+ int hooknum, priority;
+ int err, n = 0, i;
+
+ err = nla_parse_nested(tb, NFTA_FLOWTABLE_HOOK_MAX, attr,
+ nft_flowtable_hook_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFTA_FLOWTABLE_HOOK_NUM] ||
+ !tb[NFTA_FLOWTABLE_HOOK_PRIORITY] ||
+ !tb[NFTA_FLOWTABLE_HOOK_DEVS])
+ return -EINVAL;
+
+ hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM]));
+ if (hooknum >= ctx->afi->nhooks)
+ return -EINVAL;
+
+ priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
+
+ err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS],
+ dev_array, &n);
+ if (err < 0)
+ goto err1;
+
+ ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL);
+ if (!ops) {
+ err = -ENOMEM;
+ goto err1;
+ }
+
+ flowtable->ops = ops;
+ flowtable->ops_len = n;
+
+ for (i = 0; i < n; i++) {
+ flowtable->ops[i].pf = NFPROTO_NETDEV;
+ flowtable->ops[i].hooknum = hooknum;
+ flowtable->ops[i].priority = priority;
+ flowtable->ops[i].priv = &flowtable->data.rhashtable;
+ flowtable->ops[i].hook = flowtable->data.type->hook;
+ flowtable->ops[i].dev = dev_array[i];
+ }
+
+ err = 0;
+err1:
+ for (i = 0; i < n; i++)
+ dev_put(dev_array[i]);
+
+ return err;
+}
+
+static const struct nf_flowtable_type *
+__nft_flowtable_type_get(const struct nft_af_info *afi)
+{
+ const struct nf_flowtable_type *type;
+
+ list_for_each_entry(type, &nf_tables_flowtables, list) {
+ if (afi->family == type->family)
+ return type;
+ }
+ return NULL;
+}
+
+static const struct nf_flowtable_type *
+nft_flowtable_type_get(const struct nft_af_info *afi)
+{
+ const struct nf_flowtable_type *type;
+
+ type = __nft_flowtable_type_get(afi);
+ if (type != NULL && try_module_get(type->owner))
+ return type;
+
+#ifdef CONFIG_MODULES
+ if (type == NULL) {
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ request_module("nf-flowtable-%u", afi->family);
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ if (__nft_flowtable_type_get(afi))
+ return ERR_PTR(-EAGAIN);
+ }
+#endif
+ return ERR_PTR(-ENOENT);
+}
+
+void nft_flow_table_iterate(struct net *net,
+ void (*iter)(struct nf_flowtable *flowtable, void *data),
+ void *data)
+{
+ struct nft_flowtable *flowtable;
+ const struct nft_af_info *afi;
+ const struct nft_table *table;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
+ list_for_each_entry_rcu(table, &afi->tables, list) {
+ list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
+ iter(&flowtable->data, data);
+ }
+ }
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(nft_flow_table_iterate);
+
+static void nft_unregister_flowtable_net_hooks(struct net *net,
+ struct nft_flowtable *flowtable)
+{
+ int i;
+
+ for (i = 0; i < flowtable->ops_len; i++) {
+ if (!flowtable->ops[i].dev)
+ continue;
+
+ nf_unregister_net_hook(net, &flowtable->ops[i]);
+ }
+}
+
+static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nf_flowtable_type *type;
+ u8 genmask = nft_genmask_next(net);
+ int family = nfmsg->nfgen_family;
+ struct nft_flowtable *flowtable;
+ struct nft_af_info *afi;
+ struct nft_table *table;
+ struct nft_ctx ctx;
+ int err, i, k;
+
+ if (!nla[NFTA_FLOWTABLE_TABLE] ||
+ !nla[NFTA_FLOWTABLE_NAME] ||
+ !nla[NFTA_FLOWTABLE_HOOK])
+ return -EINVAL;
+
+ afi = nf_tables_afinfo_lookup(net, family, true);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+ genmask);
+ if (IS_ERR(flowtable)) {
+ err = PTR_ERR(flowtable);
+ if (err != -ENOENT)
+ return err;
+ } else {
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+
+ return 0;
+ }
+
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+
+ flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
+ if (!flowtable)
+ return -ENOMEM;
+
+ flowtable->table = table;
+ flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
+ if (!flowtable->name) {
+ err = -ENOMEM;
+ goto err1;
+ }
+
+ type = nft_flowtable_type_get(afi);
+ if (IS_ERR(type)) {
+ err = PTR_ERR(type);
+ goto err2;
+ }
+
+ flowtable->data.type = type;
+ err = rhashtable_init(&flowtable->data.rhashtable, type->params);
+ if (err < 0)
+ goto err3;
+
+ err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
+ flowtable);
+ if (err < 0)
+ goto err3;
+
+ for (i = 0; i < flowtable->ops_len; i++) {
+ err = nf_register_net_hook(net, &flowtable->ops[i]);
+ if (err < 0)
+ goto err4;
+ }
+
+ err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
+ if (err < 0)
+ goto err5;
+
+ INIT_DEFERRABLE_WORK(&flowtable->data.gc_work, type->gc);
+ queue_delayed_work(system_power_efficient_wq,
+ &flowtable->data.gc_work, HZ);
+
+ list_add_tail_rcu(&flowtable->list, &table->flowtables);
+ table->use++;
+
+ return 0;
+err5:
+ i = flowtable->ops_len;
+err4:
+ for (k = i - 1; k >= 0; k--)
+ nf_unregister_net_hook(net, &flowtable->ops[i]);
+
+ kfree(flowtable->ops);
+err3:
+ module_put(type->owner);
+err2:
+ kfree(flowtable->name);
+err1:
+ kfree(flowtable);
+ return err;
+}
+
+static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_next(net);
+ int family = nfmsg->nfgen_family;
+ struct nft_flowtable *flowtable;
+ struct nft_af_info *afi;
+ struct nft_table *table;
+ struct nft_ctx ctx;
+
+ afi = nf_tables_afinfo_lookup(net, family, true);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+ genmask);
+ if (IS_ERR(flowtable))
+ return PTR_ERR(flowtable);
+ if (flowtable->use > 0)
+ return -EBUSY;
+
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+
+ return nft_delflowtable(&ctx, flowtable);
+}
+
+static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event,
+ u32 flags, int family,
+ struct nft_flowtable *flowtable)
+{
+ struct nlattr *nest, *nest_devs;
+ struct nfgenmsg *nfmsg;
+ struct nlmsghdr *nlh;
+ int i;
+
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
+ if (nlh == NULL)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = family;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
+
+ if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) ||
+ nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
+ nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)))
+ goto nla_put_failure;
+
+ nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);
+ if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) ||
+ nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority)))
+ goto nla_put_failure;
+
+ nest_devs = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK_DEVS);
+ if (!nest_devs)
+ goto nla_put_failure;
+
+ for (i = 0; i < flowtable->ops_len; i++) {
+ if (flowtable->ops[i].dev &&
+ nla_put_string(skb, NFTA_DEVICE_NAME,
+ flowtable->ops[i].dev->name))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest_devs);
+ nla_nest_end(skb, nest);
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+struct nft_flowtable_filter {
+ char *table;
+};
+
+static int nf_tables_dump_flowtable(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ struct nft_flowtable_filter *filter = cb->data;
+ unsigned int idx = 0, s_idx = cb->args[0];
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+ struct nft_flowtable *flowtable;
+ const struct nft_af_info *afi;
+ const struct nft_table *table;
+
+ rcu_read_lock();
+ cb->seq = net->nft.base_seq;
+
+ list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
+ if (family != NFPROTO_UNSPEC && family != afi->family)
+ continue;
+
+ list_for_each_entry_rcu(table, &afi->tables, list) {
+ list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
+ if (!nft_is_active(net, flowtable))
+ goto cont;
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (filter && filter->table[0] &&
+ strcmp(filter->table, table->name))
+ goto cont;
+
+ if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWFLOWTABLE,
+ NLM_F_MULTI | NLM_F_APPEND,
+ afi->family, flowtable) < 0)
+ goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+ idx++;
+ }
+ }
+ }
+done:
+ rcu_read_unlock();
+
+ cb->args[0] = idx;
+ return skb->len;
+}
+
+static int nf_tables_dump_flowtable_done(struct netlink_callback *cb)
+{
+ struct nft_flowtable_filter *filter = cb->data;
+
+ if (!filter)
+ return 0;
+
+ kfree(filter->table);
+ kfree(filter);
+
+ return 0;
+}
+
+static struct nft_flowtable_filter *
+nft_flowtable_filter_alloc(const struct nlattr * const nla[])
+{
+ struct nft_flowtable_filter *filter;
+
+ filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+ if (!filter)
+ return ERR_PTR(-ENOMEM);
+
+ if (nla[NFTA_FLOWTABLE_TABLE]) {
+ filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE],
+ GFP_KERNEL);
+ if (!filter->table) {
+ kfree(filter);
+ return ERR_PTR(-ENOMEM);
+ }
+ }
+ return filter;
+}
+
+static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_cur(net);
+ int family = nfmsg->nfgen_family;
+ struct nft_flowtable *flowtable;
+ const struct nft_af_info *afi;
+ const struct nft_table *table;
+ struct sk_buff *skb2;
+ int err;
+
+ if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = nf_tables_dump_flowtable,
+ .done = nf_tables_dump_flowtable_done,
+ };
+
+ if (nla[NFTA_FLOWTABLE_TABLE]) {
+ struct nft_flowtable_filter *filter;
+
+ filter = nft_flowtable_filter_alloc(nla);
+ if (IS_ERR(filter))
+ return -ENOMEM;
+
+ c.data = filter;
+ }
+ return netlink_dump_start(nlsk, skb, nlh, &c);
+ }
+
+ if (!nla[NFTA_FLOWTABLE_NAME])
+ return -EINVAL;
+
+ afi = nf_tables_afinfo_lookup(net, family, false);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+ genmask);
+ if (IS_ERR(table))
+ return PTR_ERR(flowtable);
+
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb2)
+ return -ENOMEM;
+
+ err = nf_tables_fill_flowtable_info(skb2, net, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq,
+ NFT_MSG_NEWFLOWTABLE, 0, family,
+ flowtable);
+ if (err < 0)
+ goto err;
+
+ return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+err:
+ kfree_skb(skb2);
+ return err;
+}
+
+static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
+ struct nft_flowtable *flowtable,
+ int event)
+{
+ struct sk_buff *skb;
+ int err;
+
+ if (ctx->report &&
+ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
+ return;
+
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto err;
+
+ err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid,
+ ctx->seq, event, 0,
+ ctx->afi->family, flowtable);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto err;
+ }
+
+ nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
+ return;
+err:
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
+}
+
+static void nft_flowtable_destroy(void *ptr, void *arg)
+{
+ kfree(ptr);
+}
+
+static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
+{
+ cancel_delayed_work_sync(&flowtable->data.gc_work);
+ kfree(flowtable->name);
+ rhashtable_free_and_destroy(&flowtable->data.rhashtable,
+ nft_flowtable_destroy, NULL);
+ module_put(flowtable->data.type->owner);
+}
+
static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
u32 portid, u32 seq)
{
@@ -4878,6 +5514,49 @@ nla_put_failure:
return -EMSGSIZE;
}
+static void nft_flowtable_event(unsigned long event, struct net_device *dev,
+ struct nft_flowtable *flowtable)
+{
+ int i;
+
+ for (i = 0; i < flowtable->ops_len; i++) {
+ if (flowtable->ops[i].dev != dev)
+ continue;
+
+ nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]);
+ flowtable->ops[i].dev = NULL;
+ break;
+ }
+}
+
+static int nf_tables_flowtable_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct nft_flowtable *flowtable;
+ struct nft_table *table;
+ struct nft_af_info *afi;
+
+ if (event != NETDEV_UNREGISTER)
+ return 0;
+
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) {
+ list_for_each_entry(table, &afi->tables, list) {
+ list_for_each_entry(flowtable, &table->flowtables, list) {
+ nft_flowtable_event(event, dev, flowtable);
+ }
+ }
+ }
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nf_tables_flowtable_notifier = {
+ .notifier_call = nf_tables_flowtable_event,
+};
+
static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb,
int event)
{
@@ -5030,6 +5709,21 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_OBJ_MAX,
.policy = nft_obj_policy,
},
+ [NFT_MSG_NEWFLOWTABLE] = {
+ .call_batch = nf_tables_newflowtable,
+ .attr_count = NFTA_FLOWTABLE_MAX,
+ .policy = nft_flowtable_policy,
+ },
+ [NFT_MSG_GETFLOWTABLE] = {
+ .call = nf_tables_getflowtable,
+ .attr_count = NFTA_FLOWTABLE_MAX,
+ .policy = nft_flowtable_policy,
+ },
+ [NFT_MSG_DELFLOWTABLE] = {
+ .call_batch = nf_tables_delflowtable,
+ .attr_count = NFTA_FLOWTABLE_MAX,
+ .policy = nft_flowtable_policy,
+ },
};
static void nft_chain_commit_update(struct nft_trans *trans)
@@ -5075,6 +5769,9 @@ static void nf_tables_commit_release(struct nft_trans *trans)
case NFT_MSG_DELOBJ:
nft_obj_destroy(nft_trans_obj(trans));
break;
+ case NFT_MSG_DELFLOWTABLE:
+ nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
+ break;
}
kfree(trans);
}
@@ -5127,10 +5824,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_DELCHAIN:
list_del_rcu(&trans->ctx.chain->list);
nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
- nf_tables_unregister_hooks(trans->ctx.net,
- trans->ctx.table,
- trans->ctx.chain,
- trans->ctx.afi->nops);
+ nf_tables_unregister_hook(trans->ctx.net,
+ trans->ctx.table,
+ trans->ctx.chain);
break;
case NFT_MSG_NEWRULE:
nft_clear(trans->ctx.net, nft_trans_rule(trans));
@@ -5150,7 +5846,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
/* This avoids hitting -EBUSY when deleting the table
* from the transaction.
*/
- if (nft_trans_set(trans)->flags & NFT_SET_ANONYMOUS &&
+ if (nft_set_is_anonymous(nft_trans_set(trans)) &&
!list_empty(&nft_trans_set(trans)->bindings))
trans->ctx.table->use--;
@@ -5193,6 +5889,21 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
NFT_MSG_DELOBJ);
break;
+ case NFT_MSG_NEWFLOWTABLE:
+ nft_clear(net, nft_trans_flowtable(trans));
+ nf_tables_flowtable_notify(&trans->ctx,
+ nft_trans_flowtable(trans),
+ NFT_MSG_NEWFLOWTABLE);
+ nft_trans_destroy(trans);
+ break;
+ case NFT_MSG_DELFLOWTABLE:
+ list_del_rcu(&nft_trans_flowtable(trans)->list);
+ nf_tables_flowtable_notify(&trans->ctx,
+ nft_trans_flowtable(trans),
+ NFT_MSG_DELFLOWTABLE);
+ nft_unregister_flowtable_net_hooks(net,
+ nft_trans_flowtable(trans));
+ break;
}
}
@@ -5230,6 +5941,9 @@ static void nf_tables_abort_release(struct nft_trans *trans)
case NFT_MSG_NEWOBJ:
nft_obj_destroy(nft_trans_obj(trans));
break;
+ case NFT_MSG_NEWFLOWTABLE:
+ nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
+ break;
}
kfree(trans);
}
@@ -5267,10 +5981,9 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
} else {
trans->ctx.table->use--;
list_del_rcu(&trans->ctx.chain->list);
- nf_tables_unregister_hooks(trans->ctx.net,
- trans->ctx.table,
- trans->ctx.chain,
- trans->ctx.afi->nops);
+ nf_tables_unregister_hook(trans->ctx.net,
+ trans->ctx.table,
+ trans->ctx.chain);
}
break;
case NFT_MSG_DELCHAIN:
@@ -5320,6 +6033,17 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
nft_clear(trans->ctx.net, nft_trans_obj(trans));
nft_trans_destroy(trans);
break;
+ case NFT_MSG_NEWFLOWTABLE:
+ trans->ctx.table->use--;
+ list_del_rcu(&nft_trans_flowtable(trans)->list);
+ nft_unregister_flowtable_net_hooks(net,
+ nft_trans_flowtable(trans));
+ break;
+ case NFT_MSG_DELFLOWTABLE:
+ trans->ctx.table->use++;
+ nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
+ nft_trans_destroy(trans);
+ break;
}
}
@@ -5371,7 +6095,7 @@ int nft_chain_validate_hooks(const struct nft_chain *chain,
if (nft_is_base_chain(chain)) {
basechain = nft_base_chain(chain);
- if ((1 << basechain->ops[0].hooknum) & hook_flags)
+ if ((1 << basechain->ops.hooknum) & hook_flags)
return 0;
return -EOPNOTSUPP;
@@ -5859,8 +6583,7 @@ int __nft_release_basechain(struct nft_ctx *ctx)
BUG_ON(!nft_is_base_chain(ctx->chain));
- nf_tables_unregister_hooks(ctx->net, ctx->chain->table, ctx->chain,
- ctx->afi->nops);
+ nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
list_del(&rule->list);
ctx->chain->use--;
@@ -5877,6 +6600,7 @@ EXPORT_SYMBOL_GPL(__nft_release_basechain);
/* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */
static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
{
+ struct nft_flowtable *flowtable, *nf;
struct nft_table *table, *nt;
struct nft_chain *chain, *nc;
struct nft_object *obj, *ne;
@@ -5889,8 +6613,10 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
list_for_each_entry_safe(table, nt, &afi->tables, list) {
list_for_each_entry(chain, &table->chains, list)
- nf_tables_unregister_hooks(net, table, chain,
- afi->nops);
+ nf_tables_unregister_hook(net, table, chain);
+ list_for_each_entry(flowtable, &table->flowtables, list)
+ nf_unregister_net_hooks(net, flowtable->ops,
+ flowtable->ops_len);
/* No packets are walking on these chains anymore. */
ctx.table = table;
list_for_each_entry(chain, &table->chains, list) {
@@ -5901,6 +6627,11 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
nf_tables_rule_destroy(&ctx, rule);
}
}
+ list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
+ list_del(&flowtable->list);
+ table->use--;
+ nf_tables_flowtable_destroy(flowtable);
+ }
list_for_each_entry_safe(set, ns, &table->sets, list) {
list_del(&set->list);
table->use--;
@@ -5945,6 +6676,8 @@ static int __init nf_tables_module_init(void)
if (err < 0)
goto err3;
+ register_netdevice_notifier(&nf_tables_flowtable_notifier);
+
pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <kaber@trash.net>\n");
return register_pernet_subsys(&nf_tables_net_ops);
err3:
@@ -5959,6 +6692,7 @@ static void __exit nf_tables_module_exit(void)
{
unregister_pernet_subsys(&nf_tables_net_ops);
nfnetlink_subsys_unregister(&nf_tables_subsys);
+ unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
rcu_barrier();
nf_tables_core_module_exit();
kfree(info);
diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c
index f713cc205669..58b9be7480bb 100644
--- a/net/netfilter/nf_tables_inet.c
+++ b/net/netfilter/nf_tables_inet.c
@@ -9,6 +9,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/ip.h>
+#include <linux/ipv6.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_tables.h>
@@ -16,26 +17,31 @@
#include <net/netfilter/nf_tables_ipv6.h>
#include <net/ip.h>
-static void nft_inet_hook_ops_init(struct nf_hook_ops *ops, unsigned int n)
+static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
{
- struct nft_af_info *afi;
-
- if (n == 1)
- afi = &nft_af_ipv4;
- else
- afi = &nft_af_ipv6;
-
- ops->pf = afi->family;
- if (afi->hooks[ops->hooknum])
- ops->hook = afi->hooks[ops->hooknum];
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, skb, state);
+
+ switch (state->pf) {
+ case NFPROTO_IPV4:
+ nft_set_pktinfo_ipv4(&pkt, skb);
+ break;
+ case NFPROTO_IPV6:
+ nft_set_pktinfo_ipv6(&pkt, skb);
+ break;
+ default:
+ break;
+ }
+
+ return nft_do_chain(&pkt, priv);
}
static struct nft_af_info nft_af_inet __read_mostly = {
.family = NFPROTO_INET,
.nhooks = NF_INET_NUMHOOKS,
.owner = THIS_MODULE,
- .nops = 2,
- .hook_ops_init = nft_inet_hook_ops_init,
};
static int __net_init nf_tables_inet_init_net(struct net *net)
@@ -76,6 +82,13 @@ static const struct nf_chain_type filter_inet = {
(1 << NF_INET_FORWARD) |
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_inet,
+ [NF_INET_LOCAL_OUT] = nft_do_chain_inet,
+ [NF_INET_FORWARD] = nft_do_chain_inet,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_inet,
+ [NF_INET_POST_ROUTING] = nft_do_chain_inet,
+ },
};
static int __init nf_tables_inet_init(void)
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index 403432988313..42f6f6d42a6d 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -21,15 +21,17 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb,
{
struct nft_pktinfo pkt;
+ nft_set_pktinfo(&pkt, skb, state);
+
switch (skb->protocol) {
case htons(ETH_P_IP):
- nft_set_pktinfo_ipv4_validate(&pkt, skb, state);
+ nft_set_pktinfo_ipv4_validate(&pkt, skb);
break;
case htons(ETH_P_IPV6):
- nft_set_pktinfo_ipv6_validate(&pkt, skb, state);
+ nft_set_pktinfo_ipv6_validate(&pkt, skb);
break;
default:
- nft_set_pktinfo_unspec(&pkt, skb, state);
+ nft_set_pktinfo_unspec(&pkt, skb);
break;
}
@@ -41,10 +43,6 @@ static struct nft_af_info nft_af_netdev __read_mostly = {
.nhooks = NF_NETDEV_NUMHOOKS,
.owner = THIS_MODULE,
.flags = NFT_AF_NEEDS_DEV,
- .nops = 1,
- .hooks = {
- [NF_NETDEV_INGRESS] = nft_do_chain_netdev,
- },
};
static int nf_tables_netdev_init_net(struct net *net)
@@ -81,6 +79,9 @@ static const struct nf_chain_type nft_filter_chain_netdev = {
.family = NFPROTO_NETDEV,
.owner = THIS_MODULE,
.hook_mask = (1 << NF_NETDEV_INGRESS),
+ .hooks = {
+ [NF_NETDEV_INGRESS] = nft_do_chain_netdev,
+ },
};
static void nft_netdev_event(unsigned long event, struct net_device *dev,
@@ -96,7 +97,7 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
__nft_release_basechain(ctx);
break;
case NETDEV_CHANGENAME:
- if (dev->ifindex != basechain->ops[0].dev->ifindex)
+ if (dev->ifindex != basechain->ops.dev->ifindex)
return;
strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index c09b36755ed7..2db35f2d553d 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -941,23 +941,18 @@ static struct notifier_block nfqnl_dev_notifier = {
.notifier_call = nfqnl_rcv_dev_event,
};
-static unsigned int nfqnl_nf_hook_drop(struct net *net)
+static void nfqnl_nf_hook_drop(struct net *net)
{
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
- unsigned int instances = 0;
int i;
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct nfqnl_instance *inst;
struct hlist_head *head = &q->instance_table[i];
- hlist_for_each_entry_rcu(inst, head, hlist) {
+ hlist_for_each_entry_rcu(inst, head, hlist)
nfqnl_flush(inst, NULL, 0);
- instances++;
- }
}
-
- return instances;
}
static int
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index c2945eb3397c..fa90a8402845 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -44,6 +44,7 @@ static void nft_cmp_eval(const struct nft_expr *expr,
case NFT_CMP_LT:
if (d == 0)
goto mismatch;
+ /* fall through */
case NFT_CMP_LTE:
if (d > 0)
goto mismatch;
@@ -51,6 +52,7 @@ static void nft_cmp_eval(const struct nft_expr *expr,
case NFT_CMP_GT:
if (d == 0)
goto mismatch;
+ /* fall through */
case NFT_CMP_GTE:
if (d < 0)
goto mismatch;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index b89f4f65b2a0..dcff0dc8d28b 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -169,7 +169,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
- const struct nf_hook_ops *ops = &basechain->ops[0];
+ const struct nf_hook_ops *ops = &basechain->ops;
par->hook_mask = 1 << ops->hooknum;
} else {
@@ -302,7 +302,7 @@ static int nft_target_validate(const struct nft_ctx *ctx,
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
- const struct nf_hook_ops *ops = &basechain->ops[0];
+ const struct nf_hook_ops *ops = &basechain->ops;
hook_mask = 1 << ops->hooknum;
if (target->hooks && !(hook_mask & target->hooks))
@@ -383,7 +383,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
- const struct nf_hook_ops *ops = &basechain->ops[0];
+ const struct nf_hook_ops *ops = &basechain->ops;
par->hook_mask = 1 << ops->hooknum;
} else {
@@ -481,7 +481,7 @@ static int nft_match_validate(const struct nft_ctx *ctx,
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
- const struct nf_hook_ops *ops = &basechain->ops[0];
+ const struct nf_hook_ops *ops = &basechain->ops;
hook_mask = 1 << ops->hooknum;
if (match->hooks && !(hook_mask & match->hooks))
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 66221ad891a9..ec0fd78231d8 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -184,7 +184,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (tb[NFTA_DYNSET_EXPR] != NULL) {
if (!(set->flags & NFT_SET_EVAL))
return -EINVAL;
- if (!(set->flags & NFT_SET_ANONYMOUS))
+ if (!nft_set_is_anonymous(set))
return -EOPNOTSUPP;
priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
new file mode 100644
index 000000000000..dd38785dfed9
--- /dev/null
+++ b/net/netfilter/nft_flow_offload.c
@@ -0,0 +1,264 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/ip.h> /* for ipv4 options. */
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include <net/netfilter/nf_flow_table.h>
+
+struct nft_flow_offload {
+ struct nft_flowtable *flowtable;
+};
+
+static int nft_flow_route(const struct nft_pktinfo *pkt,
+ const struct nf_conn *ct,
+ struct nf_flow_route *route,
+ enum ip_conntrack_dir dir)
+{
+ struct dst_entry *this_dst = skb_dst(pkt->skb);
+ struct dst_entry *other_dst = NULL;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ break;
+ case NFPROTO_IPV6:
+ fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
+ break;
+ }
+
+ nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
+ if (!other_dst)
+ return -ENOENT;
+
+ route->tuple[dir].dst = this_dst;
+ route->tuple[dir].ifindex = nft_in(pkt)->ifindex;
+ route->tuple[!dir].dst = other_dst;
+ route->tuple[!dir].ifindex = nft_out(pkt)->ifindex;
+
+ return 0;
+}
+
+static bool nft_flow_offload_skip(struct sk_buff *skb)
+{
+ struct ip_options *opt = &(IPCB(skb)->opt);
+
+ if (unlikely(opt->optlen))
+ return true;
+ if (skb_sec_path(skb))
+ return true;
+
+ return false;
+}
+
+static void nft_flow_offload_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+ struct nf_flowtable *flowtable = &priv->flowtable->data;
+ enum ip_conntrack_info ctinfo;
+ struct nf_flow_route route;
+ struct flow_offload *flow;
+ enum ip_conntrack_dir dir;
+ struct nf_conn *ct;
+ int ret;
+
+ if (nft_flow_offload_skip(pkt->skb))
+ goto out;
+
+ ct = nf_ct_get(pkt->skb, &ctinfo);
+ if (!ct)
+ goto out;
+
+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ break;
+ default:
+ goto out;
+ }
+
+ if (test_bit(IPS_HELPER_BIT, &ct->status))
+ goto out;
+
+ if (ctinfo == IP_CT_NEW ||
+ ctinfo == IP_CT_RELATED)
+ goto out;
+
+ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
+ goto out;
+
+ dir = CTINFO2DIR(ctinfo);
+ if (nft_flow_route(pkt, ct, &route, dir) < 0)
+ goto err_flow_route;
+
+ flow = flow_offload_alloc(ct, &route);
+ if (!flow)
+ goto err_flow_alloc;
+
+ ret = flow_offload_add(flowtable, flow);
+ if (ret < 0)
+ goto err_flow_add;
+
+ return;
+
+err_flow_add:
+ flow_offload_free(flow);
+err_flow_alloc:
+ dst_release(route.tuple[!dir].dst);
+err_flow_route:
+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
+out:
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_flow_offload_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ unsigned int hook_mask = (1 << NF_INET_FORWARD);
+
+ return nft_chain_validate_hooks(ctx->chain, hook_mask);
+}
+
+static int nft_flow_offload_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_flowtable *flowtable;
+
+ if (!tb[NFTA_FLOW_TABLE_NAME])
+ return -EINVAL;
+
+ flowtable = nf_tables_flowtable_lookup(ctx->table,
+ tb[NFTA_FLOW_TABLE_NAME],
+ genmask);
+ if (IS_ERR(flowtable))
+ return PTR_ERR(flowtable);
+
+ priv->flowtable = flowtable;
+ flowtable->use++;
+
+ return nf_ct_netns_get(ctx->net, ctx->afi->family);
+}
+
+static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+ priv->flowtable->use--;
+ nf_ct_netns_put(ctx->net, ctx->afi->family);
+}
+
+static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+ if (nla_put_string(skb, NFTA_FLOW_TABLE_NAME, priv->flowtable->name))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_flow_offload_type;
+static const struct nft_expr_ops nft_flow_offload_ops = {
+ .type = &nft_flow_offload_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)),
+ .eval = nft_flow_offload_eval,
+ .init = nft_flow_offload_init,
+ .destroy = nft_flow_offload_destroy,
+ .validate = nft_flow_offload_validate,
+ .dump = nft_flow_offload_dump,
+};
+
+static struct nft_expr_type nft_flow_offload_type __read_mostly = {
+ .name = "flow_offload",
+ .ops = &nft_flow_offload_ops,
+ .maxattr = NFTA_FLOW_MAX,
+ .owner = THIS_MODULE,
+};
+
+static void flow_offload_iterate_cleanup(struct flow_offload *flow, void *data)
+{
+ struct net_device *dev = data;
+
+ if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
+ return;
+
+ flow_offload_dead(flow);
+}
+
+static void nft_flow_offload_iterate_cleanup(struct nf_flowtable *flowtable,
+ void *data)
+{
+ nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data);
+}
+
+static int flow_offload_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ if (event != NETDEV_DOWN)
+ return NOTIFY_DONE;
+
+ nft_flow_table_iterate(dev_net(dev), nft_flow_offload_iterate_cleanup, dev);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block flow_offload_netdev_notifier = {
+ .notifier_call = flow_offload_netdev_event,
+};
+
+static int __init nft_flow_offload_module_init(void)
+{
+ int err;
+
+ register_netdevice_notifier(&flow_offload_netdev_notifier);
+
+ err = nft_register_expr(&nft_flow_offload_type);
+ if (err < 0)
+ goto register_expr;
+
+ return 0;
+
+register_expr:
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+ return err;
+}
+
+static void __exit nft_flow_offload_module_exit(void)
+{
+ struct net *net;
+
+ nft_unregister_expr(&nft_flow_offload_type);
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+ rtnl_lock();
+ for_each_net(net)
+ nft_flow_table_iterate(net, nft_flow_offload_iterate_cleanup, NULL);
+ rtnl_unlock();
+}
+
+module_init(nft_flow_offload_module_init);
+module_exit(nft_flow_offload_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("flow_offload");
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 5a60eb23a7ed..1a91e676f13e 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -210,6 +210,11 @@ void nft_meta_get_eval(const struct nft_expr *expr,
*dest = prandom_u32_state(state);
break;
}
+#ifdef CONFIG_XFRM
+ case NFT_META_SECPATH:
+ nft_reg_store8(dest, !!skb->sp);
+ break;
+#endif
default:
WARN_ON(1);
goto err;
@@ -308,6 +313,11 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
prandom_init_once(&nft_prandom_state);
len = sizeof(u32);
break;
+#ifdef CONFIG_XFRM
+ case NFT_META_SECPATH:
+ len = sizeof(u8);
+ break;
+#endif
default:
return -EOPNOTSUPP;
}
@@ -318,6 +328,38 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
}
EXPORT_SYMBOL_GPL(nft_meta_get_init);
+static int nft_meta_get_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+#ifdef CONFIG_XFRM
+ const struct nft_meta *priv = nft_expr_priv(expr);
+ unsigned int hooks;
+
+ if (priv->key != NFT_META_SECPATH)
+ return 0;
+
+ switch (ctx->afi->family) {
+ case NFPROTO_NETDEV:
+ hooks = 1 << NF_NETDEV_INGRESS;
+ break;
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+#else
+ return 0;
+#endif
+}
+
int nft_meta_set_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
@@ -434,6 +476,7 @@ static const struct nft_expr_ops nft_meta_get_ops = {
.eval = nft_meta_get_eval,
.init = nft_meta_get_init,
.dump = nft_meta_get_dump,
+ .validate = nft_meta_get_validate,
};
static const struct nft_expr_ops nft_meta_set_ops = {
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index a6b7d05aeacf..11a2071b6dd4 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -27,7 +27,7 @@ static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skb
{
u32 minlen = sizeof(struct ipv6hdr), mtu = dst_mtu(skbdst);
const struct sk_buff *skb = pkt->skb;
- const struct nf_afinfo *ai;
+ struct dst_entry *dst = NULL;
struct flowi fl;
memset(&fl, 0, sizeof(fl));
@@ -43,15 +43,10 @@ static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skb
break;
}
- ai = nf_get_afinfo(nft_pf(pkt));
- if (ai) {
- struct dst_entry *dst = NULL;
-
- ai->route(nft_net(pkt), &dst, &fl, false);
- if (dst) {
- mtu = min(mtu, dst_mtu(dst));
- dst_release(dst);
- }
+ nf_route(nft_net(pkt), &dst, &fl, false, nft_pf(pkt));
+ if (dst) {
+ mtu = min(mtu, dst_mtu(dst));
+ dst_release(dst);
}
if (mtu <= minlen || mtu > 0xffff)
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
new file mode 100644
index 000000000000..0b660c568156
--- /dev/null
+++ b/net/netfilter/utils.c
@@ -0,0 +1,90 @@
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_queue.h>
+
+__sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u_int8_t protocol,
+ unsigned short family)
+{
+ const struct nf_ipv6_ops *v6ops;
+ __sum16 csum = 0;
+
+ switch (family) {
+ case AF_INET:
+ csum = nf_ip_checksum(skb, hook, dataoff, protocol);
+ break;
+ case AF_INET6:
+ v6ops = rcu_dereference(nf_ipv6_ops);
+ if (v6ops)
+ csum = v6ops->checksum(skb, hook, dataoff, protocol);
+ break;
+ }
+
+ return csum;
+}
+EXPORT_SYMBOL_GPL(nf_checksum);
+
+__sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u_int8_t protocol, unsigned short family)
+{
+ const struct nf_ipv6_ops *v6ops;
+ __sum16 csum = 0;
+
+ switch (family) {
+ case AF_INET:
+ csum = nf_ip_checksum_partial(skb, hook, dataoff, len,
+ protocol);
+ break;
+ case AF_INET6:
+ v6ops = rcu_dereference(nf_ipv6_ops);
+ if (v6ops)
+ csum = v6ops->checksum_partial(skb, hook, dataoff, len,
+ protocol);
+ break;
+ }
+
+ return csum;
+}
+EXPORT_SYMBOL_GPL(nf_checksum_partial);
+
+int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
+ bool strict, unsigned short family)
+{
+ const struct nf_ipv6_ops *v6ops;
+ int ret = 0;
+
+ switch (family) {
+ case AF_INET:
+ ret = nf_ip_route(net, dst, fl, strict);
+ break;
+ case AF_INET6:
+ v6ops = rcu_dereference(nf_ipv6_ops);
+ if (v6ops)
+ ret = v6ops->route(net, dst, fl, strict);
+ break;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_route);
+
+int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry)
+{
+ const struct nf_ipv6_ops *v6ops;
+ int ret = 0;
+
+ switch (entry->state.pf) {
+ case AF_INET:
+ ret = nf_ip_reroute(skb, entry);
+ break;
+ case AF_INET6:
+ v6ops = rcu_dereference(nf_ipv6_ops);
+ if (v6ops)
+ ret = v6ops->reroute(skb, entry);
+ break;
+ }
+ return ret;
+}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 55802e97f906..10c19a3f4cbd 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1027,7 +1027,7 @@ void xt_free_table_info(struct xt_table_info *info)
}
EXPORT_SYMBOL(xt_free_table_info);
-/* Find table by name, grabs mutex & ref. Returns NULL on error. */
+/* Find table by name, grabs mutex & ref. Returns ERR_PTR on error. */
struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
const char *name)
{
@@ -1043,17 +1043,17 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
/* Table doesn't exist in this netns, re-try init */
list_for_each_entry(t, &init_net.xt.tables[af], list) {
+ int err;
+
if (strcmp(t->name, name))
continue;
- if (!try_module_get(t->me)) {
- mutex_unlock(&xt[af].mutex);
- return NULL;
- }
-
+ if (!try_module_get(t->me))
+ goto out;
mutex_unlock(&xt[af].mutex);
- if (t->table_init(net) != 0) {
+ err = t->table_init(net);
+ if (err < 0) {
module_put(t->me);
- return NULL;
+ return ERR_PTR(err);
}
found = t;
@@ -1073,10 +1073,28 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
module_put(found->me);
out:
mutex_unlock(&xt[af].mutex);
- return NULL;
+ return ERR_PTR(-ENOENT);
}
EXPORT_SYMBOL_GPL(xt_find_table_lock);
+struct xt_table *xt_request_find_table_lock(struct net *net, u_int8_t af,
+ const char *name)
+{
+ struct xt_table *t = xt_find_table_lock(net, af, name);
+
+#ifdef CONFIG_MODULE
+ if (IS_ERR(t)) {
+ int err = request_module("%stable_%s", xt_prefix[af], name);
+ if (err)
+ return ERR_PTR(err);
+ t = xt_find_table_lock(net, af, name);
+ }
+#endif
+
+ return t;
+}
+EXPORT_SYMBOL_GPL(xt_request_find_table_lock);
+
void xt_table_unlock(struct xt_table *table)
{
mutex_unlock(&xt[table->af].mutex);
@@ -1397,7 +1415,7 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
trav->curr = trav->curr->next;
if (trav->curr != trav->head)
break;
- /* fallthru, _stop will unlock */
+ /* fall through */
default:
return NULL;
}
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 9dae4d665965..99bb8e410f22 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -48,7 +48,6 @@ static u_int32_t tcpmss_reverse_mtu(struct net *net,
unsigned int family)
{
struct flowi fl;
- const struct nf_afinfo *ai;
struct rtable *rt = NULL;
u_int32_t mtu = ~0U;
@@ -62,10 +61,8 @@ static u_int32_t tcpmss_reverse_mtu(struct net *net,
memset(fl6, 0, sizeof(*fl6));
fl6->daddr = ipv6_hdr(skb)->saddr;
}
- ai = nf_get_afinfo(family);
- if (ai != NULL)
- ai->route(net, (struct dst_entry **)&rt, &fl, false);
+ nf_route(net, (struct dst_entry **)&rt, &fl, false, family);
if (rt != NULL) {
mtu = dst_mtu(&rt->dst);
dst_release(&rt->dst);
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 3b2be2ae6987..911a7c0da504 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -36,7 +36,7 @@ MODULE_ALIAS("ip6t_addrtype");
static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
const struct in6_addr *addr, u16 mask)
{
- const struct nf_afinfo *afinfo;
+ const struct nf_ipv6_ops *v6ops;
struct flowi6 flow;
struct rt6_info *rt;
u32 ret = 0;
@@ -47,17 +47,14 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
if (dev)
flow.flowi6_oif = dev->ifindex;
- afinfo = nf_get_afinfo(NFPROTO_IPV6);
- if (afinfo != NULL) {
- const struct nf_ipv6_ops *v6ops;
-
+ v6ops = nf_get_ipv6_ops();
+ if (v6ops) {
if (dev && (mask & XT_ADDRTYPE_LOCAL)) {
- v6ops = nf_get_ipv6_ops();
- if (v6ops && v6ops->chk_addr(net, addr, dev, true))
+ if (v6ops->chk_addr(net, addr, dev, true))
ret = XT_ADDRTYPE_LOCAL;
}
- route_err = afinfo->route(net, (struct dst_entry **)&rt,
- flowi6_to_flowi(&flow), false);
+ route_err = v6ops->route(net, (struct dst_entry **)&rt,
+ flowi6_to_flowi(&flow), false);
} else {
route_err = 1;
}
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 1f7fbd3c7e5a..06b090d8e901 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -55,21 +55,11 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
{
- mm_segment_t oldfs = get_fs();
- int retval, fd;
-
if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX)
return -EINVAL;
- set_fs(KERNEL_DS);
- fd = bpf_obj_get_user(path, 0);
- set_fs(oldfs);
- if (fd < 0)
- return fd;
-
- retval = __bpf_mt_check_fd(fd, ret);
- sys_close(fd);
- return retval;
+ *ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER);
+ return PTR_ERR_OR_ZERO(*ret);
}
static int bpf_mt_check(const struct xt_mtchk_param *par)
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index a6214f235333..b1b17b9353e1 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -12,292 +12,30 @@
* GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au).
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/jhash.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/rbtree.h>
+
#include <linux/module.h>
-#include <linux/random.h>
#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <linux/netfilter/nf_conntrack_tcp.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_connlimit.h>
+
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>
-
-#define CONNLIMIT_SLOTS 256U
-
-#ifdef CONFIG_LOCKDEP
-#define CONNLIMIT_LOCK_SLOTS 8U
-#else
-#define CONNLIMIT_LOCK_SLOTS 256U
-#endif
-
-#define CONNLIMIT_GC_MAX_NODES 8
-
-/* we will save the tuples of all connections we care about */
-struct xt_connlimit_conn {
- struct hlist_node node;
- struct nf_conntrack_tuple tuple;
-};
-
-struct xt_connlimit_rb {
- struct rb_node node;
- struct hlist_head hhead; /* connections/hosts in same subnet */
- union nf_inet_addr addr; /* search key */
-};
-
-static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] __cacheline_aligned_in_smp;
-
-struct xt_connlimit_data {
- struct rb_root climit_root[CONNLIMIT_SLOTS];
-};
-
-static u_int32_t connlimit_rnd __read_mostly;
-static struct kmem_cache *connlimit_rb_cachep __read_mostly;
-static struct kmem_cache *connlimit_conn_cachep __read_mostly;
-
-static inline unsigned int connlimit_iphash(__be32 addr)
-{
- return jhash_1word((__force __u32)addr,
- connlimit_rnd) % CONNLIMIT_SLOTS;
-}
-
-static inline unsigned int
-connlimit_iphash6(const union nf_inet_addr *addr)
-{
- return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6),
- connlimit_rnd) % CONNLIMIT_SLOTS;
-}
-
-static inline bool already_closed(const struct nf_conn *conn)
-{
- if (nf_ct_protonum(conn) == IPPROTO_TCP)
- return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT ||
- conn->proto.tcp.state == TCP_CONNTRACK_CLOSE;
- else
- return 0;
-}
-
-static int
-same_source(const union nf_inet_addr *addr,
- const union nf_inet_addr *u3, u_int8_t family)
-{
- if (family == NFPROTO_IPV4)
- return ntohl(addr->ip) - ntohl(u3->ip);
-
- return memcmp(addr->ip6, u3->ip6, sizeof(addr->ip6));
-}
-
-static bool add_hlist(struct hlist_head *head,
- const struct nf_conntrack_tuple *tuple,
- const union nf_inet_addr *addr)
-{
- struct xt_connlimit_conn *conn;
-
- conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
- if (conn == NULL)
- return false;
- conn->tuple = *tuple;
- hlist_add_head(&conn->node, head);
- return true;
-}
-
-static unsigned int check_hlist(struct net *net,
- struct hlist_head *head,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone,
- bool *addit)
-{
- const struct nf_conntrack_tuple_hash *found;
- struct xt_connlimit_conn *conn;
- struct hlist_node *n;
- struct nf_conn *found_ct;
- unsigned int length = 0;
-
- *addit = true;
-
- /* check the saved connections */
- hlist_for_each_entry_safe(conn, n, head, node) {
- found = nf_conntrack_find_get(net, zone, &conn->tuple);
- if (found == NULL) {
- hlist_del(&conn->node);
- kmem_cache_free(connlimit_conn_cachep, conn);
- continue;
- }
-
- found_ct = nf_ct_tuplehash_to_ctrack(found);
-
- if (nf_ct_tuple_equal(&conn->tuple, tuple)) {
- /*
- * Just to be sure we have it only once in the list.
- * We should not see tuples twice unless someone hooks
- * this into a table without "-p tcp --syn".
- */
- *addit = false;
- } else if (already_closed(found_ct)) {
- /*
- * we do not care about connections which are
- * closed already -> ditch it
- */
- nf_ct_put(found_ct);
- hlist_del(&conn->node);
- kmem_cache_free(connlimit_conn_cachep, conn);
- continue;
- }
-
- nf_ct_put(found_ct);
- length++;
- }
-
- return length;
-}
-
-static void tree_nodes_free(struct rb_root *root,
- struct xt_connlimit_rb *gc_nodes[],
- unsigned int gc_count)
-{
- struct xt_connlimit_rb *rbconn;
-
- while (gc_count) {
- rbconn = gc_nodes[--gc_count];
- rb_erase(&rbconn->node, root);
- kmem_cache_free(connlimit_rb_cachep, rbconn);
- }
-}
-
-static unsigned int
-count_tree(struct net *net, struct rb_root *root,
- const struct nf_conntrack_tuple *tuple,
- const union nf_inet_addr *addr,
- u8 family, const struct nf_conntrack_zone *zone)
-{
- struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
- struct rb_node **rbnode, *parent;
- struct xt_connlimit_rb *rbconn;
- struct xt_connlimit_conn *conn;
- unsigned int gc_count;
- bool no_gc = false;
-
- restart:
- gc_count = 0;
- parent = NULL;
- rbnode = &(root->rb_node);
- while (*rbnode) {
- int diff;
- bool addit;
-
- rbconn = rb_entry(*rbnode, struct xt_connlimit_rb, node);
-
- parent = *rbnode;
- diff = same_source(addr, &rbconn->addr, family);
- if (diff < 0) {
- rbnode = &((*rbnode)->rb_left);
- } else if (diff > 0) {
- rbnode = &((*rbnode)->rb_right);
- } else {
- /* same source network -> be counted! */
- unsigned int count;
- count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
-
- tree_nodes_free(root, gc_nodes, gc_count);
- if (!addit)
- return count;
-
- if (!add_hlist(&rbconn->hhead, tuple, addr))
- return 0; /* hotdrop */
-
- return count + 1;
- }
-
- if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
- continue;
-
- /* only used for GC on hhead, retval and 'addit' ignored */
- check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
- if (hlist_empty(&rbconn->hhead))
- gc_nodes[gc_count++] = rbconn;
- }
-
- if (gc_count) {
- no_gc = true;
- tree_nodes_free(root, gc_nodes, gc_count);
- /* tree_node_free before new allocation permits
- * allocator to re-use newly free'd object.
- *
- * This is a rare event; in most cases we will find
- * existing node to re-use. (or gc_count is 0).
- */
- goto restart;
- }
-
- /* no match, need to insert new node */
- rbconn = kmem_cache_alloc(connlimit_rb_cachep, GFP_ATOMIC);
- if (rbconn == NULL)
- return 0;
-
- conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
- if (conn == NULL) {
- kmem_cache_free(connlimit_rb_cachep, rbconn);
- return 0;
- }
-
- conn->tuple = *tuple;
- rbconn->addr = *addr;
-
- INIT_HLIST_HEAD(&rbconn->hhead);
- hlist_add_head(&conn->node, &rbconn->hhead);
-
- rb_link_node(&rbconn->node, parent, rbnode);
- rb_insert_color(&rbconn->node, root);
- return 1;
-}
-
-static int count_them(struct net *net,
- struct xt_connlimit_data *data,
- const struct nf_conntrack_tuple *tuple,
- const union nf_inet_addr *addr,
- u_int8_t family,
- const struct nf_conntrack_zone *zone)
-{
- struct rb_root *root;
- int count;
- u32 hash;
-
- if (family == NFPROTO_IPV6)
- hash = connlimit_iphash6(addr);
- else
- hash = connlimit_iphash(addr->ip);
- root = &data->climit_root[hash];
-
- spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
-
- count = count_tree(net, root, tuple, addr, family, zone);
-
- spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
-
- return count;
-}
+#include <net/netfilter/nf_conntrack_count.h>
static bool
connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
struct net *net = xt_net(par);
const struct xt_connlimit_info *info = par->matchinfo;
- union nf_inet_addr addr;
struct nf_conntrack_tuple tuple;
const struct nf_conntrack_tuple *tuple_ptr = &tuple;
const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct;
unsigned int connections;
+ u32 key[5];
ct = nf_ct_get(skb, &ctinfo);
if (ct != NULL) {
@@ -310,6 +48,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (xt_family(par) == NFPROTO_IPV6) {
const struct ipv6hdr *iph = ipv6_hdr(skb);
+ union nf_inet_addr addr;
unsigned int i;
memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
@@ -317,22 +56,24 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
for (i = 0; i < ARRAY_SIZE(addr.ip6); ++i)
addr.ip6[i] &= info->mask.ip6[i];
+ memcpy(key, &addr, sizeof(addr.ip6));
+ key[4] = zone->id;
} else {
const struct iphdr *iph = ip_hdr(skb);
- addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ?
+ key[0] = (info->flags & XT_CONNLIMIT_DADDR) ?
iph->daddr : iph->saddr;
- addr.ip &= info->mask.ip;
+ key[0] &= info->mask.ip;
+ key[1] = zone->id;
}
- connections = count_them(net, info->data, tuple_ptr, &addr,
- xt_family(par), zone);
+ connections = nf_conncount_count(net, info->data, key,
+ xt_family(par), tuple_ptr, zone);
if (connections == 0)
/* kmalloc failed, drop it entirely */
goto hotdrop;
- return (connections > info->limit) ^
- !!(info->flags & XT_CONNLIMIT_INVERT);
+ return (connections > info->limit) ^ !!(info->flags & XT_CONNLIMIT_INVERT);
hotdrop:
par->hotdrop = true;
@@ -342,61 +83,27 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
static int connlimit_mt_check(const struct xt_mtchk_param *par)
{
struct xt_connlimit_info *info = par->matchinfo;
- unsigned int i;
- int ret;
+ unsigned int keylen;
- net_get_random_once(&connlimit_rnd, sizeof(connlimit_rnd));
-
- ret = nf_ct_netns_get(par->net, par->family);
- if (ret < 0) {
- pr_info("cannot load conntrack support for "
- "address family %u\n", par->family);
- return ret;
- }
+ keylen = sizeof(u32);
+ if (par->family == NFPROTO_IPV6)
+ keylen += sizeof(struct in6_addr);
+ else
+ keylen += sizeof(struct in_addr);
/* init private data */
- info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL);
- if (info->data == NULL) {
- nf_ct_netns_put(par->net, par->family);
- return -ENOMEM;
- }
-
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
- info->data->climit_root[i] = RB_ROOT;
+ info->data = nf_conncount_init(par->net, par->family, keylen);
+ if (IS_ERR(info->data))
+ return PTR_ERR(info->data);
return 0;
}
-static void destroy_tree(struct rb_root *r)
-{
- struct xt_connlimit_conn *conn;
- struct xt_connlimit_rb *rbconn;
- struct hlist_node *n;
- struct rb_node *node;
-
- while ((node = rb_first(r)) != NULL) {
- rbconn = rb_entry(node, struct xt_connlimit_rb, node);
-
- rb_erase(node, r);
-
- hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node)
- kmem_cache_free(connlimit_conn_cachep, conn);
-
- kmem_cache_free(connlimit_rb_cachep, rbconn);
- }
-}
-
static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
{
const struct xt_connlimit_info *info = par->matchinfo;
- unsigned int i;
-
- nf_ct_netns_put(par->net, par->family);
-
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
- destroy_tree(&info->data->climit_root[i]);
- kfree(info->data);
+ nf_conncount_destroy(par->net, par->family, info->data);
}
static struct xt_match connlimit_mt_reg __read_mostly = {
@@ -413,40 +120,12 @@ static struct xt_match connlimit_mt_reg __read_mostly = {
static int __init connlimit_mt_init(void)
{
- int ret, i;
-
- BUILD_BUG_ON(CONNLIMIT_LOCK_SLOTS > CONNLIMIT_SLOTS);
- BUILD_BUG_ON((CONNLIMIT_SLOTS % CONNLIMIT_LOCK_SLOTS) != 0);
-
- for (i = 0; i < CONNLIMIT_LOCK_SLOTS; ++i)
- spin_lock_init(&xt_connlimit_locks[i]);
-
- connlimit_conn_cachep = kmem_cache_create("xt_connlimit_conn",
- sizeof(struct xt_connlimit_conn),
- 0, 0, NULL);
- if (!connlimit_conn_cachep)
- return -ENOMEM;
-
- connlimit_rb_cachep = kmem_cache_create("xt_connlimit_rb",
- sizeof(struct xt_connlimit_rb),
- 0, 0, NULL);
- if (!connlimit_rb_cachep) {
- kmem_cache_destroy(connlimit_conn_cachep);
- return -ENOMEM;
- }
- ret = xt_register_match(&connlimit_mt_reg);
- if (ret != 0) {
- kmem_cache_destroy(connlimit_conn_cachep);
- kmem_cache_destroy(connlimit_rb_cachep);
- }
- return ret;
+ return xt_register_match(&connlimit_mt_reg);
}
static void __exit connlimit_mt_exit(void)
{
xt_unregister_match(&connlimit_mt_reg);
- kmem_cache_destroy(connlimit_conn_cachep);
- kmem_cache_destroy(connlimit_rb_cachep);
}
module_init(connlimit_mt_init);
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 64285702afd5..16b6b11ee83f 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -39,13 +39,17 @@ match_set(ip_set_id_t index, const struct sk_buff *skb,
return inv;
}
-#define ADT_OPT(n, f, d, fs, cfs, t) \
-struct ip_set_adt_opt n = { \
- .family = f, \
- .dim = d, \
- .flags = fs, \
- .cmdflags = cfs, \
- .ext.timeout = t, \
+#define ADT_OPT(n, f, d, fs, cfs, t, p, b, po, bo) \
+struct ip_set_adt_opt n = { \
+ .family = f, \
+ .dim = d, \
+ .flags = fs, \
+ .cmdflags = cfs, \
+ .ext.timeout = t, \
+ .ext.packets = p, \
+ .ext.bytes = b, \
+ .ext.packets_op = po, \
+ .ext.bytes_op = bo, \
}
/* Revision 0 interface: backward compatible with netfilter/iptables */
@@ -56,7 +60,8 @@ set_match_v0(const struct sk_buff *skb, struct xt_action_param *par)
const struct xt_set_info_match_v0 *info = par->matchinfo;
ADT_OPT(opt, xt_family(par), info->match_set.u.compat.dim,
- info->match_set.u.compat.flags, 0, UINT_MAX);
+ info->match_set.u.compat.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
return match_set(info->match_set.index, skb, par, &opt,
info->match_set.u.compat.flags & IPSET_INV_MATCH);
@@ -119,7 +124,8 @@ set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)
const struct xt_set_info_match_v1 *info = par->matchinfo;
ADT_OPT(opt, xt_family(par), info->match_set.dim,
- info->match_set.flags, 0, UINT_MAX);
+ info->match_set.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
if (opt.flags & IPSET_RETURN_NOMATCH)
opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH;
@@ -161,45 +167,21 @@ set_match_v1_destroy(const struct xt_mtdtor_param *par)
/* Revision 3 match */
static bool
-match_counter0(u64 counter, const struct ip_set_counter_match0 *info)
-{
- switch (info->op) {
- case IPSET_COUNTER_NONE:
- return true;
- case IPSET_COUNTER_EQ:
- return counter == info->value;
- case IPSET_COUNTER_NE:
- return counter != info->value;
- case IPSET_COUNTER_LT:
- return counter < info->value;
- case IPSET_COUNTER_GT:
- return counter > info->value;
- }
- return false;
-}
-
-static bool
set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_set_info_match_v3 *info = par->matchinfo;
- int ret;
ADT_OPT(opt, xt_family(par), info->match_set.dim,
- info->match_set.flags, info->flags, UINT_MAX);
+ info->match_set.flags, info->flags, UINT_MAX,
+ info->packets.value, info->bytes.value,
+ info->packets.op, info->bytes.op);
if (info->packets.op != IPSET_COUNTER_NONE ||
info->bytes.op != IPSET_COUNTER_NONE)
opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS;
- ret = match_set(info->match_set.index, skb, par, &opt,
- info->match_set.flags & IPSET_INV_MATCH);
-
- if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS))
- return ret;
-
- if (!match_counter0(opt.ext.packets, &info->packets))
- return false;
- return match_counter0(opt.ext.bytes, &info->bytes);
+ return match_set(info->match_set.index, skb, par, &opt,
+ info->match_set.flags & IPSET_INV_MATCH);
}
#define set_match_v3_checkentry set_match_v1_checkentry
@@ -208,45 +190,21 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
/* Revision 4 match */
static bool
-match_counter(u64 counter, const struct ip_set_counter_match *info)
-{
- switch (info->op) {
- case IPSET_COUNTER_NONE:
- return true;
- case IPSET_COUNTER_EQ:
- return counter == info->value;
- case IPSET_COUNTER_NE:
- return counter != info->value;
- case IPSET_COUNTER_LT:
- return counter < info->value;
- case IPSET_COUNTER_GT:
- return counter > info->value;
- }
- return false;
-}
-
-static bool
set_match_v4(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_set_info_match_v4 *info = par->matchinfo;
- int ret;
ADT_OPT(opt, xt_family(par), info->match_set.dim,
- info->match_set.flags, info->flags, UINT_MAX);
+ info->match_set.flags, info->flags, UINT_MAX,
+ info->packets.value, info->bytes.value,
+ info->packets.op, info->bytes.op);
if (info->packets.op != IPSET_COUNTER_NONE ||
info->bytes.op != IPSET_COUNTER_NONE)
opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS;
- ret = match_set(info->match_set.index, skb, par, &opt,
- info->match_set.flags & IPSET_INV_MATCH);
-
- if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS))
- return ret;
-
- if (!match_counter(opt.ext.packets, &info->packets))
- return false;
- return match_counter(opt.ext.bytes, &info->bytes);
+ return match_set(info->match_set.index, skb, par, &opt,
+ info->match_set.flags & IPSET_INV_MATCH);
}
#define set_match_v4_checkentry set_match_v1_checkentry
@@ -260,9 +218,11 @@ set_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
const struct xt_set_info_target_v0 *info = par->targinfo;
ADT_OPT(add_opt, xt_family(par), info->add_set.u.compat.dim,
- info->add_set.u.compat.flags, 0, UINT_MAX);
+ info->add_set.u.compat.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
ADT_OPT(del_opt, xt_family(par), info->del_set.u.compat.dim,
- info->del_set.u.compat.flags, 0, UINT_MAX);
+ info->del_set.u.compat.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_add(info->add_set.index, skb, par, &add_opt);
@@ -333,9 +293,11 @@ set_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
const struct xt_set_info_target_v1 *info = par->targinfo;
ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
- info->add_set.flags, 0, UINT_MAX);
+ info->add_set.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
- info->del_set.flags, 0, UINT_MAX);
+ info->del_set.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_add(info->add_set.index, skb, par, &add_opt);
@@ -402,9 +364,11 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
const struct xt_set_info_target_v2 *info = par->targinfo;
ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
- info->add_set.flags, info->flags, info->timeout);
+ info->add_set.flags, info->flags, info->timeout,
+ 0, 0, 0, 0);
ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
- info->del_set.flags, 0, UINT_MAX);
+ info->del_set.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
/* Normalize to fit into jiffies */
if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
@@ -432,11 +396,14 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
int ret;
ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
- info->add_set.flags, info->flags, info->timeout);
+ info->add_set.flags, info->flags, info->timeout,
+ 0, 0, 0, 0);
ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
- info->del_set.flags, 0, UINT_MAX);
+ info->del_set.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
ADT_OPT(map_opt, xt_family(par), info->map_set.dim,
- info->map_set.flags, 0, UINT_MAX);
+ info->map_set.flags, 0, UINT_MAX,
+ 0, 0, 0, 0);
/* Normalize to fit into jiffies */
if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 3e7747549f90..bb95c43aae76 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -16,7 +16,6 @@
* 02110-1301, USA
*/
-#include <linux/hardirq.h>
#include <linux/if_vlan.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index bc2f1e0977d6..634cfcb7bba6 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -525,6 +525,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args)
local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
+ if (args->nr_local == 0)
+ return -EINVAL;
+
/* figure out the number of pages in the vector */
for (i = 0; i < args->nr_local; i++) {
if (copy_from_user(&vec, &local_vec[i],
@@ -874,6 +877,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
err:
if (page)
put_page(page);
+ rm->atomic.op_active = 0;
kfree(rm->atomic.op_notifier);
return ret;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 9d632e92cad0..b56986d41c87 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -159,7 +159,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
if (action == TC_ACT_SHOT)
this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
- tm->lastuse = lastuse;
+ tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 37e5e4decbd6..e6ff88f72900 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -231,7 +231,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
struct tcf_t *tm = &m->tcf_tm;
_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
- tm->lastuse = lastuse;
+ tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index a392eaa4a0b4..0af1c1254e0b 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -344,32 +344,24 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
struct red_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
- struct tc_red_xstats st = {
- .early = q->stats.prob_drop + q->stats.forced_drop,
- .pdrop = q->stats.pdrop,
- .other = q->stats.other,
- .marked = q->stats.prob_mark + q->stats.forced_mark,
- };
+ struct tc_red_xstats st = {0};
if (sch->flags & TCQ_F_OFFLOADED) {
- struct red_stats hw_stats = {0};
struct tc_red_qopt_offload hw_stats_request = {
.command = TC_RED_XSTATS,
.handle = sch->handle,
.parent = sch->parent,
{
- .xstats = &hw_stats,
+ .xstats = &q->stats,
},
};
- if (!dev->netdev_ops->ndo_setup_tc(dev,
- TC_SETUP_QDISC_RED,
- &hw_stats_request)) {
- st.early += hw_stats.prob_drop + hw_stats.forced_drop;
- st.pdrop += hw_stats.pdrop;
- st.other += hw_stats.other;
- st.marked += hw_stats.prob_mark + hw_stats.forced_mark;
- }
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
+ &hw_stats_request);
}
+ st.early = q->stats.prob_drop + q->stats.forced_drop;
+ st.pdrop = q->stats.pdrop;
+ st.other = q->stats.other;
+ st.marked = q->stats.prob_mark + q->stats.forced_mark;
return gnet_stats_copy_app(d, &st, sizeof(st));
}
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 621b5ca3fd1c..141c9c466ec1 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -399,20 +399,24 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
return;
}
- if (t->param_flags & SPP_PMTUD_ENABLE) {
- /* Update transports view of the MTU */
- sctp_transport_update_pmtu(t, pmtu);
-
- /* Update association pmtu. */
- sctp_assoc_sync_pmtu(asoc);
- }
+ if (!(t->param_flags & SPP_PMTUD_ENABLE))
+ /* We can't allow retransmitting in such case, as the
+ * retransmission would be sized just as before, and thus we
+ * would get another icmp, and retransmit again.
+ */
+ return;
- /* Retransmit with the new pmtu setting.
- * Normally, if PMTU discovery is disabled, an ICMP Fragmentation
- * Needed will never be sent, but if a message was sent before
- * PMTU discovery was disabled that was larger than the PMTU, it
- * would not be fragmented, so it must be re-transmitted fragmented.
+ /* Update transports view of the MTU. Return if no update was needed.
+ * If an update wasn't needed/possible, it also doesn't make sense to
+ * try to retransmit now.
*/
+ if (!sctp_transport_update_pmtu(t, pmtu))
+ return;
+
+ /* Update association pmtu. */
+ sctp_assoc_sync_pmtu(asoc);
+
+ /* Retransmit with the new pmtu setting. */
sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD);
}
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 06b644dd858c..cedf672487f9 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -156,9 +156,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
sctp_stream_outq_migrate(stream, NULL, outcnt);
sched->sched_all(stream);
- i = sctp_stream_alloc_out(stream, outcnt, gfp);
- if (i)
- return i;
+ ret = sctp_stream_alloc_out(stream, outcnt, gfp);
+ if (ret)
+ goto out;
stream->outcnt = outcnt;
for (i = 0; i < stream->outcnt; i++)
@@ -171,19 +171,17 @@ in:
if (!incnt)
goto out;
- i = sctp_stream_alloc_in(stream, incnt, gfp);
- if (i) {
- ret = -ENOMEM;
- goto free;
+ ret = sctp_stream_alloc_in(stream, incnt, gfp);
+ if (ret) {
+ sched->free(stream);
+ kfree(stream->out);
+ stream->out = NULL;
+ stream->outcnt = 0;
+ goto out;
}
stream->incnt = incnt;
- goto out;
-free:
- sched->free(stream);
- kfree(stream->out);
- stream->out = NULL;
out:
return ret;
}
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 1e5a22430cf5..47f82bd794d9 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -248,28 +248,37 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
}
-void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
+bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
{
struct dst_entry *dst = sctp_transport_dst_check(t);
+ bool change = true;
if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
- pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
- __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
- /* Use default minimum segment size and disable
- * pmtu discovery on this transport.
- */
- t->pathmtu = SCTP_DEFAULT_MINSEGMENT;
- } else {
- t->pathmtu = pmtu;
+ pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n",
+ __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
+ /* Use default minimum segment instead */
+ pmtu = SCTP_DEFAULT_MINSEGMENT;
}
+ pmtu = SCTP_TRUNC4(pmtu);
if (dst) {
dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu);
dst = sctp_transport_dst_check(t);
}
- if (!dst)
+ if (!dst) {
t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk);
+ dst = t->dst;
+ }
+
+ if (dst) {
+ /* Re-fetch, as under layers may have a higher minimum size */
+ pmtu = SCTP_TRUNC4(dst_mtu(dst));
+ change = t->pathmtu != pmtu;
+ }
+ t->pathmtu = pmtu;
+
+ return change;
}
/* Caches the dst entry and source address for a transport's destination
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 964342689f2c..20b21af2ff14 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -49,7 +49,6 @@
#include <linux/uaccess.h>
#include <linux/interrupt.h>
#include <linux/atomic.h>
-#include <asm/hardirq.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/list.h>
diff --git a/net/tipc/group.c b/net/tipc/group.c
index fb7fe971e51b..497ee34bfab9 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -49,8 +49,6 @@
#define ADV_ACTIVE (ADV_UNIT * 12)
enum mbr_state {
- MBR_QUARANTINED,
- MBR_DISCOVERED,
MBR_JOINING,
MBR_PUBLISHED,
MBR_JOINED,
@@ -65,7 +63,6 @@ struct tipc_member {
struct rb_node tree_node;
struct list_head list;
struct list_head small_win;
- struct sk_buff *event_msg;
struct sk_buff_head deferredq;
struct tipc_group *group;
u32 node;
@@ -77,7 +74,6 @@ struct tipc_member {
u16 bc_rcv_nxt;
u16 bc_syncpt;
u16 bc_acked;
- bool usr_pending;
};
struct tipc_group {
@@ -85,13 +81,11 @@ struct tipc_group {
struct list_head small_win;
struct list_head pending;
struct list_head active;
- struct list_head reclaiming;
struct tipc_nlist dests;
struct net *net;
int subid;
u32 type;
u32 instance;
- u32 domain;
u32 scope;
u32 portid;
u16 member_cnt;
@@ -101,15 +95,32 @@ struct tipc_group {
u16 bc_ackers;
bool loopback;
bool events;
+ bool open;
};
static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
int mtyp, struct sk_buff_head *xmitq);
+bool tipc_group_is_open(struct tipc_group *grp)
+{
+ return grp->open;
+}
+
+static void tipc_group_open(struct tipc_member *m, bool *wakeup)
+{
+ *wakeup = false;
+ if (list_empty(&m->small_win))
+ return;
+ list_del_init(&m->small_win);
+ m->group->open = true;
+ *wakeup = true;
+}
+
static void tipc_group_decr_active(struct tipc_group *grp,
struct tipc_member *m)
{
- if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING)
+ if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING ||
+ m->state == MBR_REMITTED)
grp->active_cnt--;
}
@@ -138,12 +149,12 @@ u16 tipc_group_bc_snd_nxt(struct tipc_group *grp)
static bool tipc_group_is_receiver(struct tipc_member *m)
{
- return m->state != MBR_QUARANTINED && m->state != MBR_LEAVING;
+ return m && m->state != MBR_JOINING && m->state != MBR_LEAVING;
}
static bool tipc_group_is_sender(struct tipc_member *m)
{
- return m && m->state >= MBR_JOINED;
+ return m && m->state != MBR_JOINING && m->state != MBR_PUBLISHED;
}
u32 tipc_group_exclude(struct tipc_group *grp)
@@ -161,6 +172,8 @@ int tipc_group_size(struct tipc_group *grp)
struct tipc_group *tipc_group_create(struct net *net, u32 portid,
struct tipc_group_req *mreq)
{
+ u32 filter = TIPC_SUB_PORTS | TIPC_SUB_NO_STATUS;
+ bool global = mreq->scope != TIPC_NODE_SCOPE;
struct tipc_group *grp;
u32 type = mreq->type;
@@ -171,22 +184,37 @@ struct tipc_group *tipc_group_create(struct net *net, u32 portid,
INIT_LIST_HEAD(&grp->small_win);
INIT_LIST_HEAD(&grp->active);
INIT_LIST_HEAD(&grp->pending);
- INIT_LIST_HEAD(&grp->reclaiming);
grp->members = RB_ROOT;
grp->net = net;
grp->portid = portid;
- grp->domain = addr_domain(net, mreq->scope);
grp->type = type;
grp->instance = mreq->instance;
grp->scope = mreq->scope;
grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK;
grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS;
- if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, &grp->subid))
+ filter |= global ? TIPC_SUB_CLUSTER_SCOPE : TIPC_SUB_NODE_SCOPE;
+ if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0,
+ filter, &grp->subid))
return grp;
kfree(grp);
return NULL;
}
+void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcvbuf)
+{
+ struct rb_root *tree = &grp->members;
+ struct tipc_member *m, *tmp;
+ struct sk_buff_head xmitq;
+
+ skb_queue_head_init(&xmitq);
+ rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
+ tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, &xmitq);
+ tipc_group_update_member(m, 0);
+ }
+ tipc_node_distr_xmit(net, &xmitq);
+ *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
+}
+
void tipc_group_delete(struct net *net, struct tipc_group *grp)
{
struct rb_root *tree = &grp->members;
@@ -277,7 +305,7 @@ static void tipc_group_add_to_tree(struct tipc_group *grp,
static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
u32 node, u32 port,
- int state)
+ u32 instance, int state)
{
struct tipc_member *m;
@@ -290,6 +318,7 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
m->group = grp;
m->node = node;
m->port = port;
+ m->instance = instance;
m->bc_acked = grp->bc_snd_nxt - 1;
grp->member_cnt++;
tipc_group_add_to_tree(grp, m);
@@ -298,9 +327,10 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
return m;
}
-void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port)
+void tipc_group_add_member(struct tipc_group *grp, u32 node,
+ u32 port, u32 instance)
{
- tipc_group_create_member(grp, node, port, MBR_DISCOVERED);
+ tipc_group_create_member(grp, node, port, instance, MBR_PUBLISHED);
}
static void tipc_group_delete_member(struct tipc_group *grp,
@@ -391,20 +421,20 @@ bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
int adv, state;
m = tipc_group_find_dest(grp, dnode, dport);
- *mbr = m;
- if (!m)
+ if (!tipc_group_is_receiver(m)) {
+ *mbr = NULL;
return false;
- if (m->usr_pending)
- return true;
+ }
+ *mbr = m;
+
if (m->window >= len)
return false;
- m->usr_pending = true;
+
+ grp->open = false;
/* If not fully advertised, do it now to prevent mutual blocking */
adv = m->advertised;
state = m->state;
- if (state < MBR_JOINED)
- return true;
if (state == MBR_JOINED && adv == ADV_IDLE)
return true;
if (state == MBR_ACTIVE && adv == ADV_ACTIVE)
@@ -422,9 +452,10 @@ bool tipc_group_bc_cong(struct tipc_group *grp, int len)
struct tipc_member *m = NULL;
/* If prev bcast was replicast, reject until all receivers have acked */
- if (grp->bc_ackers)
+ if (grp->bc_ackers) {
+ grp->open = false;
return true;
-
+ }
if (list_empty(&grp->small_win))
return false;
@@ -560,7 +591,7 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
int max_active = grp->max_active;
int reclaim_limit = max_active * 3 / 4;
int active_cnt = grp->active_cnt;
- struct tipc_member *m, *rm;
+ struct tipc_member *m, *rm, *pm;
m = tipc_group_find_member(grp, node, port);
if (!m)
@@ -570,24 +601,34 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
switch (m->state) {
case MBR_JOINED:
- /* Reclaim advertised space from least active member */
- if (!list_empty(active) && active_cnt >= reclaim_limit) {
+ /* First, decide if member can go active */
+ if (active_cnt <= max_active) {
+ m->state = MBR_ACTIVE;
+ list_add_tail(&m->list, active);
+ grp->active_cnt++;
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+ } else {
+ m->state = MBR_PENDING;
+ list_add_tail(&m->list, &grp->pending);
+ }
+
+ if (active_cnt < reclaim_limit)
+ break;
+
+ /* Reclaim from oldest active member, if possible */
+ if (!list_empty(active)) {
rm = list_first_entry(active, struct tipc_member, list);
rm->state = MBR_RECLAIMING;
- list_move_tail(&rm->list, &grp->reclaiming);
+ list_del_init(&rm->list);
tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq);
- }
- /* If max active, become pending and wait for reclaimed space */
- if (active_cnt >= max_active) {
- m->state = MBR_PENDING;
- list_add_tail(&m->list, &grp->pending);
break;
}
- /* Otherwise become active */
- m->state = MBR_ACTIVE;
- list_add_tail(&m->list, &grp->active);
- grp->active_cnt++;
- /* Fall through */
+ /* Nobody to reclaim from; - revert oldest pending to JOINED */
+ pm = list_first_entry(&grp->pending, struct tipc_member, list);
+ list_del_init(&pm->list);
+ pm->state = MBR_JOINED;
+ tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
+ break;
case MBR_ACTIVE:
if (!list_is_last(&m->list, &grp->active))
list_move_tail(&m->list, &grp->active);
@@ -599,13 +640,23 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
if (m->advertised > ADV_IDLE)
break;
m->state = MBR_JOINED;
+ grp->active_cnt--;
if (m->advertised < ADV_IDLE) {
pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
}
+
+ if (list_empty(&grp->pending))
+ return;
+
+ /* Set oldest pending member to active and advertise */
+ pm = list_first_entry(&grp->pending, struct tipc_member, list);
+ pm->state = MBR_ACTIVE;
+ list_move_tail(&pm->list, &grp->active);
+ grp->active_cnt++;
+ tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
break;
case MBR_RECLAIMING:
- case MBR_DISCOVERED:
case MBR_JOINING:
case MBR_LEAVING:
default:
@@ -613,6 +664,40 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
}
}
+static void tipc_group_create_event(struct tipc_group *grp,
+ struct tipc_member *m,
+ u32 event, u16 seqno,
+ struct sk_buff_head *inputq)
+{ u32 dnode = tipc_own_addr(grp->net);
+ struct tipc_event evt;
+ struct sk_buff *skb;
+ struct tipc_msg *hdr;
+
+ evt.event = event;
+ evt.found_lower = m->instance;
+ evt.found_upper = m->instance;
+ evt.port.ref = m->port;
+ evt.port.node = m->node;
+ evt.s.seq.type = grp->type;
+ evt.s.seq.lower = m->instance;
+ evt.s.seq.upper = m->instance;
+
+ skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_GRP_MEMBER_EVT,
+ GROUP_H_SIZE, sizeof(evt), dnode, m->node,
+ grp->portid, m->port, 0);
+ if (!skb)
+ return;
+
+ hdr = buf_msg(skb);
+ msg_set_nametype(hdr, grp->type);
+ msg_set_grp_evt(hdr, event);
+ msg_set_dest_droppable(hdr, true);
+ msg_set_grp_bc_seqno(hdr, seqno);
+ memcpy(msg_data(hdr), &evt, sizeof(evt));
+ TIPC_SKB_CB(skb)->orig_member = m->instance;
+ __skb_queue_tail(inputq, skb);
+}
+
static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
int mtyp, struct sk_buff_head *xmitq)
{
@@ -658,107 +743,95 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
u32 node = msg_orignode(hdr);
u32 port = msg_origport(hdr);
struct tipc_member *m, *pm;
- struct tipc_msg *ehdr;
u16 remitted, in_flight;
if (!grp)
return;
+ if (grp->scope == TIPC_NODE_SCOPE && node != tipc_own_addr(grp->net))
+ return;
+
m = tipc_group_find_member(grp, node, port);
switch (msg_type(hdr)) {
case GRP_JOIN_MSG:
if (!m)
m = tipc_group_create_member(grp, node, port,
- MBR_QUARANTINED);
+ 0, MBR_JOINING);
if (!m)
return;
m->bc_syncpt = msg_grp_bc_syncpt(hdr);
m->bc_rcv_nxt = m->bc_syncpt;
m->window += msg_adv_win(hdr);
- /* Wait until PUBLISH event is received */
- if (m->state == MBR_DISCOVERED) {
- m->state = MBR_JOINING;
- } else if (m->state == MBR_PUBLISHED) {
- m->state = MBR_JOINED;
- *usr_wakeup = true;
- m->usr_pending = false;
- tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
- ehdr = buf_msg(m->event_msg);
- msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
- __skb_queue_tail(inputq, m->event_msg);
- }
- list_del_init(&m->small_win);
+ /* Wait until PUBLISH event is received if necessary */
+ if (m->state != MBR_PUBLISHED)
+ return;
+
+ /* Member can be taken into service */
+ m->state = MBR_JOINED;
+ tipc_group_open(m, usr_wakeup);
tipc_group_update_member(m, 0);
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+ tipc_group_create_event(grp, m, TIPC_PUBLISHED,
+ m->bc_syncpt, inputq);
return;
case GRP_LEAVE_MSG:
if (!m)
return;
m->bc_syncpt = msg_grp_bc_syncpt(hdr);
list_del_init(&m->list);
- list_del_init(&m->small_win);
- *usr_wakeup = true;
-
- /* Wait until WITHDRAW event is received */
- if (m->state != MBR_LEAVING) {
- tipc_group_decr_active(grp, m);
- m->state = MBR_LEAVING;
- return;
- }
- /* Otherwise deliver already received WITHDRAW event */
- ehdr = buf_msg(m->event_msg);
- msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
- __skb_queue_tail(inputq, m->event_msg);
+ tipc_group_open(m, usr_wakeup);
+ tipc_group_decr_active(grp, m);
+ m->state = MBR_LEAVING;
+ tipc_group_create_event(grp, m, TIPC_WITHDRAWN,
+ m->bc_syncpt, inputq);
return;
case GRP_ADV_MSG:
if (!m)
return;
m->window += msg_adv_win(hdr);
- *usr_wakeup = m->usr_pending;
- m->usr_pending = false;
- list_del_init(&m->small_win);
+ tipc_group_open(m, usr_wakeup);
return;
case GRP_ACK_MSG:
if (!m)
return;
m->bc_acked = msg_grp_bc_acked(hdr);
if (--grp->bc_ackers)
- break;
+ return;
+ list_del_init(&m->small_win);
+ m->group->open = true;
*usr_wakeup = true;
- m->usr_pending = false;
+ tipc_group_update_member(m, 0);
return;
case GRP_RECLAIM_MSG:
if (!m)
return;
- *usr_wakeup = m->usr_pending;
- m->usr_pending = false;
tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq);
m->window = ADV_IDLE;
+ tipc_group_open(m, usr_wakeup);
return;
case GRP_REMIT_MSG:
if (!m || m->state != MBR_RECLAIMING)
return;
- list_del_init(&m->list);
- grp->active_cnt--;
remitted = msg_grp_remitted(hdr);
/* Messages preceding the REMIT still in receive queue */
if (m->advertised > remitted) {
m->state = MBR_REMITTED;
in_flight = m->advertised - remitted;
+ m->advertised = ADV_IDLE + in_flight;
+ return;
}
- /* All messages preceding the REMIT have been read */
- if (m->advertised <= remitted) {
- m->state = MBR_JOINED;
- in_flight = 0;
- }
- /* ..and the REMIT overtaken by more messages => re-advertise */
+ /* This should never happen */
if (m->advertised < remitted)
- tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+ pr_warn_ratelimited("Unexpected REMIT msg\n");
- m->advertised = ADV_IDLE + in_flight;
+ /* All messages preceding the REMIT have been read */
+ m->state = MBR_JOINED;
+ grp->active_cnt--;
+ m->advertised = ADV_IDLE;
/* Set oldest pending member to active and advertise */
if (list_empty(&grp->pending))
@@ -780,11 +853,10 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
void tipc_group_member_evt(struct tipc_group *grp,
bool *usr_wakeup,
int *sk_rcvbuf,
- struct sk_buff *skb,
+ struct tipc_msg *hdr,
struct sk_buff_head *inputq,
struct sk_buff_head *xmitq)
{
- struct tipc_msg *hdr = buf_msg(skb);
struct tipc_event *evt = (void *)msg_data(hdr);
u32 instance = evt->found_lower;
u32 node = evt->port.node;
@@ -792,86 +864,59 @@ void tipc_group_member_evt(struct tipc_group *grp,
int event = evt->event;
struct tipc_member *m;
struct net *net;
- bool node_up;
u32 self;
if (!grp)
- goto drop;
+ return;
net = grp->net;
self = tipc_own_addr(net);
if (!grp->loopback && node == self && port == grp->portid)
- goto drop;
-
- /* Convert message before delivery to user */
- msg_set_hdr_sz(hdr, GROUP_H_SIZE);
- msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE);
- msg_set_type(hdr, TIPC_GRP_MEMBER_EVT);
- msg_set_origport(hdr, port);
- msg_set_orignode(hdr, node);
- msg_set_nametype(hdr, grp->type);
- msg_set_grp_evt(hdr, event);
+ return;
m = tipc_group_find_member(grp, node, port);
- if (event == TIPC_PUBLISHED) {
- if (!m)
- m = tipc_group_create_member(grp, node, port,
- MBR_DISCOVERED);
- if (!m)
- goto drop;
-
- /* Hold back event if JOIN message not yet received */
- if (m->state == MBR_DISCOVERED) {
- m->event_msg = skb;
- m->state = MBR_PUBLISHED;
- } else {
- msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
- __skb_queue_tail(inputq, skb);
- m->state = MBR_JOINED;
- *usr_wakeup = true;
- m->usr_pending = false;
+ switch (event) {
+ case TIPC_PUBLISHED:
+ /* Send and wait for arrival of JOIN message if necessary */
+ if (!m) {
+ m = tipc_group_create_member(grp, node, port, instance,
+ MBR_PUBLISHED);
+ if (!m)
+ break;
+ tipc_group_update_member(m, 0);
+ tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq);
+ break;
}
+
+ if (m->state != MBR_JOINING)
+ break;
+
+ /* Member can be taken into service */
m->instance = instance;
- TIPC_SKB_CB(skb)->orig_member = m->instance;
- tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq);
+ m->state = MBR_JOINED;
+ tipc_group_open(m, usr_wakeup);
tipc_group_update_member(m, 0);
- } else if (event == TIPC_WITHDRAWN) {
+ tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq);
+ tipc_group_create_event(grp, m, TIPC_PUBLISHED,
+ m->bc_syncpt, inputq);
+ break;
+ case TIPC_WITHDRAWN:
if (!m)
- goto drop;
-
- TIPC_SKB_CB(skb)->orig_member = m->instance;
+ break;
- *usr_wakeup = true;
- m->usr_pending = false;
- node_up = tipc_node_is_up(net, node);
- m->event_msg = NULL;
-
- if (node_up) {
- /* Hold back event if a LEAVE msg should be expected */
- if (m->state != MBR_LEAVING) {
- m->event_msg = skb;
- tipc_group_decr_active(grp, m);
- m->state = MBR_LEAVING;
- } else {
- msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
- __skb_queue_tail(inputq, skb);
- }
- } else {
- if (m->state != MBR_LEAVING) {
- tipc_group_decr_active(grp, m);
- m->state = MBR_LEAVING;
- msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
- } else {
- msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
- }
- __skb_queue_tail(inputq, skb);
- }
+ tipc_group_decr_active(grp, m);
+ m->state = MBR_LEAVING;
list_del_init(&m->list);
- list_del_init(&m->small_win);
+ tipc_group_open(m, usr_wakeup);
+
+ /* Only send event if no LEAVE message can be expected */
+ if (!tipc_node_is_up(net, node))
+ tipc_group_create_event(grp, m, TIPC_WITHDRAWN,
+ m->bc_rcv_nxt, inputq);
+ break;
+ default:
+ break;
}
*sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
- return;
-drop:
- kfree_skb(skb);
}
diff --git a/net/tipc/group.h b/net/tipc/group.h
index d525e1cd7de5..f4a596ed9848 100644
--- a/net/tipc/group.h
+++ b/net/tipc/group.h
@@ -44,8 +44,10 @@ struct tipc_msg;
struct tipc_group *tipc_group_create(struct net *net, u32 portid,
struct tipc_group_req *mreq);
+void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcv_buf);
void tipc_group_delete(struct net *net, struct tipc_group *grp);
-void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port);
+void tipc_group_add_member(struct tipc_group *grp, u32 node,
+ u32 port, u32 instance);
struct tipc_nlist *tipc_group_dests(struct tipc_group *grp);
void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
int *scope);
@@ -54,7 +56,7 @@ void tipc_group_filter_msg(struct tipc_group *grp,
struct sk_buff_head *inputq,
struct sk_buff_head *xmitq);
void tipc_group_member_evt(struct tipc_group *grp, bool *wakeup,
- int *sk_rcvbuf, struct sk_buff *skb,
+ int *sk_rcvbuf, struct tipc_msg *hdr,
struct sk_buff_head *inputq,
struct sk_buff_head *xmitq);
void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup,
@@ -65,9 +67,9 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack);
bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
int len, struct tipc_member **m);
bool tipc_group_bc_cong(struct tipc_group *grp, int len);
+bool tipc_group_is_open(struct tipc_group *grp);
void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
u32 port, struct sk_buff_head *xmitq);
u16 tipc_group_bc_snd_nxt(struct tipc_group *grp);
void tipc_group_update_member(struct tipc_member *m, int len);
-int tipc_group_size(struct tipc_group *grp);
#endif
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index b3829bcf63c7..64cdd3c302b0 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -328,7 +328,8 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
TIPC_PUBLISHED, publ->ref,
- publ->node, created_subseq);
+ publ->node, publ->scope,
+ created_subseq);
}
return publ;
}
@@ -398,19 +399,21 @@ found:
list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
TIPC_WITHDRAWN, publ->ref,
- publ->node, removed_subseq);
+ publ->node, publ->scope,
+ removed_subseq);
}
return publ;
}
/**
- * tipc_nameseq_subscribe - attach a subscription, and issue
- * the prescribed number of events if there is any sub-
+ * tipc_nameseq_subscribe - attach a subscription, and optionally
+ * issue the prescribed number of events if there is any sub-
* sequence overlapping with the requested sequence
*/
static void tipc_nameseq_subscribe(struct name_seq *nseq,
- struct tipc_subscription *s)
+ struct tipc_subscription *s,
+ bool status)
{
struct sub_seq *sseq = nseq->sseqs;
struct tipc_name_seq ns;
@@ -420,7 +423,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
tipc_subscrp_get(s);
list_add(&s->nameseq_list, &nseq->subscriptions);
- if (!sseq)
+ if (!status || !sseq)
return;
while (sseq != &nseq->sseqs[nseq->first_free]) {
@@ -434,6 +437,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
sseq->upper,
TIPC_PUBLISHED,
crs->ref, crs->node,
+ crs->scope,
must_report);
must_report = 0;
}
@@ -597,7 +601,7 @@ not_found:
return ref;
}
-bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
+bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
struct list_head *dsts, int *dstcnt, u32 exclude,
bool all)
{
@@ -607,9 +611,6 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
struct name_seq *seq;
struct sub_seq *sseq;
- if (!tipc_in_scope(domain, self))
- return false;
-
*dstcnt = 0;
rcu_read_lock();
seq = nametbl_find_seq(net, type);
@@ -620,7 +621,7 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
if (likely(sseq)) {
info = sseq->info;
list_for_each_entry(publ, &info->zone_list, zone_list) {
- if (!tipc_in_scope(domain, publ->node))
+ if (publ->scope != scope)
continue;
if (publ->ref == exclude && publ->node == self)
continue;
@@ -638,13 +639,14 @@ exit:
return !list_empty(dsts);
}
-int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
- u32 limit, struct list_head *dports)
+int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
+ u32 scope, bool exact, struct list_head *dports)
{
- struct name_seq *seq;
- struct sub_seq *sseq;
struct sub_seq *sseq_stop;
struct name_info *info;
+ struct publication *p;
+ struct name_seq *seq;
+ struct sub_seq *sseq;
int res = 0;
rcu_read_lock();
@@ -656,15 +658,12 @@ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
sseq = seq->sseqs + nameseq_locate_subseq(seq, lower);
sseq_stop = seq->sseqs + seq->first_free;
for (; sseq != sseq_stop; sseq++) {
- struct publication *publ;
-
if (sseq->lower > upper)
break;
-
info = sseq->info;
- list_for_each_entry(publ, &info->node_list, node_list) {
- if (publ->scope <= limit)
- tipc_dest_push(dports, 0, publ->ref);
+ list_for_each_entry(p, &info->node_list, node_list) {
+ if (p->scope == scope || (!exact && p->scope < scope))
+ tipc_dest_push(dports, 0, p->ref);
}
if (info->cluster_list_size != info->node_list_size)
@@ -681,7 +680,7 @@ exit:
* - Determines if any node local ports overlap
*/
void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
- u32 upper, u32 domain,
+ u32 upper, u32 scope,
struct tipc_nlist *nodes)
{
struct sub_seq *sseq, *stop;
@@ -700,7 +699,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
for (; sseq != stop && sseq->lower <= upper; sseq++) {
info = sseq->info;
list_for_each_entry(publ, &info->zone_list, zone_list) {
- if (tipc_in_scope(domain, publ->node))
+ if (publ->scope == scope)
tipc_nlist_add(nodes, publ->node);
}
}
@@ -712,7 +711,7 @@ exit:
/* tipc_nametbl_build_group - build list of communication group members
*/
void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
- u32 type, u32 domain)
+ u32 type, u32 scope)
{
struct sub_seq *sseq, *stop;
struct name_info *info;
@@ -730,9 +729,9 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
for (; sseq != stop; sseq++) {
info = sseq->info;
list_for_each_entry(p, &info->zone_list, zone_list) {
- if (!tipc_in_scope(domain, p->node))
+ if (p->scope != scope)
continue;
- tipc_group_add_member(grp, p->node, p->ref);
+ tipc_group_add_member(grp, p->node, p->ref, p->lower);
}
}
spin_unlock_bh(&seq->lock);
@@ -811,7 +810,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
/**
* tipc_nametbl_subscribe - add a subscription object to the name table
*/
-void tipc_nametbl_subscribe(struct tipc_subscription *s)
+void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status)
{
struct tipc_net *tn = net_generic(s->net, tipc_net_id);
u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
@@ -825,7 +824,7 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s)
seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]);
if (seq) {
spin_lock_bh(&seq->lock);
- tipc_nameseq_subscribe(seq, s);
+ tipc_nameseq_subscribe(seq, s, status);
spin_unlock_bh(&seq->lock);
} else {
tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 71926e429446..b595d8aa00f0 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -100,8 +100,8 @@ struct name_table {
int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
-int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
- u32 limit, struct list_head *dports);
+int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
+ u32 scope, bool exact, struct list_head *dports);
void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
u32 type, u32 domain);
void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
@@ -121,7 +121,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
u32 lower, u32 node, u32 ref,
u32 key);
-void tipc_nametbl_subscribe(struct tipc_subscription *s);
+void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status);
void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
int tipc_nametbl_init(struct net *net);
void tipc_nametbl_stop(struct net *net);
diff --git a/net/tipc/server.c b/net/tipc/server.c
index d60c30342327..8ee5e86b7870 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -489,8 +489,8 @@ void tipc_conn_terminate(struct tipc_server *s, int conid)
}
}
-bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
- u32 lower, u32 upper, int *conid)
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
+ u32 upper, u32 filter, int *conid)
{
struct tipc_subscriber *scbr;
struct tipc_subscr sub;
@@ -501,7 +501,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
sub.seq.lower = lower;
sub.seq.upper = upper;
sub.timeout = TIPC_WAIT_FOREVER;
- sub.filter = TIPC_SUB_PORTS;
+ sub.filter = filter;
*(u32 *)&sub.usr_handle = port;
con = tipc_alloc_conn(tipc_topsrv(net));
diff --git a/net/tipc/server.h b/net/tipc/server.h
index 2113c9192633..17f49ee44cfd 100644
--- a/net/tipc/server.h
+++ b/net/tipc/server.h
@@ -41,6 +41,9 @@
#include <net/net_namespace.h>
#define TIPC_SERVER_NAME_LEN 32
+#define TIPC_SUB_CLUSTER_SCOPE 0x20
+#define TIPC_SUB_NODE_SCOPE 0x40
+#define TIPC_SUB_NO_STATUS 0x80
/**
* struct tipc_server - TIPC server structure
@@ -83,8 +86,8 @@ struct tipc_server {
int tipc_conn_sendmsg(struct tipc_server *s, int conid,
struct sockaddr_tipc *addr, void *data, size_t len);
-bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
- u32 lower, u32 upper, int *conid);
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
+ u32 upper, u32 filter, int *conid);
void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
/**
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b51d5cba5094..1f236271766c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -715,7 +715,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_group *grp = tsk->group;
+ struct tipc_group *grp;
u32 revents = 0;
sock_poll_wait(file, sk_sleep(sk), wait);
@@ -736,9 +736,9 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
revents |= POLLIN | POLLRDNORM;
break;
case TIPC_OPEN:
- if (!grp || tipc_group_size(grp))
- if (!tsk->cong_link_cnt)
- revents |= POLLOUT;
+ grp = tsk->group;
+ if ((!grp || tipc_group_is_open(grp)) && !tsk->cong_link_cnt)
+ revents |= POLLOUT;
if (!tipc_sk_type_connectionless(sk))
break;
if (skb_queue_empty(&sk->sk_receive_queue))
@@ -928,21 +928,22 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
struct list_head *cong_links = &tsk->cong_links;
int blks = tsk_blocks(GROUP_H_SIZE + dlen);
struct tipc_group *grp = tsk->group;
+ struct tipc_msg *hdr = &tsk->phdr;
struct tipc_member *first = NULL;
struct tipc_member *mbr = NULL;
struct net *net = sock_net(sk);
u32 node, port, exclude;
- u32 type, inst, domain;
struct list_head dsts;
+ u32 type, inst, scope;
int lookups = 0;
int dstcnt, rc;
bool cong;
INIT_LIST_HEAD(&dsts);
- type = dest->addr.name.name.type;
+ type = msg_nametype(hdr);
inst = dest->addr.name.name.instance;
- domain = addr_domain(net, dest->scope);
+ scope = msg_lookup_scope(hdr);
exclude = tipc_group_exclude(grp);
while (++lookups < 4) {
@@ -950,7 +951,7 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
/* Look for a non-congested destination member, if any */
while (1) {
- if (!tipc_nametbl_lookup(net, type, inst, domain, &dsts,
+ if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
&dstcnt, exclude, false))
return -EHOSTUNREACH;
tipc_dest_pop(&dsts, &node, &port);
@@ -1079,22 +1080,23 @@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
{
struct sock *sk = sock->sk;
DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
- struct tipc_name_seq *seq = &dest->addr.nameseq;
struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_group *grp = tsk->group;
+ struct tipc_msg *hdr = &tsk->phdr;
struct net *net = sock_net(sk);
- u32 domain, exclude, dstcnt;
+ u32 type, inst, scope, exclude;
struct list_head dsts;
+ u32 dstcnt;
INIT_LIST_HEAD(&dsts);
- if (seq->lower != seq->upper)
- return -ENOTSUPP;
-
- domain = addr_domain(net, dest->scope);
+ type = msg_nametype(hdr);
+ inst = dest->addr.name.name.instance;
+ scope = msg_lookup_scope(hdr);
exclude = tipc_group_exclude(grp);
- if (!tipc_nametbl_lookup(net, seq->type, seq->lower, domain,
- &dsts, &dstcnt, exclude, true))
+
+ if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
+ &dstcnt, exclude, true))
return -EHOSTUNREACH;
if (dstcnt == 1) {
@@ -1116,24 +1118,29 @@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
struct sk_buff_head *inputq)
{
- u32 scope = TIPC_CLUSTER_SCOPE;
u32 self = tipc_own_addr(net);
+ u32 type, lower, upper, scope;
struct sk_buff *skb, *_skb;
- u32 lower = 0, upper = ~0;
- struct sk_buff_head tmpq;
u32 portid, oport, onode;
+ struct sk_buff_head tmpq;
struct list_head dports;
- struct tipc_msg *msg;
- int user, mtyp, hsz;
+ struct tipc_msg *hdr;
+ int user, mtyp, hlen;
+ bool exact;
__skb_queue_head_init(&tmpq);
INIT_LIST_HEAD(&dports);
skb = tipc_skb_peek(arrvq, &inputq->lock);
for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
- msg = buf_msg(skb);
- user = msg_user(msg);
- mtyp = msg_type(msg);
+ hdr = buf_msg(skb);
+ user = msg_user(hdr);
+ mtyp = msg_type(hdr);
+ hlen = skb_headroom(skb) + msg_hdr_sz(hdr);
+ oport = msg_origport(hdr);
+ onode = msg_orignode(hdr);
+ type = msg_nametype(hdr);
+
if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
spin_lock_bh(&inputq->lock);
if (skb_peek(arrvq) == skb) {
@@ -1144,21 +1151,31 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
spin_unlock_bh(&inputq->lock);
continue;
}
- hsz = skb_headroom(skb) + msg_hdr_sz(msg);
- oport = msg_origport(msg);
- onode = msg_orignode(msg);
- if (onode == self)
- scope = TIPC_NODE_SCOPE;
-
- /* Create destination port list and message clones: */
- if (!msg_in_group(msg)) {
- lower = msg_namelower(msg);
- upper = msg_nameupper(msg);
+
+ /* Group messages require exact scope match */
+ if (msg_in_group(hdr)) {
+ lower = 0;
+ upper = ~0;
+ scope = msg_lookup_scope(hdr);
+ exact = true;
+ } else {
+ /* TIPC_NODE_SCOPE means "any scope" in this context */
+ if (onode == self)
+ scope = TIPC_NODE_SCOPE;
+ else
+ scope = TIPC_CLUSTER_SCOPE;
+ exact = false;
+ lower = msg_namelower(hdr);
+ upper = msg_nameupper(hdr);
}
- tipc_nametbl_mc_translate(net, msg_nametype(msg), lower, upper,
- scope, &dports);
+
+ /* Create destination port list: */
+ tipc_nametbl_mc_lookup(net, type, lower, upper,
+ scope, exact, &dports);
+
+ /* Clone message per destination */
while (tipc_dest_pop(&dports, NULL, &portid)) {
- _skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
+ _skb = __pskb_copy(skb, hlen, GFP_ATOMIC);
if (_skb) {
msg_set_destport(buf_msg(_skb), portid);
__skb_queue_tail(&tmpq, _skb);
@@ -1933,8 +1950,7 @@ static void tipc_sk_proto_rcv(struct sock *sk,
break;
case TOP_SRV:
tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf,
- skb, inputq, xmitq);
- skb = NULL;
+ hdr, inputq, xmitq);
break;
default:
break;
@@ -2732,7 +2748,6 @@ void tipc_sk_rht_destroy(struct net *net)
static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
{
struct net *net = sock_net(&tsk->sk);
- u32 domain = addr_domain(net, mreq->scope);
struct tipc_group *grp = tsk->group;
struct tipc_msg *hdr = &tsk->phdr;
struct tipc_name_seq seq;
@@ -2740,6 +2755,8 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
if (mreq->type < TIPC_RESERVED_TYPES)
return -EACCES;
+ if (mreq->scope > TIPC_NODE_SCOPE)
+ return -EINVAL;
if (grp)
return -EACCES;
grp = tipc_group_create(net, tsk->portid, mreq);
@@ -2752,16 +2769,16 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
seq.type = mreq->type;
seq.lower = mreq->instance;
seq.upper = seq.lower;
- tipc_nametbl_build_group(net, grp, mreq->type, domain);
+ tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope);
rc = tipc_sk_publish(tsk, mreq->scope, &seq);
if (rc) {
tipc_group_delete(net, grp);
tsk->group = NULL;
}
-
- /* Eliminate any risk that a broadcast overtakes the sent JOIN */
+ /* Eliminate any risk that a broadcast overtakes sent JOINs */
tsk->mc_method.rcast = true;
tsk->mc_method.mandatory = true;
+ tipc_group_join(net, grp, &tsk->sk.sk_rcvbuf);
return rc;
}
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 251065dfd8df..44df528ed6ab 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -118,15 +118,19 @@ void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
u32 found_upper, u32 event, u32 port_ref,
- u32 node, int must)
+ u32 node, u32 scope, int must)
{
+ u32 filter = htohl(sub->evt.s.filter, sub->swap);
struct tipc_name_seq seq;
tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq);
if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
return;
- if (!must &&
- !(htohl(sub->evt.s.filter, sub->swap) & TIPC_SUB_PORTS))
+ if (!must && !(filter & TIPC_SUB_PORTS))
+ return;
+ if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE)
+ return;
+ if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE)
return;
tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
@@ -286,7 +290,8 @@ static struct tipc_subscription *tipc_subscrp_create(struct net *net,
}
static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
- struct tipc_subscriber *subscriber, int swap)
+ struct tipc_subscriber *subscriber, int swap,
+ bool status)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_subscription *sub = NULL;
@@ -299,7 +304,7 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
spin_lock_bh(&subscriber->lock);
list_add(&sub->subscrp_list, &subscriber->subscrp_list);
sub->subscriber = subscriber;
- tipc_nametbl_subscribe(sub);
+ tipc_nametbl_subscribe(sub, status);
tipc_subscrb_get(subscriber);
spin_unlock_bh(&subscriber->lock);
@@ -323,6 +328,7 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid,
{
struct tipc_subscriber *subscriber = usr_data;
struct tipc_subscr *s = (struct tipc_subscr *)buf;
+ bool status;
int swap;
/* Determine subscriber's endianness */
@@ -334,8 +340,8 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid,
s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
return tipc_subscrp_cancel(s, subscriber);
}
-
- tipc_subscrp_subscribe(net, s, subscriber, swap);
+ status = !(s->filter & htohl(TIPC_SUB_NO_STATUS, swap));
+ tipc_subscrp_subscribe(net, s, subscriber, swap, status);
}
/* Handle one request to establish a new subscriber */
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index ee52957dc952..f3edca775d9f 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -71,7 +71,7 @@ int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
u32 found_upper);
void tipc_subscrp_report_overlap(struct tipc_subscription *sub,
u32 found_lower, u32 found_upper, u32 event,
- u32 port_ref, u32 node, int must);
+ u32 port_ref, u32 node, u32 scope, int must);
void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
struct tipc_name_seq *out);
u32 tipc_subscrp_convert_seq_type(u32 type, int swap);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 79a9ff682b7e..c084dd2205ac 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -11387,7 +11387,8 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
break;
case NL80211_NAN_FUNC_FOLLOW_UP:
if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] ||
- !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) {
+ !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] ||
+ !tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]) {
err = -EINVAL;
goto out;
}