summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/vrf.c41
-rw-r--r--include/net/ip6_route.h21
-rw-r--r--include/net/l3mdev.h42
-rw-r--r--net/ipv6/addrconf.c48
-rw-r--r--net/ipv6/ip6_output.c12
-rw-r--r--net/ipv6/route.c17
-rw-r--r--net/l3mdev/l3mdev.c24
7 files changed, 183 insertions, 22 deletions
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 32173aa9208e..b3762822b653 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -999,6 +999,46 @@ static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
return dst;
}
+
+/* called under rcu_read_lock */
+static int vrf_get_saddr6(struct net_device *dev, const struct sock *sk,
+ struct flowi6 *fl6)
+{
+ struct net *net = dev_net(dev);
+ struct dst_entry *dst;
+ struct rt6_info *rt;
+ int err;
+
+ if (rt6_need_strict(&fl6->daddr)) {
+ rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif,
+ RT6_LOOKUP_F_IFACE);
+ if (unlikely(!rt))
+ return 0;
+
+ dst = &rt->dst;
+ } else {
+ __u8 flags = fl6->flowi6_flags;
+
+ fl6->flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
+ fl6->flowi6_flags |= FLOWI_FLAG_SKIP_NH_OIF;
+
+ dst = ip6_route_output(net, sk, fl6);
+ rt = (struct rt6_info *)dst;
+
+ fl6->flowi6_flags = flags;
+ }
+
+ err = dst->error;
+ if (!err) {
+ err = ip6_route_get_saddr(net, rt, &fl6->daddr,
+ sk ? inet6_sk(sk)->srcprefs : 0,
+ &fl6->saddr);
+ }
+
+ dst_release(dst);
+
+ return err;
+}
#endif
static const struct l3mdev_ops vrf_l3mdev_ops = {
@@ -1008,6 +1048,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = {
.l3mdev_l3_rcv = vrf_l3_rcv,
#if IS_ENABLED(CONFIG_IPV6)
.l3mdev_get_rt6_dst = vrf_get_rt6_dst,
+ .l3mdev_get_saddr6 = vrf_get_saddr6,
#endif
};
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index f55bf3d294aa..d97305d0e71f 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -18,6 +18,7 @@ struct route_info {
__u8 prefix[0]; /* 0,8 or 16 */
};
+#include <net/addrconf.h>
#include <net/flow.h>
#include <net/ip6_fib.h>
#include <net/sock.h>
@@ -88,9 +89,23 @@ int ip6_route_add(struct fib6_config *cfg);
int ip6_ins_rt(struct rt6_info *);
int ip6_del_rt(struct rt6_info *);
-int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
- const struct in6_addr *daddr, unsigned int prefs,
- struct in6_addr *saddr);
+static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
+ const struct in6_addr *daddr,
+ unsigned int prefs,
+ struct in6_addr *saddr)
+{
+ struct inet6_dev *idev =
+ rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
+ int err = 0;
+
+ if (rt && rt->rt6i_prefsrc.plen)
+ *saddr = rt->rt6i_prefsrc.addr;
+ else
+ err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
+ daddr, prefs, saddr);
+
+ return err;
+}
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
const struct in6_addr *saddr, int oif, int flags);
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index f8a416ec674c..e90095091aa0 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -39,6 +39,9 @@ struct l3mdev_ops {
/* IPv6 ops */
struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev,
struct flowi6 *fl6);
+ int (*l3mdev_get_saddr6)(struct net_device *dev,
+ const struct sock *sk,
+ struct flowi6 *fl6);
};
#ifdef CONFIG_NET_L3_MASTER_DEV
@@ -76,6 +79,31 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
return rc;
}
+static inline
+const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
+{
+ /* netdev_master_upper_dev_get_rcu calls
+ * list_first_or_null_rcu to walk the upper dev list.
+ * list_first_or_null_rcu does not handle a const arg. We aren't
+ * making changes, just want the master device from that list so
+ * typecast to remove the const
+ */
+ struct net_device *dev = (struct net_device *)_dev;
+ const struct net_device *master;
+
+ if (!dev)
+ return NULL;
+
+ if (netif_is_l3_master(dev))
+ master = dev;
+ else if (netif_is_l3_slave(dev))
+ master = netdev_master_upper_dev_get_rcu(dev);
+ else
+ master = NULL;
+
+ return master;
+}
+
/* get index of an interface to use for FIB lookups. For devices
* enslaved to an L3 master device FIB lookups are based on the
* master index
@@ -140,6 +168,8 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4);
struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6);
+int l3mdev_get_saddr6(struct net *net, const struct sock *sk,
+ struct flowi6 *fl6);
static inline
struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto)
@@ -185,6 +215,12 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
return 0;
}
+static inline
+const struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev)
+{
+ return NULL;
+}
+
static inline int l3mdev_fib_oif_rcu(struct net_device *dev)
{
return dev ? dev->ifindex : 0;
@@ -230,6 +266,12 @@ struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6)
return NULL;
}
+static inline int l3mdev_get_saddr6(struct net *net, const struct sock *sk,
+ struct flowi6 *fl6)
+{
+ return 0;
+}
+
static inline
struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb)
{
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6c8fc3f96b11..a1f6b7b31531 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1524,6 +1524,28 @@ out:
return hiscore_idx;
}
+static int ipv6_get_saddr_master(struct net *net,
+ const struct net_device *dst_dev,
+ const struct net_device *master,
+ struct ipv6_saddr_dst *dst,
+ struct ipv6_saddr_score *scores,
+ int hiscore_idx)
+{
+ struct inet6_dev *idev;
+
+ idev = __in6_dev_get(dst_dev);
+ if (idev)
+ hiscore_idx = __ipv6_dev_get_saddr(net, dst, idev,
+ scores, hiscore_idx);
+
+ idev = __in6_dev_get(master);
+ if (idev)
+ hiscore_idx = __ipv6_dev_get_saddr(net, dst, idev,
+ scores, hiscore_idx);
+
+ return hiscore_idx;
+}
+
int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
const struct in6_addr *daddr, unsigned int prefs,
struct in6_addr *saddr)
@@ -1577,13 +1599,39 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
if (idev)
hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
} else {
+ const struct net_device *master;
+ int master_idx = 0;
+
+ /* if dst_dev exists and is enslaved to an L3 device, then
+ * prefer addresses from dst_dev and then the master over
+ * any other enslaved devices in the L3 domain.
+ */
+ master = l3mdev_master_dev_rcu(dst_dev);
+ if (master) {
+ master_idx = master->ifindex;
+
+ hiscore_idx = ipv6_get_saddr_master(net, dst_dev,
+ master, &dst,
+ scores, hiscore_idx);
+
+ if (scores[hiscore_idx].ifa)
+ goto out;
+ }
+
for_each_netdev_rcu(net, dev) {
+ /* only consider addresses on devices in the
+ * same L3 domain
+ */
+ if (l3mdev_master_ifindex_rcu(dev) != master_idx)
+ continue;
idev = __in6_dev_get(dev);
if (!idev)
continue;
hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
}
}
+
+out:
rcu_read_unlock();
hiscore = &scores[hiscore_idx];
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index fd3217579b65..1dfc402d9ad1 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -910,6 +910,13 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
int err;
int flags = 0;
+ if (ipv6_addr_any(&fl6->saddr) && fl6->flowi6_oif &&
+ (!*dst || !(*dst)->error)) {
+ err = l3mdev_get_saddr6(net, sk, fl6);
+ if (err)
+ goto out_err;
+ }
+
/* The correct way to handle this would be to do
* ip6_route_get_saddr, and then ip6_route_output; however,
* the route-specific preferred source forces the
@@ -999,10 +1006,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
return 0;
out_err_release:
- if (err == -ENETUNREACH)
- IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
dst_release(*dst);
*dst = NULL;
+out_err:
+ if (err == -ENETUNREACH)
+ IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
return err;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9e1516785dac..08b77f421268 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2586,23 +2586,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
return rt;
}
-int ip6_route_get_saddr(struct net *net,
- struct rt6_info *rt,
- const struct in6_addr *daddr,
- unsigned int prefs,
- struct in6_addr *saddr)
-{
- struct inet6_dev *idev =
- rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
- int err = 0;
- if (rt && rt->rt6i_prefsrc.plen)
- *saddr = rt->rt6i_prefsrc.addr;
- else
- err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
- daddr, prefs, saddr);
- return err;
-}
-
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
struct net_device *dev;
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index d90e4ef09e85..c4a1c3e84e12 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -162,6 +162,30 @@ int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4)
}
EXPORT_SYMBOL_GPL(l3mdev_get_saddr);
+int l3mdev_get_saddr6(struct net *net, const struct sock *sk,
+ struct flowi6 *fl6)
+{
+ struct net_device *dev;
+ int rc = 0;
+
+ if (fl6->flowi6_oif) {
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
+ if (dev && netif_is_l3_slave(dev))
+ dev = netdev_master_upper_dev_get_rcu(dev);
+
+ if (dev && netif_is_l3_master(dev) &&
+ dev->l3mdev_ops->l3mdev_get_saddr6)
+ rc = dev->l3mdev_ops->l3mdev_get_saddr6(dev, sk, fl6);
+
+ rcu_read_unlock();
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(l3mdev_get_saddr6);
+
/**
* l3mdev_fib_rule_match - Determine if flowi references an
* L3 master device