From a2d91241a80ec9bbc5ab24b9a2c4d730b3fa5730 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jan 2012 01:04:42 +0000 Subject: tcp: md5: remove obsolete md5_add() method We no longer use md5_add() method from struct tcp_sock_af_ops Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 337ba4cca052..345e24928fa6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -967,13 +967,6 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, } EXPORT_SYMBOL(tcp_v4_md5_do_add); -static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, - u8 *newkey, u8 newkeylen) -{ - return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr, - newkey, newkeylen); -} - int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) { struct tcp_sock *tp = tcp_sk(sk); @@ -1853,7 +1846,6 @@ EXPORT_SYMBOL(ipv4_specific); static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { .md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, - .md5_add = tcp_v4_md5_add_func, .md5_parse = tcp_v4_parse_md5_keys, }; #endif -- cgit v1.2.1 From a915da9b69273815527ccb3789421cb7027b545b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jan 2012 05:18:33 +0000 Subject: tcp: md5: rcu conversion In order to be able to support proper RST messages for TCP MD5 flows, we need to allow access to MD5 keys without locking listener socket. This conversion is a nice cleanup, and shrinks size of timewait sockets by 80 bytes. IPv6 code reuses generic code found in IPv4 instead of duplicating it. Control path uses GFP_KERNEL allocations instead of GFP_ATOMIC. Signed-off-by: Eric Dumazet Cc: Shawn Lu Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 227 +++++++++++++++++++++------------------------------- 1 file changed, 93 insertions(+), 134 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 345e24928fa6..1d5fd82c5c08 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -90,16 +90,8 @@ EXPORT_SYMBOL(sysctl_tcp_low_latency); #ifdef CONFIG_TCP_MD5SIG -static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, - __be32 addr); -static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, +static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, __be32 daddr, __be32 saddr, const struct tcphdr *th); -#else -static inline -struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) -{ - return NULL; -} #endif struct inet_hashinfo tcp_hashinfo; @@ -631,7 +623,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len = sizeof(rep.th); #ifdef CONFIG_TCP_MD5SIG - key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL; + key = sk ? tcp_md5_do_lookup(sk, + (union tcp_md5_addr *)&ip_hdr(skb)->saddr, + AF_INET) : NULL; if (key) { rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -759,7 +753,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, 0, - tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr), + tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, + AF_INET), inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, ip_hdr(skb)->tos); } @@ -876,146 +871,124 @@ static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk, */ /* Find the Key structure for an address. */ -static struct tcp_md5sig_key * - tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) +struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, + const union tcp_md5_addr *addr, + int family) { struct tcp_sock *tp = tcp_sk(sk); - int i; + struct tcp_md5sig_key *key; + struct hlist_node *pos; + unsigned int size = sizeof(struct in_addr); - if (!tp->md5sig_info || !tp->md5sig_info->entries4) + if (!tp->md5sig_info) return NULL; - for (i = 0; i < tp->md5sig_info->entries4; i++) { - if (tp->md5sig_info->keys4[i].addr == addr) - return &tp->md5sig_info->keys4[i].base; +#if IS_ENABLED(CONFIG_IPV6) + if (family == AF_INET6) + size = sizeof(struct in6_addr); +#endif + hlist_for_each_entry_rcu(key, pos, &tp->md5sig_info->head, node) { + if (key->family != family) + continue; + if (!memcmp(&key->addr, addr, size)) + return key; } return NULL; } +EXPORT_SYMBOL(tcp_md5_do_lookup); struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, struct sock *addr_sk) { - return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr); + union tcp_md5_addr *addr; + + addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr; + return tcp_md5_do_lookup(sk, addr, AF_INET); } EXPORT_SYMBOL(tcp_v4_md5_lookup); static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, struct request_sock *req) { - return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr); + union tcp_md5_addr *addr; + + addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr; + return tcp_md5_do_lookup(sk, addr, AF_INET); } /* This can be called on a newly created socket, from other files */ -int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, - u8 *newkey, u8 newkeylen) +int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, + int family, const u8 *newkey, u8 newkeylen, gfp_t gfp) { /* Add Key to the list */ struct tcp_md5sig_key *key; struct tcp_sock *tp = tcp_sk(sk); - struct tcp4_md5sig_key *keys; + struct tcp_md5sig_info *md5sig; - key = tcp_v4_md5_do_lookup(sk, addr); + key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); if (key) { /* Pre-existing entry - just update that one. */ - kfree(key->key); - key->key = newkey; + memcpy(key->key, newkey, newkeylen); key->keylen = newkeylen; - } else { - struct tcp_md5sig_info *md5sig; - - if (!tp->md5sig_info) { - tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), - GFP_ATOMIC); - if (!tp->md5sig_info) { - kfree(newkey); - return -ENOMEM; - } - sk_nocaps_add(sk, NETIF_F_GSO_MASK); - } + return 0; + } - md5sig = tp->md5sig_info; - if (md5sig->entries4 == 0 && - tcp_alloc_md5sig_pool(sk) == NULL) { - kfree(newkey); + md5sig = tp->md5sig_info; + if (!md5sig) { + md5sig = kmalloc(sizeof(*md5sig), gfp); + if (!md5sig) return -ENOMEM; - } - - if (md5sig->alloced4 == md5sig->entries4) { - keys = kmalloc((sizeof(*keys) * - (md5sig->entries4 + 1)), GFP_ATOMIC); - if (!keys) { - kfree(newkey); - if (md5sig->entries4 == 0) - tcp_free_md5sig_pool(); - return -ENOMEM; - } - if (md5sig->entries4) - memcpy(keys, md5sig->keys4, - sizeof(*keys) * md5sig->entries4); + sk_nocaps_add(sk, NETIF_F_GSO_MASK); + INIT_HLIST_HEAD(&md5sig->head); + tp->md5sig_info = md5sig; + } - /* Free old key list, and reference new one */ - kfree(md5sig->keys4); - md5sig->keys4 = keys; - md5sig->alloced4++; - } - md5sig->entries4++; - md5sig->keys4[md5sig->entries4 - 1].addr = addr; - md5sig->keys4[md5sig->entries4 - 1].base.key = newkey; - md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen; + key = kmalloc(sizeof(*key), gfp); + if (!key) + return -ENOMEM; + if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) { + kfree(key); + return -ENOMEM; } + + memcpy(key->key, newkey, newkeylen); + key->keylen = newkeylen; + key->family = family; + memcpy(&key->addr, addr, + (family == AF_INET6) ? sizeof(struct in6_addr) : + sizeof(struct in_addr)); + hlist_add_head_rcu(&key->node, &md5sig->head); return 0; } -EXPORT_SYMBOL(tcp_v4_md5_do_add); +EXPORT_SYMBOL(tcp_md5_do_add); -int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) +int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) { struct tcp_sock *tp = tcp_sk(sk); - int i; - - for (i = 0; i < tp->md5sig_info->entries4; i++) { - if (tp->md5sig_info->keys4[i].addr == addr) { - /* Free the key */ - kfree(tp->md5sig_info->keys4[i].base.key); - tp->md5sig_info->entries4--; - - if (tp->md5sig_info->entries4 == 0) { - kfree(tp->md5sig_info->keys4); - tp->md5sig_info->keys4 = NULL; - tp->md5sig_info->alloced4 = 0; - tcp_free_md5sig_pool(); - } else if (tp->md5sig_info->entries4 != i) { - /* Need to do some manipulation */ - memmove(&tp->md5sig_info->keys4[i], - &tp->md5sig_info->keys4[i+1], - (tp->md5sig_info->entries4 - i) * - sizeof(struct tcp4_md5sig_key)); - } - return 0; - } - } - return -ENOENT; + struct tcp_md5sig_key *key; + + key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); + if (!key) + return -ENOENT; + hlist_del_rcu(&key->node); + kfree_rcu(key, rcu); + if (hlist_empty(&tp->md5sig_info->head)) + tcp_free_md5sig_pool(); + return 0; } -EXPORT_SYMBOL(tcp_v4_md5_do_del); +EXPORT_SYMBOL(tcp_md5_do_del); -static void tcp_v4_clear_md5_list(struct sock *sk) +void tcp_clear_md5_list(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + struct tcp_md5sig_key *key; + struct hlist_node *pos, *n; - /* Free each key, then the set of key keys, - * the crypto element, and then decrement our - * hold on the last resort crypto. - */ - if (tp->md5sig_info->entries4) { - int i; - for (i = 0; i < tp->md5sig_info->entries4; i++) - kfree(tp->md5sig_info->keys4[i].base.key); - tp->md5sig_info->entries4 = 0; + if (!hlist_empty(&tp->md5sig_info->head)) tcp_free_md5sig_pool(); - } - if (tp->md5sig_info->keys4) { - kfree(tp->md5sig_info->keys4); - tp->md5sig_info->keys4 = NULL; - tp->md5sig_info->alloced4 = 0; + hlist_for_each_entry_safe(key, pos, n, &tp->md5sig_info->head, node) { + hlist_del_rcu(&key->node); + kfree_rcu(key, rcu); } } @@ -1024,7 +997,6 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, { struct tcp_md5sig cmd; struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; - u8 *newkey; if (optlen < sizeof(cmd)) return -EINVAL; @@ -1038,29 +1010,16 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, if (!cmd.tcpm_key || !cmd.tcpm_keylen) { if (!tcp_sk(sk)->md5sig_info) return -ENOENT; - return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr); + return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, + AF_INET); } if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; - if (!tcp_sk(sk)->md5sig_info) { - struct tcp_sock *tp = tcp_sk(sk); - struct tcp_md5sig_info *p; - - p = kzalloc(sizeof(*p), sk->sk_allocation); - if (!p) - return -EINVAL; - - tp->md5sig_info = p; - sk_nocaps_add(sk, NETIF_F_GSO_MASK); - } - - newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation); - if (!newkey) - return -ENOMEM; - return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr, - newkey, cmd.tcpm_keylen); + return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, + AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, + GFP_KERNEL); } static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, @@ -1086,7 +1045,7 @@ static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); } -static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, +static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, __be32 daddr, __be32 saddr, const struct tcphdr *th) { struct tcp_md5sig_pool *hp; @@ -1186,7 +1145,8 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) int genhash; unsigned char newhash[16]; - hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr); + hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr, + AF_INET); hash_location = tcp_parse_md5sig_option(th); /* We've parsed the options - do we have a hash? */ @@ -1474,7 +1434,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ - key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr); + key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr, + AF_INET); if (key != NULL) { /* * We're using one, so create a matching key @@ -1482,10 +1443,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, * memory, then we end up not copying the key * across. Shucks. */ - char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); - if (newkey != NULL) - tcp_v4_md5_do_add(newsk, newinet->inet_daddr, - newkey, key->keylen); + tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr, + AF_INET, key->key, key->keylen, GFP_ATOMIC); sk_nocaps_add(newsk, NETIF_F_GSO_MASK); } #endif @@ -1934,7 +1893,7 @@ void tcp_v4_destroy_sock(struct sock *sk) #ifdef CONFIG_TCP_MD5SIG /* Clean up the MD5 key list, if any */ if (tp->md5sig_info) { - tcp_v4_clear_md5_list(sk); + tcp_clear_md5_list(sk); kfree(tp->md5sig_info); tp->md5sig_info = NULL; } -- cgit v1.2.1 From 5f3d9cb2962967d9d7e03abb4a7ca275a9a3fea5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jan 2012 10:56:48 +0000 Subject: tcp: md5: use sock_kmalloc() to limit md5 keys There is no limit on number of MD5 keys an application can attach to a tcp socket. This patch adds a per tcp socket limit based on /proc/sys/net/core/optmem_max With current default optmem_max values, this allows about 150 keys on 64bit arches, and 88 keys on 32bit arches. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1d5fd82c5c08..da5d3226771b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -943,11 +943,11 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, tp->md5sig_info = md5sig; } - key = kmalloc(sizeof(*key), gfp); + key = sock_kmalloc(sk, sizeof(*key), gfp); if (!key) return -ENOMEM; if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) { - kfree(key); + sock_kfree_s(sk, key, sizeof(*key)); return -ENOMEM; } @@ -971,6 +971,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) if (!key) return -ENOENT; hlist_del_rcu(&key->node); + atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); if (hlist_empty(&tp->md5sig_info->head)) tcp_free_md5sig_pool(); @@ -988,6 +989,7 @@ void tcp_clear_md5_list(struct sock *sk) tcp_free_md5sig_pool(); hlist_for_each_entry_safe(key, pos, n, &tp->md5sig_info->head, node) { hlist_del_rcu(&key->node); + atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); } } -- cgit v1.2.1 From a8afca032998850ec63e83d555cdcf0eb5680cd6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jan 2012 18:45:40 +0000 Subject: tcp: md5: protects md5sig_info with RCU This patch makes sure we use appropriate memory barriers before publishing tp->md5sig_info, allowing tcp_md5_do_lookup() being used from tcp_v4_send_reset() without holding socket lock (upcoming patch from Shawn Lu) Note we also need to respect rcu grace period before its freeing, since we can free socket without this grace period thanks to SLAB_DESTROY_BY_RCU Signed-off-by: Eric Dumazet Cc: Shawn Lu Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index da5d3226771b..567cca9b30df 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -879,14 +879,18 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, struct tcp_md5sig_key *key; struct hlist_node *pos; unsigned int size = sizeof(struct in_addr); + struct tcp_md5sig_info *md5sig; - if (!tp->md5sig_info) + /* caller either holds rcu_read_lock() or socket lock */ + md5sig = rcu_dereference_check(tp->md5sig_info, + sock_owned_by_user(sk)); + if (!md5sig) return NULL; #if IS_ENABLED(CONFIG_IPV6) if (family == AF_INET6) size = sizeof(struct in6_addr); #endif - hlist_for_each_entry_rcu(key, pos, &tp->md5sig_info->head, node) { + hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) { if (key->family != family) continue; if (!memcmp(&key->addr, addr, size)) @@ -932,7 +936,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, return 0; } - md5sig = tp->md5sig_info; + md5sig = rcu_dereference_protected(tp->md5sig_info, + sock_owned_by_user(sk)); if (!md5sig) { md5sig = kmalloc(sizeof(*md5sig), gfp); if (!md5sig) @@ -940,7 +945,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, sk_nocaps_add(sk, NETIF_F_GSO_MASK); INIT_HLIST_HEAD(&md5sig->head); - tp->md5sig_info = md5sig; + rcu_assign_pointer(tp->md5sig_info, md5sig); } key = sock_kmalloc(sk, sizeof(*key), gfp); @@ -966,6 +971,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; + struct tcp_md5sig_info *md5sig; key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); if (!key) @@ -973,7 +979,9 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); - if (hlist_empty(&tp->md5sig_info->head)) + md5sig = rcu_dereference_protected(tp->md5sig_info, + sock_owned_by_user(sk)); + if (hlist_empty(&md5sig->head)) tcp_free_md5sig_pool(); return 0; } @@ -984,10 +992,13 @@ void tcp_clear_md5_list(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; struct hlist_node *pos, *n; + struct tcp_md5sig_info *md5sig; - if (!hlist_empty(&tp->md5sig_info->head)) + md5sig = rcu_dereference_protected(tp->md5sig_info, 1); + + if (!hlist_empty(&md5sig->head)) tcp_free_md5sig_pool(); - hlist_for_each_entry_safe(key, pos, n, &tp->md5sig_info->head, node) { + hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) { hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); @@ -1009,12 +1020,9 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, if (sin->sin_family != AF_INET) return -EINVAL; - if (!cmd.tcpm_key || !cmd.tcpm_keylen) { - if (!tcp_sk(sk)->md5sig_info) - return -ENOENT; + if (!cmd.tcpm_key || !cmd.tcpm_keylen) return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, AF_INET); - } if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; @@ -1896,7 +1904,7 @@ void tcp_v4_destroy_sock(struct sock *sk) /* Clean up the MD5 key list, if any */ if (tp->md5sig_info) { tcp_clear_md5_list(sk); - kfree(tp->md5sig_info); + kfree_rcu(tp->md5sig_info, rcu); tp->md5sig_info = NULL; } #endif -- cgit v1.2.1 From 658ddaaf6694adf63f67451dec9ddeb87a7cb2d7 Mon Sep 17 00:00:00 2001 From: Shawn Lu Date: Tue, 31 Jan 2012 22:35:48 +0000 Subject: tcp: md5: RST: getting md5 key from listener TCP RST mechanism is broken in TCP md5(RFC2385). When connection is gone, md5 key is lost, sending RST without md5 hash is deem to ignored by peer. This can be a problem since RST help protocal like bgp to fast recove from peer crash. In most case, users of tcp md5, such as bgp and ldp, have listener on both sides to accept connection from peer. md5 keys for peers are saved in listening socket. There are two cases in finding md5 key when connection is lost: 1.Passive receive RST: The message is send to well known port, tcp will associate it with listner. md5 key is gotten from listener. 2.Active receive RST (no sock): The message is send to ative side, there is no socket associated with the message. In this case, finding listener from source port, then find md5 key from listener. we are not loosing sercuriy here: packet is checked with md5 hash. No RST is generated if md5 hash doesn't match or no md5 key can be found. Signed-off-by: Shawn Lu Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 45 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 567cca9b30df..90e47931e217 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -593,6 +593,10 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) struct ip_reply_arg arg; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; + const __u8 *hash_location = NULL; + unsigned char newhash[16]; + int genhash; + struct sock *sk1 = NULL; #endif struct net *net; @@ -623,9 +627,36 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len = sizeof(rep.th); #ifdef CONFIG_TCP_MD5SIG - key = sk ? tcp_md5_do_lookup(sk, - (union tcp_md5_addr *)&ip_hdr(skb)->saddr, - AF_INET) : NULL; + hash_location = tcp_parse_md5sig_option(th); + if (!sk && hash_location) { + /* + * active side is lost. Try to find listening socket through + * source port, and then find md5 key through listening socket. + * we are not loose security here: + * Incoming packet is checked with md5 hash with finding key, + * no RST generated if md5 hash doesn't match. + */ + sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev), + &tcp_hashinfo, ip_hdr(skb)->daddr, + ntohs(th->source), inet_iif(skb)); + /* don't send rst if it can't find key */ + if (!sk1) + return; + rcu_read_lock(); + key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *) + &ip_hdr(skb)->saddr, AF_INET); + if (!key) + goto release_sk1; + + genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb); + if (genhash || memcmp(hash_location, newhash, 16) != 0) + goto release_sk1; + } else { + key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *) + &ip_hdr(skb)->saddr, + AF_INET) : NULL; + } + if (key) { rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -653,6 +684,14 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); + +#ifdef CONFIG_TCP_MD5SIG +release_sk1: + if (sk1) { + rcu_read_unlock(); + sock_put(sk1); + } +#endif } /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states -- cgit v1.2.1 From 4c507d2897bd9be810b3403ade73b04cf6fdfd4a Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 9 Feb 2012 09:35:49 +0000 Subject: net: implement IP_RECVTOS for IP_PKTOPTIONS Currently, it is not easily possible to get TOS/DSCP value of packets from an incoming TCP stream. The mechanism is there, IP_PKTOPTIONS getsockopt with IP_RECVTOS set, the same way as incoming TTL can be queried. This is not actually implemented for TOS, though. This patch adds this functionality, both for IPv4 (IP_PKTOPTIONS) and IPv6 (IPV6_2292PKTOPTIONS). For IPv4, like in the IP_RECVTTL case, the value of the TOS field is stored from the other party's ACK. This is needed for proxies which require DSCP transparency. One such example is at http://zph.bratcheda.org/. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4d6f81c818dc..94abee8cf563 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1463,6 +1463,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, ireq->opt = NULL; newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; + newinet->rcv_tos = ip_hdr(skb)->tos; inet_csk(newsk)->icsk_ext_hdr_len = 0; if (inet_opt) inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; -- cgit v1.2.1 From b4fb05ea402cb6930b40d3152d8acabc391b23e2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Mar 2012 04:45:43 +0000 Subject: tcp: md5: correct a RCU lockdep splat commit a8afca0329 (tcp: md5: protects md5sig_info with RCU) added a lockdep splat in tcp_md5_do_lookup() in case a timer fires a tcp retransmit. At this point, socket lock is owned by the sofirq handler, not the user, so we should adjust a bit the lockdep condition, as we dont hold rcu_read_lock(). Signed-off-by: Eric Dumazet Reported-by: Valdis Kletnieks Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 94abee8cf563..507924b640ef 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -927,7 +927,8 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, /* caller either holds rcu_read_lock() or socket lock */ md5sig = rcu_dereference_check(tp->md5sig_info, - sock_owned_by_user(sk)); + sock_owned_by_user(sk) || + lockdep_is_held(&sk->sk_lock.slock)); if (!md5sig) return NULL; #if IS_ENABLED(CONFIG_IPV6) -- cgit v1.2.1 From 058bd4d2a4ff0aaa4a5381c67e776729d840c785 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 11 Mar 2012 18:36:11 +0000 Subject: net: Convert printks to pr_ Use a more current kernel messaging style. Convert a printk block to print_hex_dump. Coalesce formats, align arguments. Use %s, __func__ instead of embedding function names. Some messages that were prefixed with _close are now prefixed with _fini. Some ah4 and esp messages are now not prefixed with "ip ". The intent of this patch is to later add something like #define pr_fmt(fmt) "IPv4: " fmt. to standardize the output messages. Text size is trivially reduced. (x86-32 allyesconfig) $ size net/ipv4/built-in.o* text data bss dec hex filename 887888 31558 249696 1169142 11d6f6 net/ipv4/built-in.o.new 887934 31558 249800 1169292 11d78c net/ipv4/built-in.o.old Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 507924b640ef..257dba66eaca 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1227,10 +1227,10 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) if (genhash || memcmp(hash_location, newhash, 16) != 0) { if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", - &iph->saddr, ntohs(th->source), - &iph->daddr, ntohs(th->dest), - genhash ? " tcp_v4_calc_md5_hash failed" : ""); + pr_info("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", + &iph->saddr, ntohs(th->source), + &iph->daddr, ntohs(th->dest), + genhash ? " tcp_v4_calc_md5_hash failed" : ""); } return 1; } -- cgit v1.2.1 From afd465030acb4098abcb6b965a5aebc7ea2209e0 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 12 Mar 2012 07:03:32 +0000 Subject: net: ipv4: Standardize prefixes for message logging Add #define pr_fmt(fmt) as appropriate. Add "IPv4: ", "TCP: ", and "IPsec: " to appropriate files. Standardize on "UDPLite: " for appropriate uses. Some prefixes were previously "UDPLITE: " and "UDP-Lite: ". Add KBUILD_MODNAME ": " to icmp and gre. Remove embedded prefixes as appropriate. Add missing "\n" to pr_info in gre.c. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 257dba66eaca..fe9f604ed1e2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -50,6 +50,7 @@ * a single port at the same time. */ +#define pr_fmt(fmt) "TCP: " fmt #include #include @@ -876,8 +877,7 @@ int tcp_syn_flood_action(struct sock *sk, lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; if (!lopt->synflood_warned) { lopt->synflood_warned = 1; - pr_info("%s: Possible SYN flooding on port %d. %s. " - " Check SNMP counters.\n", + pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", proto, ntohs(tcp_hdr(skb)->dest), msg); } return want_cookie; @@ -1399,7 +1399,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * to destinations, already remembered * to the moment of synflood. */ - LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n", + LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"), &saddr, ntohs(tcp_hdr(skb)->source)); goto drop_and_release; } -- cgit v1.2.1