From 6869c4d8e066e21623c812c448a05f1ed931c9c6 Mon Sep 17 00:00:00 2001
From: Harald Welte
Date: Tue, 9 Aug 2005 19:24:19 -0700
Subject: [NETFILTER]: reduce netfilter sk_buff enlargement

As discussed at netconf'05, we're trying to save every bit in sk_buff.
The patch below makes sk_buff 8 bytes smaller. I did some basic testing
on my notebook and it seems to work.

The only real in-tree user of nfcache was IPVS, which needs only a
single bit. Unfortunately I couldn't find another free bit in sk_buff
to stuff that bit into, so I introduced a separate field for it. Maybe
the IPVS maintainers can resolve that to save further space.

Initially I wanted to shrink pkt_type to three bits (only six values
such as PACKET_HOST are defined), but unfortunately the Bluetooth code
overloads pkt_type :(

The conntrack-event API (out-of-tree) uses nfcache, but Rusty just came
up with a way to do it without any skb fields, so it's safe to remove.

- remove all never-implemented 'nfcache' code
- don't have the IPVS code abuse the 'nfcache' field; it currently gets
  its own compile-conditional skb->ipvs_property field. The IPVS
  maintainers can decide to move this bit elsewhere, but nfcache needs
  to die.
- remove the skb->nfcache field to save 4 bytes
- move skb->nfctinfo into three unused bits to save a further 4 bytes

Signed-off-by: Harald Welte
Signed-off-by: David S. Miller
---
 net/core/skbuff.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net/core/skbuff.c')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7eab867ede59..096991cb09d9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -361,7 +361,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 	n->destructor = NULL;
 #ifdef CONFIG_NETFILTER
 	C(nfmark);
-	C(nfcache);
 	C(nfct);
 	nf_conntrack_get(skb->nfct);
 	C(nfctinfo);
@@ -424,7 +423,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->destructor = NULL;
 #ifdef CONFIG_NETFILTER
 	new->nfmark = old->nfmark;
-	new->nfcache = old->nfcache;
 	new->nfct = old->nfct;
 	nf_conntrack_get(old->nfct);
 	new->nfctinfo = old->nfctinfo;
--
cgit v1.2.1
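A note on the technique: the 8-byte saving comes from packing small
enumerations into spare bits instead of giving each its own field. A
minimal sketch of the idea -- illustrative only, not the real
struct sk_buff layout, with field names borrowed from the patch:

	struct meta_bits {
		__u8	nfctinfo:3,	 /* conntrack info state, fits in 3 bits */
			ipvs_property:1, /* the single bit IPVS actually needs */
			unused:4;	 /* spare room for future flags */
	};

One byte now carries what previously took two full fields.
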
From 8728b834b226ffcf2c94a58530090e292af2a7bf Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Tue, 9 Aug 2005 19:25:21 -0700
Subject: [NET]: Kill skb->list

Remove the "list" member of struct sk_buff, as it is entirely
redundant. All SKB list-removal callers know which list the SKB is on,
so storing this in sk_buff does nothing other than take up space.

Two tricky bits were SCTP, which I took care of, and two ATM drivers,
which Francois Romieu fixed up.

Signed-off-by: David S. Miller
Signed-off-by: Francois Romieu
---
 net/core/skbuff.c | 57 +++++++++++++++++++++++--------------------------------
 1 file changed, 24 insertions(+), 33 deletions(-)

(limited to 'net/core/skbuff.c')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 096991cb09d9..e6564b0a6839 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -281,8 +281,6 @@ void kfree_skbmem(struct sk_buff *skb)

 void __kfree_skb(struct sk_buff *skb)
 {
-	BUG_ON(skb->list != NULL);
-
 	dst_release(skb->dst);
 #ifdef CONFIG_XFRM
 	secpath_put(skb->sp);
@@ -333,7 +331,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 #define C(x) n->x = skb->x

 	n->next = n->prev = NULL;
-	n->list = NULL;
 	n->sk = NULL;
 	C(stamp);
 	C(dev);
@@ -403,7 +400,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	 */
 	unsigned long offset = new->data - old->data;

-	new->list = NULL;
 	new->sk = NULL;
 	new->dev = old->dev;
 	new->real_dev = old->real_dev;
@@ -1342,50 +1338,43 @@ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
 	__skb_queue_tail(list, newsk);
 	spin_unlock_irqrestore(&list->lock, flags);
 }
+
 /**
  *	skb_unlink	-	remove a buffer from a list
  *	@skb: buffer to remove
+ *	@list: list to use
  *
- *	Place a packet after a given packet in a list. The list locks are taken
- *	and this function is atomic with respect to other list locked calls
+ *	Remove a packet from a list. The list locks are taken and this
+ *	function is atomic with respect to other list locked calls
  *
- *	Works even without knowing the list it is sitting on, which can be
- *	handy at times. It also means that THE LIST MUST EXIST when you
- *	unlink. Thus a list must have its contents unlinked before it is
- *	destroyed.
+ *	You must know what list the SKB is on.
  */
-void skb_unlink(struct sk_buff *skb)
+void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
 {
-	struct sk_buff_head *list = skb->list;
-
-	if (list) {
-		unsigned long flags;
+	unsigned long flags;

-		spin_lock_irqsave(&list->lock, flags);
-		if (skb->list == list)
-			__skb_unlink(skb, skb->list);
-		spin_unlock_irqrestore(&list->lock, flags);
-	}
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_unlink(skb, list);
+	spin_unlock_irqrestore(&list->lock, flags);
 }
-
 /**
  *	skb_append	-	append a buffer
  *	@old: buffer to insert after
  *	@newsk: buffer to insert
+ *	@list: list to use
  *
  *	Place a packet after a given packet in a list. The list locks are taken
  *	and this function is atomic with respect to other list locked calls.
  *	A buffer cannot be placed on two lists at the same time.
  */
-
-void skb_append(struct sk_buff *old, struct sk_buff *newsk)
+void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
 {
 	unsigned long flags;

-	spin_lock_irqsave(&old->list->lock, flags);
-	__skb_append(old, newsk);
-	spin_unlock_irqrestore(&old->list->lock, flags);
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_append(old, newsk, list);
+	spin_unlock_irqrestore(&list->lock, flags);
 }

@@ -1393,19 +1382,21 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk)
  *	skb_insert	-	insert a buffer
  *	@old: buffer to insert before
  *	@newsk: buffer to insert
+ *	@list: list to use
+ *
+ *	Place a packet before a given packet in a list. The list locks are
+ *	taken and this function is atomic with respect to other list locked
+ *	calls.
  *
- *	Place a packet before a given packet in a list. The list locks are taken
- *	and this function is atomic with respect to other list locked calls
 	A buffer cannot be placed on two lists at the same time.
  */
-
-void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
+void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
 {
 	unsigned long flags;

-	spin_lock_irqsave(&old->list->lock, flags);
-	__skb_insert(newsk, old->prev, old, old->list);
-	spin_unlock_irqrestore(&old->list->lock, flags);
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_insert(newsk, old->prev, old, list);
+	spin_unlock_irqrestore(&list->lock, flags);
 }

 #if 0
--
cgit v1.2.1
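For callers, the visible change is the extra list argument. A hedged
before/after sketch -- the queue name and caller are invented for
illustration; the new signature is the one added above:

	static struct sk_buff_head my_queue;	/* the queue we put skbs on */

	static void drop_one(struct sk_buff *skb)
	{
		/* Before: skb_unlink(skb); -- the skb remembered its list.
		 * After: the caller names the list, which it already knew
		 * anyway:
		 */
		skb_unlink(skb, &my_queue);
		kfree_skb(skb);
	}
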
From abc3bc58047efa72ee9c2e208cbeb73d261ad703 Mon Sep 17 00:00:00 2001
From: Patrick McHardy
Date: Tue, 9 Aug 2005 19:25:56 -0700
Subject: [NET]: Kill skb->tc_classid

Signed-off-by: Patrick McHardy
Signed-off-by: David S. Miller
---
 net/core/skbuff.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net/core/skbuff.c')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e6564b0a6839..8896e6f8aa42 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -300,7 +300,6 @@ void __kfree_skb(struct sk_buff *skb)
 	skb->tc_index = 0;
 #ifdef CONFIG_NET_CLS_ACT
 	skb->tc_verd = 0;
-	skb->tc_classid = 0;
 #endif
 #endif

@@ -376,7 +375,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 	n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
 	n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
 	C(input_dev);
-	C(tc_classid);
 #endif
 #endif
--
cgit v1.2.1

From 6f1cf16582160c4839f05007c978743911aa022b Mon Sep 17 00:00:00 2001
From: Stephen Hemminger
Date: Tue, 9 Aug 2005 19:31:17 -0700
Subject: [NET]: Remove HIPPI private from skbuff.h

This removes the private element from skbuff, which is used only by
HIPPI. Instead it uses skb->cb[] to hold the additional data that is
needed in the output path from hard_header to the device driver.

PS: The only qdisc that might potentially corrupt this cb[] is netem
run over HIPPI. I will take care of that by fixing netem to use
skb->stamp. I don't expect many users of netem over HIPPI.

Signed-off-by: Stephen Hemminger
Signed-off-by: David S. Miller
---
 net/core/skbuff.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net/core/skbuff.c')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8896e6f8aa42..16df7bd77e78 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -365,9 +365,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 	nf_bridge_get(skb->nf_bridge);
 #endif
 #endif /*CONFIG_NETFILTER*/
-#if defined(CONFIG_HIPPI)
-	C(private);
-#endif
 #ifdef CONFIG_NET_SCHED
 	C(tc_index);
 #ifdef CONFIG_NET_CLS_ACT
--
cgit v1.2.1
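The skb->cb[] idiom this patch relies on deserves a sketch. cb[] is a
fixed-size scratch area owned by whichever layer currently holds the
skb; the usual pattern is a private struct overlaid on it via a cast
macro. Everything below is illustrative -- the struct and names are
invented, not the actual HIPPI ones:

	struct example_skb_cb {
		__u32	ifield;	/* set by hard_header, read by the driver */
	};

	#define EXAMPLE_SKB_CB(skb) ((struct example_skb_cb *)(skb)->cb)

	static void set_ifield(struct sk_buff *skb, __u32 val)
	{
		/* the private struct must fit inside skb->cb[] */
		BUILD_BUG_ON(sizeof(struct example_skb_cb) > sizeof(skb->cb));
		EXAMPLE_SKB_CB(skb)->ifield = val;
	}

The caveat in the PS above follows directly: any layer between
hard_header and the driver that also writes cb[] (netem, here) would
clobber the data.
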
From f2ccd8fa06c8e302116e71df372f5c1f83432e03 Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Tue, 9 Aug 2005 19:34:12 -0700
Subject: [NET]: Kill skb->real_dev

Bonding just wants the device before the skb_bond() decapsulation
occurs, so simply pass that original device into packet_type->func()
as an argument. It remains to be seen whether we can use this same
exact thing to get rid of skb->input_dev as well.

Signed-off-by: David S. Miller
---
 net/core/skbuff.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net/core/skbuff.c')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 16df7bd77e78..ef498cb9f786 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -333,7 +333,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 	n->sk = NULL;
 	C(stamp);
 	C(dev);
-	C(real_dev);
 	C(h);
 	C(nh);
 	C(mac);
@@ -397,7 +396,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)

 	new->sk = NULL;
 	new->dev = old->dev;
-	new->real_dev = old->real_dev;
 	new->priority = old->priority;
 	new->protocol = old->protocol;
 	new->dst = dst_clone(old->dst);
--
cgit v1.2.1
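From a protocol handler's point of view the change looks roughly like
this. The handler below is invented for illustration; the four-argument
func() shape is what this patch gives packet_type handlers tree-wide:

	static int example_rcv(struct sk_buff *skb, struct net_device *dev,
			       struct packet_type *pt,
			       struct net_device *orig_dev)
	{
		/* dev may be the bond master; orig_dev is the slave the
		 * frame actually arrived on -- previously skb->real_dev.
		 */
		kfree_skb(skb);
		return 0;
	}

	static struct packet_type example_pt = {
		.type = __constant_htons(ETH_P_ALL),
		.func = example_rcv,
	};
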
From a61bbcf28a8cb0ba56f8193d512f7222e711a294 Mon Sep 17 00:00:00 2001
From: Patrick McHardy
Date: Sun, 14 Aug 2005 17:24:31 -0700
Subject: [NET]: Store skb->timestamp as offset to a base timestamp

Reduces skb size by 8 bytes on 64-bit.

Signed-off-by: Patrick McHardy
Signed-off-by: David S. Miller
---
 net/core/skbuff.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net/core/skbuff.c')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ef498cb9f786..39a161dbc16d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -70,6 +70,8 @@
 static kmem_cache_t *skbuff_head_cache;

+struct timeval __read_mostly skb_tv_base;
+
 /*
  *	Keep out-of-line to prevent kernel bloat.
  *	__builtin_return_address is not used because it is not always
@@ -331,7 +333,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)

 	n->next = n->prev = NULL;
 	n->sk = NULL;
-	C(stamp);
+	C(tstamp);
 	C(dev);
 	C(h);
 	C(nh);
@@ -408,7 +410,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	memcpy(new->cb, old->cb, sizeof(old->cb));
 	new->local_df = old->local_df;
 	new->pkt_type = old->pkt_type;
-	new->stamp = old->stamp;
+	new->tstamp = old->tstamp;
 	new->destructor = NULL;
 #ifdef CONFIG_NETFILTER
 	new->nfmark = old->nfmark;
@@ -1645,6 +1647,7 @@ void __init skb_init(void)
 					      NULL, NULL);
 	if (!skbuff_head_cache)
 		panic("cannot create skbuff cache");
+	do_gettimeofday(&skb_tv_base);
 }

 EXPORT_SYMBOL(___pskb_trim);
@@ -1678,3 +1681,4 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
 EXPORT_SYMBOL(skb_seq_read);
 EXPORT_SYMBOL(skb_abort_seq_read);
 EXPORT_SYMBOL(skb_find_text);
+EXPORT_SYMBOL(skb_tv_base);
--
cgit v1.2.1
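The trick: skb_tv_base is captured once at boot (the do_gettimeofday()
call added to skb_init() above), and each skb then stores only a 32-bit
seconds/microseconds offset from that base, halving the field on
64-bit. A hedged sketch of the read side -- treat the helper and the
off_sec/off_usec field names as an assumption about the companion
skbuff.h change, which is not shown in this log:

	static inline void example_get_timestamp(const struct sk_buff *skb,
						 struct timeval *tv)
	{
		tv->tv_sec  = skb->tstamp.off_sec  + skb_tv_base.tv_sec;
		tv->tv_usec = skb->tstamp.off_usec + skb_tv_base.tv_usec;
	}
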
From d179cd12928443f3ec29cfbc3567439644bd0afc Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Wed, 17 Aug 2005 14:57:30 -0700
Subject: [NET]: Implement SKB fast cloning.

Protocols that make extensive use of SKB cloning, for example TCP, eat
at least 2 allocations per packet sent as a result.

To cut the kmalloc() count in half, we implement a pre-allocation
scheme wherein we allocate 2 sk_buff objects in advance, then use a
simple reference count to free up the memory at the correct time.

Based upon an initial patch by Thomas Graf and suggestions from
Herbert Xu.

Signed-off-by: David S. Miller
---
 net/core/skbuff.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 72 insertions(+), 10 deletions(-)

(limited to 'net/core/skbuff.c')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 39a161dbc16d..b853a9b29eb6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -69,6 +69,7 @@
 #include

 static kmem_cache_t *skbuff_head_cache;
+static kmem_cache_t *skbuff_fclone_cache;

 struct timeval __read_mostly skb_tv_base;
@@ -120,7 +121,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  */

 /**
- *	alloc_skb	-	allocate a network buffer
+ *	__alloc_skb	-	allocate a network buffer
  *	@size: size to allocate
  *	@gfp_mask: allocation mask
  *
@@ -131,14 +132,20 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  *	Buffers may only be allocated from interrupts using a @gfp_mask of
  *	%GFP_ATOMIC.
  */
-struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
+struct sk_buff *__alloc_skb(unsigned int size, unsigned int __nocast gfp_mask,
+			    int fclone)
 {
 	struct sk_buff *skb;
 	u8 *data;

 	/* Get the HEAD */
-	skb = kmem_cache_alloc(skbuff_head_cache,
-			       gfp_mask & ~__GFP_DMA);
+	if (fclone)
+		skb = kmem_cache_alloc(skbuff_fclone_cache,
+				       gfp_mask & ~__GFP_DMA);
+	else
+		skb = kmem_cache_alloc(skbuff_head_cache,
+				       gfp_mask & ~__GFP_DMA);
+
 	if (!skb)
 		goto out;
@@ -155,7 +162,15 @@ struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
 	skb->data = data;
 	skb->tail = data;
 	skb->end  = data + size;
+	if (fclone) {
+		struct sk_buff *child = skb + 1;
+		atomic_t *fclone_ref = (atomic_t *) (child + 1);
+
+		skb->fclone = SKB_FCLONE_ORIG;
+		atomic_set(fclone_ref, 1);
+
+		child->fclone = SKB_FCLONE_UNAVAILABLE;
+	}
 	atomic_set(&(skb_shinfo(skb)->dataref), 1);
 	skb_shinfo(skb)->nr_frags = 0;
 	skb_shinfo(skb)->tso_size = 0;
@@ -268,8 +283,34 @@ void skb_release_data(struct sk_buff *skb)
  */
 void kfree_skbmem(struct sk_buff *skb)
 {
+	struct sk_buff *other;
+	atomic_t *fclone_ref;
+
 	skb_release_data(skb);
-	kmem_cache_free(skbuff_head_cache, skb);
+	switch (skb->fclone) {
+	case SKB_FCLONE_UNAVAILABLE:
+		kmem_cache_free(skbuff_head_cache, skb);
+		break;
+
+	case SKB_FCLONE_ORIG:
+		fclone_ref = (atomic_t *) (skb + 2);
+		if (atomic_dec_and_test(fclone_ref))
+			kmem_cache_free(skbuff_fclone_cache, skb);
+		break;
+
+	case SKB_FCLONE_CLONE:
+		fclone_ref = (atomic_t *) (skb + 1);
+		other = skb - 1;
+
+		/* The clone portion is available for
+		 * fast-cloning again.
+		 */
+		skb->fclone = SKB_FCLONE_UNAVAILABLE;
+
+		if (atomic_dec_and_test(fclone_ref))
+			kmem_cache_free(skbuff_fclone_cache, other);
+		break;
+	};
 }

 /**
@@ -324,10 +365,20 @@ void __kfree_skb(struct sk_buff *skb)

 struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 {
-	struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
-
-	if (!n)
-		return NULL;
+	struct sk_buff *n;
+
+	n = skb + 1;
+	if (skb->fclone == SKB_FCLONE_ORIG &&
+	    n->fclone == SKB_FCLONE_UNAVAILABLE) {
+		atomic_t *fclone_ref = (atomic_t *) (n + 1);
+		n->fclone = SKB_FCLONE_CLONE;
+		atomic_inc(fclone_ref);
+	} else {
+		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+		if (!n)
+			return NULL;
+		n->fclone = SKB_FCLONE_UNAVAILABLE;
+	}

 #define C(x) n->x = skb->x
@@ -409,6 +460,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->mac.raw = old->mac.raw + offset;
 	memcpy(new->cb, old->cb, sizeof(old->cb));
 	new->local_df = old->local_df;
+	new->fclone = SKB_FCLONE_UNAVAILABLE;
 	new->pkt_type = old->pkt_type;
 	new->tstamp = old->tstamp;
 	new->destructor = NULL;
@@ -1647,13 +1699,23 @@ void __init skb_init(void)
 					      NULL, NULL);
 	if (!skbuff_head_cache)
 		panic("cannot create skbuff cache");
+
+	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
+						(2*sizeof(struct sk_buff)) +
+						sizeof(atomic_t),
+						0,
+						SLAB_HWCACHE_ALIGN,
+						NULL, NULL);
+	if (!skbuff_fclone_cache)
+		panic("cannot create skbuff cache");
+
 	do_gettimeofday(&skb_tv_base);
 }

 EXPORT_SYMBOL(___pskb_trim);
 EXPORT_SYMBOL(__kfree_skb);
 EXPORT_SYMBOL(__pskb_pull_tail);
-EXPORT_SYMBOL(alloc_skb);
+EXPORT_SYMBOL(__alloc_skb);
 EXPORT_SYMBOL(pskb_copy);
 EXPORT_SYMBOL(pskb_expand_head);
 EXPORT_SYMBOL(skb_checksum);
--
cgit v1.2.1

From ba89966c1984513f4f2cc0a6c182266be44ddd03 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 26 Aug 2005 12:05:31 -0700
Subject: [NET]: use __read_mostly on kmem_cache_t, DEFINE_SNMP_STAT pointers

This patch puts mostly-read-only data in the right section
(__read_mostly), to help sharing of these data between CPUs without
memory ping-pongs.

On one of my production machines, tcp_statistics was sitting in a
heavily modified cache line, so *every* SNMP update had to force a
reload.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/core/skbuff.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/core/skbuff.c')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b853a9b29eb6..f80a28785610 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -68,8 +68,8 @@
 #include
 #include

-static kmem_cache_t *skbuff_head_cache;
-static kmem_cache_t *skbuff_fclone_cache;
+static kmem_cache_t *skbuff_head_cache __read_mostly;
+static kmem_cache_t *skbuff_fclone_cache __read_mostly;

 struct timeval __read_mostly skb_tv_base;
--
cgit v1.2.1
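A closing note on the fast-clone layout, since the pointer arithmetic
in the fast-clone commit above (skb + 2, skb - 1) is easy to misread.
Each skbuff_fclone_cache object is laid out as [parent sk_buff]
[child sk_buff][atomic_t refcount], so the reference count can always
be found relative to whichever sk_buff you hold. A sketch that simply
mirrors the kfree_skbmem() logic above (the helper name is invented):

	static atomic_t *example_fclone_ref(struct sk_buff *skb)
	{
		switch (skb->fclone) {
		case SKB_FCLONE_ORIG:	/* parent: skip the child, then the ref */
			return (atomic_t *) (skb + 2);
		case SKB_FCLONE_CLONE:	/* child: the ref sits right behind it */
			return (atomic_t *) (skb + 1);
		default:		/* plain skbuff_head_cache skb: no ref */
			return NULL;
		}
	}

skb_clone() exploits the same layout: when the parent's neighbour is
still SKB_FCLONE_UNAVAILABLE it hands out that adjacent sk_buff with a
single atomic_inc() instead of a second kmem_cache_alloc().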