X-Git-Url: http://git.samba.org/samba.git/?p=sfrench%2Fcifs-2.6.git;a=blobdiff_plain;f=net%2Fipv4%2Fnetfilter%2Fip_conntrack_core.c;h=8556a4f4f60abde4adf99135e4c28967f4d2d96b;hp=aa459177c3f8e11fd5ae0d516dc6c37586360d7c;hb=e18b890bb0881bbab6f4f1a6cd20d9c60d66b003;hpb=6ab3d5624e172c553004ecc862bfeac16d9d68b7 diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index aa459177c3f8..8556a4f4f60a 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -40,14 +40,10 @@ /* ip_conntrack_lock protects the main hash table, protocol/helper/expected registrations, conntrack timers*/ -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include #include #include #include -#include #define IP_CONNTRACK_VERSION "2.4" @@ -64,17 +60,17 @@ atomic_t ip_conntrack_count = ATOMIC_INIT(0); void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; LIST_HEAD(ip_conntrack_expect_list); -struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; +struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly; static LIST_HEAD(helpers); -unsigned int ip_conntrack_htable_size = 0; -int ip_conntrack_max; -struct list_head *ip_conntrack_hash; -static kmem_cache_t *ip_conntrack_cachep __read_mostly; -static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly; +unsigned int ip_conntrack_htable_size __read_mostly = 0; +int ip_conntrack_max __read_mostly; +struct list_head *ip_conntrack_hash __read_mostly; +static struct kmem_cache *ip_conntrack_cachep __read_mostly; +static struct kmem_cache *ip_conntrack_expect_cachep __read_mostly; struct ip_conntrack ip_conntrack_untracked; -unsigned int ip_ct_log_invalid; +unsigned int ip_ct_log_invalid __read_mostly; static LIST_HEAD(unconfirmed); -static int ip_conntrack_vmalloc; +static int ip_conntrack_vmalloc __read_mostly; static unsigned int ip_conntrack_next_id; static unsigned int ip_conntrack_expect_next_id; @@ -150,8 +146,8 @@ static unsigned int ip_conntrack_hash_rnd; static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple, unsigned int size, unsigned int rnd) { - return (jhash_3words(tuple->src.ip, - (tuple->dst.ip ^ tuple->dst.protonum), + return (jhash_3words((__force u32)tuple->src.ip, + ((__force u32)tuple->dst.ip ^ tuple->dst.protonum), (tuple->src.u.all | (tuple->dst.u.all << 16)), rnd) % size); } @@ -202,7 +198,6 @@ ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse, /* ip_conntrack_expect helper functions */ void ip_ct_unlink_expect(struct ip_conntrack_expect *exp) { - ASSERT_WRITE_LOCK(&ip_conntrack_lock); IP_NF_ASSERT(!timer_pending(&exp->timeout)); list_del(&exp->list); CONNTRACK_STAT_INC(expect_delete); @@ -226,22 +221,22 @@ __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) struct ip_conntrack_expect *i; list_for_each_entry(i, &ip_conntrack_expect_list, list) { - if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { - atomic_inc(&i->use); + if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) return i; - } } return NULL; } /* Just find a expectation corresponding to a tuple. */ struct ip_conntrack_expect * -ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) +ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple) { struct ip_conntrack_expect *i; read_lock_bh(&ip_conntrack_lock); i = __ip_conntrack_expect_find(tuple); + if (i) + atomic_inc(&i->use); read_unlock_bh(&ip_conntrack_lock); return i; @@ -294,15 +289,9 @@ void ip_ct_remove_expectations(struct ip_conntrack *ct) static void clean_from_lists(struct ip_conntrack *ct) { - unsigned int ho, hr; - DEBUGP("clean_from_lists(%p)\n", ct); - ASSERT_WRITE_LOCK(&ip_conntrack_lock); - - ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); - LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list); /* Destroy all pending expectations */ ip_ct_remove_expectations(ct); @@ -313,6 +302,7 @@ destroy_conntrack(struct nf_conntrack *nfct) { struct ip_conntrack *ct = (struct ip_conntrack *)nfct; struct ip_conntrack_protocol *proto; + struct ip_conntrack_helper *helper; DEBUGP("destroy_conntrack(%p)\n", ct); IP_NF_ASSERT(atomic_read(&nfct->use) == 0); @@ -321,6 +311,10 @@ destroy_conntrack(struct nf_conntrack *nfct) ip_conntrack_event(IPCT_DESTROY, ct); set_bit(IPS_DYING_BIT, &ct->status); + helper = ct->helper; + if (helper && helper->destroy) + helper->destroy(ct); + /* To make sure we don't get any weird locking issues here: * destroy_conntrack() MUST NOT be called with a write lock * to ip_conntrack_lock!!! -HW */ @@ -367,16 +361,6 @@ static void death_by_timeout(unsigned long ul_conntrack) ip_conntrack_put(ct); } -static inline int -conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i, - const struct ip_conntrack_tuple *tuple, - const struct ip_conntrack *ignored_conntrack) -{ - ASSERT_READ_LOCK(&ip_conntrack_lock); - return tuplehash_to_ctrack(i) != ignored_conntrack - && ip_ct_tuple_equal(tuple, &i->tuple); -} - struct ip_conntrack_tuple_hash * __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, const struct ip_conntrack *ignored_conntrack) @@ -384,9 +368,9 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, struct ip_conntrack_tuple_hash *h; unsigned int hash = hash_conntrack(tuple); - ASSERT_READ_LOCK(&ip_conntrack_lock); list_for_each_entry(h, &ip_conntrack_hash[hash], list) { - if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { + if (tuplehash_to_ctrack(h) != ignored_conntrack && + ip_ct_tuple_equal(tuple, &h->tuple)) { CONNTRACK_STAT_INC(found); return h; } @@ -417,10 +401,10 @@ static void __ip_conntrack_hash_insert(struct ip_conntrack *ct, unsigned int repl_hash) { ct->id = ++ip_conntrack_next_id; - list_prepend(&ip_conntrack_hash[hash], - &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - list_prepend(&ip_conntrack_hash[repl_hash], - &ct->tuplehash[IP_CT_DIR_REPLY].list); + list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list, + &ip_conntrack_hash[hash]); + list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list, + &ip_conntrack_hash[repl_hash]); } void ip_conntrack_hash_insert(struct ip_conntrack *ct) @@ -440,6 +424,7 @@ int __ip_conntrack_confirm(struct sk_buff **pskb) { unsigned int hash, repl_hash; + struct ip_conntrack_tuple_hash *h; struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; @@ -470,43 +455,43 @@ __ip_conntrack_confirm(struct sk_buff **pskb) /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ - if (!LIST_FIND(&ip_conntrack_hash[hash], - conntrack_tuple_cmp, - struct ip_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) - && !LIST_FIND(&ip_conntrack_hash[repl_hash], - conntrack_tuple_cmp, - struct ip_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { - /* Remove from unconfirmed list */ - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_for_each_entry(h, &ip_conntrack_hash[hash], list) + if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &h->tuple)) + goto out; + list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list) + if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, + &h->tuple)) + goto out; - __ip_conntrack_hash_insert(ct, hash, repl_hash); - /* Timer relative to confirmation time, not original - setting time, otherwise we'd get timer wrap in - weird delay cases. */ - ct->timeout.expires += jiffies; - add_timer(&ct->timeout); - atomic_inc(&ct->ct_general.use); - set_bit(IPS_CONFIRMED_BIT, &ct->status); - CONNTRACK_STAT_INC(insert); - write_unlock_bh(&ip_conntrack_lock); - if (ct->helper) - ip_conntrack_event_cache(IPCT_HELPER, *pskb); + /* Remove from unconfirmed list */ + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + + __ip_conntrack_hash_insert(ct, hash, repl_hash); + /* Timer relative to confirmation time, not original + setting time, otherwise we'd get timer wrap in + weird delay cases. */ + ct->timeout.expires += jiffies; + add_timer(&ct->timeout); + atomic_inc(&ct->ct_general.use); + set_bit(IPS_CONFIRMED_BIT, &ct->status); + CONNTRACK_STAT_INC(insert); + write_unlock_bh(&ip_conntrack_lock); + if (ct->helper) + ip_conntrack_event_cache(IPCT_HELPER, *pskb); #ifdef CONFIG_IP_NF_NAT_NEEDED - if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || - test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - ip_conntrack_event_cache(IPCT_NATINFO, *pskb); + if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || + test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) + ip_conntrack_event_cache(IPCT_NATINFO, *pskb); #endif - ip_conntrack_event_cache(master_ct(ct) ? - IPCT_RELATED : IPCT_NEW, *pskb); + ip_conntrack_event_cache(master_ct(ct) ? + IPCT_RELATED : IPCT_NEW, *pskb); - return NF_ACCEPT; - } + return NF_ACCEPT; +out: CONNTRACK_STAT_INC(insert_failed); write_unlock_bh(&ip_conntrack_lock); - return NF_DROP; } @@ -527,23 +512,21 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ -static inline int unreplied(const struct ip_conntrack_tuple_hash *i) -{ - return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status)); -} - static int early_drop(struct list_head *chain) { /* Traverse backwards: gives us oldest, which is roughly LRU */ struct ip_conntrack_tuple_hash *h; - struct ip_conntrack *ct = NULL; + struct ip_conntrack *ct = NULL, *tmp; int dropped = 0; read_lock_bh(&ip_conntrack_lock); - h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *); - if (h) { - ct = tuplehash_to_ctrack(h); - atomic_inc(&ct->ct_general.use); + list_for_each_entry_reverse(h, chain, list) { + tmp = tuplehash_to_ctrack(h); + if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) { + ct = tmp; + atomic_inc(&ct->ct_general.use); + break; + } } read_unlock_bh(&ip_conntrack_lock); @@ -559,18 +542,16 @@ static int early_drop(struct list_head *chain) return dropped; } -static inline int helper_cmp(const struct ip_conntrack_helper *i, - const struct ip_conntrack_tuple *rtuple) -{ - return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); -} - static struct ip_conntrack_helper * __ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple) { - return LIST_FIND(&helpers, helper_cmp, - struct ip_conntrack_helper *, - tuple); + struct ip_conntrack_helper *h; + + list_for_each_entry(h, &helpers, list) { + if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask)) + return h; + } + return NULL; } struct ip_conntrack_helper * @@ -640,11 +621,15 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, ip_conntrack_hash_rnd_initted = 1; } + /* We don't want any race condition at early drop stage */ + atomic_inc(&ip_conntrack_count); + if (ip_conntrack_max - && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { + && atomic_read(&ip_conntrack_count) > ip_conntrack_max) { unsigned int hash = hash_conntrack(orig); /* Try dropping from this hash chain. */ if (!early_drop(&ip_conntrack_hash[hash])) { + atomic_dec(&ip_conntrack_count); if (net_ratelimit()) printk(KERN_WARNING "ip_conntrack: table full, dropping" @@ -656,6 +641,7 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); if (!conntrack) { DEBUGP("Can't allocate conntrack.\n"); + atomic_dec(&ip_conntrack_count); return ERR_PTR(-ENOMEM); } @@ -669,8 +655,6 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, conntrack->timeout.data = (unsigned long)conntrack; conntrack->timeout.function = death_by_timeout; - atomic_inc(&ip_conntrack_count); - return conntrack; } @@ -1062,7 +1046,7 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me) { BUG_ON(me->timeout == 0); write_lock_bh(&ip_conntrack_lock); - list_prepend(&helpers, me); + list_add(&me->list, &helpers); write_unlock_bh(&ip_conntrack_lock); return 0; @@ -1081,24 +1065,24 @@ __ip_conntrack_helper_find_byname(const char *name) return NULL; } -static inline int unhelp(struct ip_conntrack_tuple_hash *i, - const struct ip_conntrack_helper *me) +static inline void unhelp(struct ip_conntrack_tuple_hash *i, + const struct ip_conntrack_helper *me) { if (tuplehash_to_ctrack(i)->helper == me) { ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i)); tuplehash_to_ctrack(i)->helper = NULL; } - return 0; } void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) { unsigned int i; + struct ip_conntrack_tuple_hash *h; struct ip_conntrack_expect *exp, *tmp; /* Need write lock here, to delete helper. */ write_lock_bh(&ip_conntrack_lock); - LIST_DELETE(&helpers, me); + list_del(&me->list); /* Get rid of expectations */ list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { @@ -1108,10 +1092,12 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) } } /* Get rid of expecteds, set helpers to NULL. */ - LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me); - for (i = 0; i < ip_conntrack_htable_size; i++) - LIST_FIND_W(&ip_conntrack_hash[i], unhelp, - struct ip_conntrack_tuple_hash *, me); + list_for_each_entry(h, &unconfirmed, list) + unhelp(h, me); + for (i = 0; i < ip_conntrack_htable_size; i++) { + list_for_each_entry(h, &ip_conntrack_hash[i], list) + unhelp(h, me); + } write_unlock_bh(&ip_conntrack_lock); /* Someone could be still looking at the helper in a bh. */ @@ -1177,9 +1163,9 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct, int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb, const struct ip_conntrack_tuple *tuple) { - NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t), + NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(__be16), &tuple->src.u.tcp.port); - NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t), + NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(__be16), &tuple->dst.u.tcp.port); return 0; @@ -1194,9 +1180,9 @@ int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[], return -EINVAL; t->src.u.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); + *(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); t->dst.u.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); + *(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); return 0; } @@ -1237,46 +1223,43 @@ static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) nf_conntrack_get(nskb->nfct); } -static inline int -do_iter(const struct ip_conntrack_tuple_hash *i, - int (*iter)(struct ip_conntrack *i, void *data), - void *data) -{ - return iter(tuplehash_to_ctrack(i), data); -} - /* Bring out ya dead! */ -static struct ip_conntrack_tuple_hash * +static struct ip_conntrack * get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data), void *data, unsigned int *bucket) { - struct ip_conntrack_tuple_hash *h = NULL; + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack *ct; write_lock_bh(&ip_conntrack_lock); for (; *bucket < ip_conntrack_htable_size; (*bucket)++) { - h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter, - struct ip_conntrack_tuple_hash *, iter, data); - if (h) - break; + list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) { + ct = tuplehash_to_ctrack(h); + if (iter(ct, data)) + goto found; + } + } + list_for_each_entry(h, &unconfirmed, list) { + ct = tuplehash_to_ctrack(h); + if (iter(ct, data)) + goto found; } - if (!h) - h = LIST_FIND_W(&unconfirmed, do_iter, - struct ip_conntrack_tuple_hash *, iter, data); - if (h) - atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); write_unlock_bh(&ip_conntrack_lock); + return NULL; - return h; +found: + atomic_inc(&ct->ct_general.use); + write_unlock_bh(&ip_conntrack_lock); + return ct; } void ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data) { - struct ip_conntrack_tuple_hash *h; + struct ip_conntrack *ct; unsigned int bucket = 0; - while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { - struct ip_conntrack *ct = tuplehash_to_ctrack(h); + while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { /* Time to push up daises... */ if (del_timer(&ct->timeout)) death_by_timeout((unsigned long)ct);