Merge git://git.infradead.org/mtd-2.6
[sfrench/cifs-2.6.git] / net / netfilter / nf_conntrack_expect.c
1 /* Expectation handling for nf_conntrack. */
2
3 /* (C) 1999-2001 Paul `Rusty' Russell
4  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5  * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11
12 #include <linux/types.h>
13 #include <linux/netfilter.h>
14 #include <linux/skbuff.h>
15 #include <linux/proc_fs.h>
16 #include <linux/seq_file.h>
17 #include <linux/stddef.h>
18 #include <linux/slab.h>
19 #include <linux/err.h>
20 #include <linux/percpu.h>
21 #include <linux/kernel.h>
22
23 #include <net/netfilter/nf_conntrack.h>
24 #include <net/netfilter/nf_conntrack_core.h>
25 #include <net/netfilter/nf_conntrack_expect.h>
26 #include <net/netfilter/nf_conntrack_helper.h>
27 #include <net/netfilter/nf_conntrack_tuple.h>
28
29 LIST_HEAD(nf_conntrack_expect_list);
30 EXPORT_SYMBOL_GPL(nf_conntrack_expect_list);
31
32 struct kmem_cache *nf_conntrack_expect_cachep __read_mostly;
33 static unsigned int nf_conntrack_expect_next_id;
34
35 /* nf_conntrack_expect helper functions */
36 void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
37 {
38         struct nf_conn_help *master_help = nfct_help(exp->master);
39
40         NF_CT_ASSERT(master_help);
41         NF_CT_ASSERT(!timer_pending(&exp->timeout));
42
43         list_del(&exp->list);
44         NF_CT_STAT_INC(expect_delete);
45         master_help->expecting--;
46         nf_conntrack_expect_put(exp);
47 }
48 EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
49
50 static void expectation_timed_out(unsigned long ul_expect)
51 {
52         struct nf_conntrack_expect *exp = (void *)ul_expect;
53
54         write_lock_bh(&nf_conntrack_lock);
55         nf_ct_unlink_expect(exp);
56         write_unlock_bh(&nf_conntrack_lock);
57         nf_conntrack_expect_put(exp);
58 }
59
60 struct nf_conntrack_expect *
61 __nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
62 {
63         struct nf_conntrack_expect *i;
64
65         list_for_each_entry(i, &nf_conntrack_expect_list, list) {
66                 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
67                         return i;
68         }
69         return NULL;
70 }
71 EXPORT_SYMBOL_GPL(__nf_conntrack_expect_find);
72
73 /* Just find a expectation corresponding to a tuple. */
74 struct nf_conntrack_expect *
75 nf_conntrack_expect_find_get(const struct nf_conntrack_tuple *tuple)
76 {
77         struct nf_conntrack_expect *i;
78
79         read_lock_bh(&nf_conntrack_lock);
80         i = __nf_conntrack_expect_find(tuple);
81         if (i)
82                 atomic_inc(&i->use);
83         read_unlock_bh(&nf_conntrack_lock);
84
85         return i;
86 }
87 EXPORT_SYMBOL_GPL(nf_conntrack_expect_find_get);
88
89 /* If an expectation for this connection is found, it gets delete from
90  * global list then returned. */
91 struct nf_conntrack_expect *
92 find_expectation(const struct nf_conntrack_tuple *tuple)
93 {
94         struct nf_conntrack_expect *exp;
95
96         exp = __nf_conntrack_expect_find(tuple);
97         if (!exp)
98                 return NULL;
99
100         /* If master is not in hash table yet (ie. packet hasn't left
101            this machine yet), how can other end know about expected?
102            Hence these are not the droids you are looking for (if
103            master ct never got confirmed, we'd hold a reference to it
104            and weird things would happen to future packets). */
105         if (!nf_ct_is_confirmed(exp->master))
106                 return NULL;
107
108         if (exp->flags & NF_CT_EXPECT_PERMANENT) {
109                 atomic_inc(&exp->use);
110                 return exp;
111         } else if (del_timer(&exp->timeout)) {
112                 nf_ct_unlink_expect(exp);
113                 return exp;
114         }
115
116         return NULL;
117 }
118
119 /* delete all expectations for this conntrack */
120 void nf_ct_remove_expectations(struct nf_conn *ct)
121 {
122         struct nf_conntrack_expect *i, *tmp;
123         struct nf_conn_help *help = nfct_help(ct);
124
125         /* Optimization: most connection never expect any others. */
126         if (!help || help->expecting == 0)
127                 return;
128
129         list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
130                 if (i->master == ct && del_timer(&i->timeout)) {
131                         nf_ct_unlink_expect(i);
132                         nf_conntrack_expect_put(i);
133                 }
134         }
135 }
136 EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
137
138 /* Would two expected things clash? */
139 static inline int expect_clash(const struct nf_conntrack_expect *a,
140                                const struct nf_conntrack_expect *b)
141 {
142         /* Part covered by intersection of masks must be unequal,
143            otherwise they clash */
144         struct nf_conntrack_tuple intersect_mask;
145         int count;
146
147         intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
148         intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
149         intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
150         intersect_mask.dst.protonum = a->mask.dst.protonum
151                                         & b->mask.dst.protonum;
152
153         for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
154                 intersect_mask.src.u3.all[count] =
155                         a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
156         }
157
158         for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
159                 intersect_mask.dst.u3.all[count] =
160                         a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count];
161         }
162
163         return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
164 }
165
166 static inline int expect_matches(const struct nf_conntrack_expect *a,
167                                  const struct nf_conntrack_expect *b)
168 {
169         return a->master == b->master
170                 && nf_ct_tuple_equal(&a->tuple, &b->tuple)
171                 && nf_ct_tuple_equal(&a->mask, &b->mask);
172 }
173
174 /* Generally a bad idea to call this: could have matched already. */
175 void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
176 {
177         struct nf_conntrack_expect *i;
178
179         write_lock_bh(&nf_conntrack_lock);
180         /* choose the oldest expectation to evict */
181         list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
182                 if (expect_matches(i, exp) && del_timer(&i->timeout)) {
183                         nf_ct_unlink_expect(i);
184                         write_unlock_bh(&nf_conntrack_lock);
185                         nf_conntrack_expect_put(i);
186                         return;
187                 }
188         }
189         write_unlock_bh(&nf_conntrack_lock);
190 }
191 EXPORT_SYMBOL_GPL(nf_conntrack_unexpect_related);
192
193 /* We don't increase the master conntrack refcount for non-fulfilled
194  * conntracks. During the conntrack destruction, the expectations are
195  * always killed before the conntrack itself */
196 struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
197 {
198         struct nf_conntrack_expect *new;
199
200         new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
201         if (!new)
202                 return NULL;
203
204         new->master = me;
205         atomic_set(&new->use, 1);
206         return new;
207 }
208 EXPORT_SYMBOL_GPL(nf_conntrack_expect_alloc);
209
210 void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family,
211                               union nf_conntrack_address *saddr,
212                               union nf_conntrack_address *daddr,
213                               u_int8_t proto, __be16 *src, __be16 *dst)
214 {
215         int len;
216
217         if (family == AF_INET)
218                 len = 4;
219         else
220                 len = 16;
221
222         exp->flags = 0;
223         exp->expectfn = NULL;
224         exp->helper = NULL;
225         exp->tuple.src.l3num = family;
226         exp->tuple.dst.protonum = proto;
227         exp->mask.src.l3num = 0xFFFF;
228         exp->mask.dst.protonum = 0xFF;
229
230         if (saddr) {
231                 memcpy(&exp->tuple.src.u3, saddr, len);
232                 if (sizeof(exp->tuple.src.u3) > len)
233                         /* address needs to be cleared for nf_ct_tuple_equal */
234                         memset((void *)&exp->tuple.src.u3 + len, 0x00,
235                                sizeof(exp->tuple.src.u3) - len);
236                 memset(&exp->mask.src.u3, 0xFF, len);
237                 if (sizeof(exp->mask.src.u3) > len)
238                         memset((void *)&exp->mask.src.u3 + len, 0x00,
239                                sizeof(exp->mask.src.u3) - len);
240         } else {
241                 memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
242                 memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
243         }
244
245         if (daddr) {
246                 memcpy(&exp->tuple.dst.u3, daddr, len);
247                 if (sizeof(exp->tuple.dst.u3) > len)
248                         /* address needs to be cleared for nf_ct_tuple_equal */
249                         memset((void *)&exp->tuple.dst.u3 + len, 0x00,
250                                sizeof(exp->tuple.dst.u3) - len);
251                 memset(&exp->mask.dst.u3, 0xFF, len);
252                 if (sizeof(exp->mask.dst.u3) > len)
253                         memset((void *)&exp->mask.dst.u3 + len, 0x00,
254                                sizeof(exp->mask.dst.u3) - len);
255         } else {
256                 memset(&exp->tuple.dst.u3, 0x00, sizeof(exp->tuple.dst.u3));
257                 memset(&exp->mask.dst.u3, 0x00, sizeof(exp->mask.dst.u3));
258         }
259
260         if (src) {
261                 exp->tuple.src.u.all = (__force u16)*src;
262                 exp->mask.src.u.all = 0xFFFF;
263         } else {
264                 exp->tuple.src.u.all = 0;
265                 exp->mask.src.u.all = 0;
266         }
267
268         if (dst) {
269                 exp->tuple.dst.u.all = (__force u16)*dst;
270                 exp->mask.dst.u.all = 0xFFFF;
271         } else {
272                 exp->tuple.dst.u.all = 0;
273                 exp->mask.dst.u.all = 0;
274         }
275 }
276 EXPORT_SYMBOL_GPL(nf_conntrack_expect_init);
277
278 void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
279 {
280         if (atomic_dec_and_test(&exp->use))
281                 kmem_cache_free(nf_conntrack_expect_cachep, exp);
282 }
283 EXPORT_SYMBOL_GPL(nf_conntrack_expect_put);
284
285 static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
286 {
287         struct nf_conn_help *master_help = nfct_help(exp->master);
288
289         atomic_inc(&exp->use);
290         master_help->expecting++;
291         list_add(&exp->list, &nf_conntrack_expect_list);
292
293         setup_timer(&exp->timeout, expectation_timed_out, (unsigned long)exp);
294         exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
295         add_timer(&exp->timeout);
296
297         exp->id = ++nf_conntrack_expect_next_id;
298         atomic_inc(&exp->use);
299         NF_CT_STAT_INC(expect_create);
300 }
301
302 /* Race with expectations being used means we could have none to find; OK. */
303 static void evict_oldest_expect(struct nf_conn *master)
304 {
305         struct nf_conntrack_expect *i;
306
307         list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
308                 if (i->master == master) {
309                         if (del_timer(&i->timeout)) {
310                                 nf_ct_unlink_expect(i);
311                                 nf_conntrack_expect_put(i);
312                         }
313                         break;
314                 }
315         }
316 }
317
318 static inline int refresh_timer(struct nf_conntrack_expect *i)
319 {
320         struct nf_conn_help *master_help = nfct_help(i->master);
321
322         if (!del_timer(&i->timeout))
323                 return 0;
324
325         i->timeout.expires = jiffies + master_help->helper->timeout*HZ;
326         add_timer(&i->timeout);
327         return 1;
328 }
329
330 int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
331 {
332         struct nf_conntrack_expect *i;
333         struct nf_conn *master = expect->master;
334         struct nf_conn_help *master_help = nfct_help(master);
335         int ret;
336
337         NF_CT_ASSERT(master_help);
338
339         write_lock_bh(&nf_conntrack_lock);
340         list_for_each_entry(i, &nf_conntrack_expect_list, list) {
341                 if (expect_matches(i, expect)) {
342                         /* Refresh timer: if it's dying, ignore.. */
343                         if (refresh_timer(i)) {
344                                 ret = 0;
345                                 goto out;
346                         }
347                 } else if (expect_clash(i, expect)) {
348                         ret = -EBUSY;
349                         goto out;
350                 }
351         }
352         /* Will be over limit? */
353         if (master_help->helper->max_expected &&
354             master_help->expecting >= master_help->helper->max_expected)
355                 evict_oldest_expect(master);
356
357         nf_conntrack_expect_insert(expect);
358         nf_conntrack_expect_event(IPEXP_NEW, expect);
359         ret = 0;
360 out:
361         write_unlock_bh(&nf_conntrack_lock);
362         return ret;
363 }
364 EXPORT_SYMBOL_GPL(nf_conntrack_expect_related);
365
366 #ifdef CONFIG_PROC_FS
367 static void *exp_seq_start(struct seq_file *s, loff_t *pos)
368 {
369         struct list_head *e = &nf_conntrack_expect_list;
370         loff_t i;
371
372         /* strange seq_file api calls stop even if we fail,
373          * thus we need to grab lock since stop unlocks */
374         read_lock_bh(&nf_conntrack_lock);
375
376         if (list_empty(e))
377                 return NULL;
378
379         for (i = 0; i <= *pos; i++) {
380                 e = e->next;
381                 if (e == &nf_conntrack_expect_list)
382                         return NULL;
383         }
384         return e;
385 }
386
387 static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
388 {
389         struct list_head *e = v;
390
391         ++*pos;
392         e = e->next;
393
394         if (e == &nf_conntrack_expect_list)
395                 return NULL;
396
397         return e;
398 }
399
400 static void exp_seq_stop(struct seq_file *s, void *v)
401 {
402         read_unlock_bh(&nf_conntrack_lock);
403 }
404
405 static int exp_seq_show(struct seq_file *s, void *v)
406 {
407         struct nf_conntrack_expect *expect = v;
408
409         if (expect->timeout.function)
410                 seq_printf(s, "%ld ", timer_pending(&expect->timeout)
411                            ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
412         else
413                 seq_printf(s, "- ");
414         seq_printf(s, "l3proto = %u proto=%u ",
415                    expect->tuple.src.l3num,
416                    expect->tuple.dst.protonum);
417         print_tuple(s, &expect->tuple,
418                     __nf_ct_l3proto_find(expect->tuple.src.l3num),
419                     __nf_ct_l4proto_find(expect->tuple.src.l3num,
420                                        expect->tuple.dst.protonum));
421         return seq_putc(s, '\n');
422 }
423
424 static struct seq_operations exp_seq_ops = {
425         .start = exp_seq_start,
426         .next = exp_seq_next,
427         .stop = exp_seq_stop,
428         .show = exp_seq_show
429 };
430
431 static int exp_open(struct inode *inode, struct file *file)
432 {
433         return seq_open(file, &exp_seq_ops);
434 }
435
436 const struct file_operations exp_file_ops = {
437         .owner   = THIS_MODULE,
438         .open    = exp_open,
439         .read    = seq_read,
440         .llseek  = seq_lseek,
441         .release = seq_release
442 };
443 #endif /* CONFIG_PROC_FS */