1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/cache.h>
22 #include <asm/uaccess.h>
23 #include <linux/audit.h>
25
26 #include "xfrm_hash.h"
27
28 struct sock *xfrm_nl;
29 EXPORT_SYMBOL(xfrm_nl);
30
31 u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME;
32 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
33
34 u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE;
35 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
36
37 u32 sysctl_xfrm_acq_expires __read_mostly = 30;
38
39 /* Each xfrm_state may be linked to three hash tables:
40
41    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
42    2. Hash table by (daddr,family,reqid) to find what SAs exist for given
43       destination/tunnel endpoint. (output)
44    3. Hash table by (daddr,saddr,family) to look up SAs by address pair. */
45
46 static DEFINE_SPINLOCK(xfrm_state_lock);
47
48 /* Hash table to find appropriate SA towards given target (endpoint
49  * of tunnel or destination of transport mode) allowed by selector.
50  *
51  * Main use is finding SA after policy selected tunnel or transport mode.
52  * Also, it can be used by ah/esp icmp error handler to find offending SA.
53  */
54 static struct hlist_head *xfrm_state_bydst __read_mostly;
55 static struct hlist_head *xfrm_state_bysrc __read_mostly;
56 static struct hlist_head *xfrm_state_byspi __read_mostly;
57 static unsigned int xfrm_state_hmask __read_mostly;
58 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
59 static unsigned int xfrm_state_num;
60 static unsigned int xfrm_state_genid;
61
62 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
63                                          xfrm_address_t *saddr,
64                                          u32 reqid,
65                                          unsigned short family)
66 {
67         return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
68 }
69
70 static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
71                                          xfrm_address_t *saddr,
72                                          unsigned short family)
73 {
74         return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
75 }
76
77 static inline unsigned int
78 xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
79 {
80         return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
81 }
82
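/*
 * Editorial note, illustrative only (not part of the original source): each
 * helper above reduces the full hash of its key to a bucket index by masking
 * with xfrm_state_hmask.  Assuming the usual initial allocation of eight
 * buckets, xfrm_state_hmask starts at 7, so for example:
 *
 *     h = xfrm_dst_hash(daddr, saddr, reqid, family);   (yields 0..7 here)
 *     hlist_add_head(&x->bydst, xfrm_state_bydst + h);
 *
 * The same pattern is used for the bysrc and byspi tables below.
 */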
83 static void xfrm_hash_transfer(struct hlist_head *list,
84                                struct hlist_head *ndsttable,
85                                struct hlist_head *nsrctable,
86                                struct hlist_head *nspitable,
87                                unsigned int nhashmask)
88 {
89         struct hlist_node *entry, *tmp;
90         struct xfrm_state *x;
91
92         hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
93                 unsigned int h;
94
95                 h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
96                                     x->props.reqid, x->props.family,
97                                     nhashmask);
98                 hlist_add_head(&x->bydst, ndsttable+h);
99
100                 h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
101                                     x->props.family,
102                                     nhashmask);
103                 hlist_add_head(&x->bysrc, nsrctable+h);
104
105                 if (x->id.spi) {
106                         h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
107                                             x->id.proto, x->props.family,
108                                             nhashmask);
109                         hlist_add_head(&x->byspi, nspitable+h);
110                 }
111         }
112 }
113
114 static unsigned long xfrm_hash_new_size(void)
115 {
116         return ((xfrm_state_hmask + 1) << 1) *
117                 sizeof(struct hlist_head);
118 }
119
120 static DEFINE_MUTEX(hash_resize_mutex);
121
122 static void xfrm_hash_resize(struct work_struct *__unused)
123 {
124         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
125         unsigned long nsize, osize;
126         unsigned int nhashmask, ohashmask;
127         int i;
128
129         mutex_lock(&hash_resize_mutex);
130
131         nsize = xfrm_hash_new_size();
132         ndst = xfrm_hash_alloc(nsize);
133         if (!ndst)
134                 goto out_unlock;
135         nsrc = xfrm_hash_alloc(nsize);
136         if (!nsrc) {
137                 xfrm_hash_free(ndst, nsize);
138                 goto out_unlock;
139         }
140         nspi = xfrm_hash_alloc(nsize);
141         if (!nspi) {
142                 xfrm_hash_free(ndst, nsize);
143                 xfrm_hash_free(nsrc, nsize);
144                 goto out_unlock;
145         }
146
147         spin_lock_bh(&xfrm_state_lock);
148
149         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
150         for (i = xfrm_state_hmask; i >= 0; i--)
151                 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
152                                    nhashmask);
153
154         odst = xfrm_state_bydst;
155         osrc = xfrm_state_bysrc;
156         ospi = xfrm_state_byspi;
157         ohashmask = xfrm_state_hmask;
158
159         xfrm_state_bydst = ndst;
160         xfrm_state_bysrc = nsrc;
161         xfrm_state_byspi = nspi;
162         xfrm_state_hmask = nhashmask;
163
164         spin_unlock_bh(&xfrm_state_lock);
165
166         osize = (ohashmask + 1) * sizeof(struct hlist_head);
167         xfrm_hash_free(odst, osize);
168         xfrm_hash_free(osrc, osize);
169         xfrm_hash_free(ospi, osize);
170
171 out_unlock:
172         mutex_unlock(&hash_resize_mutex);
173 }
174
175 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
176
177 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
178 EXPORT_SYMBOL(km_waitq);
179
180 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
181 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
182
183 static struct work_struct xfrm_state_gc_work;
184 static HLIST_HEAD(xfrm_state_gc_list);
185 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
186
187 int __xfrm_state_delete(struct xfrm_state *x);
188
189 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
190 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
191
192 static void xfrm_state_gc_destroy(struct xfrm_state *x)
193 {
194         del_timer_sync(&x->timer);
195         del_timer_sync(&x->rtimer);
196         kfree(x->aalg);
197         kfree(x->ealg);
198         kfree(x->calg);
199         kfree(x->encap);
200         kfree(x->coaddr);
201         if (x->mode)
202                 xfrm_put_mode(x->mode);
203         if (x->type) {
204                 x->type->destructor(x);
205                 xfrm_put_type(x->type);
206         }
207         security_xfrm_state_free(x);
208         kfree(x);
209 }
210
211 static void xfrm_state_gc_task(struct work_struct *data)
212 {
213         struct xfrm_state *x;
214         struct hlist_node *entry, *tmp;
215         struct hlist_head gc_list;
216
217         spin_lock_bh(&xfrm_state_gc_lock);
218         gc_list.first = xfrm_state_gc_list.first;
219         INIT_HLIST_HEAD(&xfrm_state_gc_list);
220         spin_unlock_bh(&xfrm_state_gc_lock);
221
222         hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
223                 xfrm_state_gc_destroy(x);
224
225         wake_up(&km_waitq);
226 }
227
228 static inline unsigned long make_jiffies(long secs)
229 {
230         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
231                 return MAX_SCHEDULE_TIMEOUT-1;
232         else
233                 return secs*HZ;
234 }
235
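/*
 * Editorial note, illustrative only: make_jiffies() converts a lifetime in
 * seconds to a timer interval while clamping huge values.  With HZ == 1000 a
 * 10 second timeout becomes 10000 jiffies, while anything at or beyond
 * (MAX_SCHEDULE_TIMEOUT - 1) / HZ seconds is capped at
 * MAX_SCHEDULE_TIMEOUT - 1 so the value passed to mod_timer() never overflows.
 */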
236 static void xfrm_timer_handler(unsigned long data)
237 {
238         struct xfrm_state *x = (struct xfrm_state*)data;
239         unsigned long now = get_seconds();
240         long next = LONG_MAX;
241         int warn = 0;
242         int err = 0;
243
244         spin_lock(&x->lock);
245         if (x->km.state == XFRM_STATE_DEAD)
246                 goto out;
247         if (x->km.state == XFRM_STATE_EXPIRED)
248                 goto expired;
249         if (x->lft.hard_add_expires_seconds) {
250                 long tmo = x->lft.hard_add_expires_seconds +
251                         x->curlft.add_time - now;
252                 if (tmo <= 0)
253                         goto expired;
254                 if (tmo < next)
255                         next = tmo;
256         }
257         if (x->lft.hard_use_expires_seconds) {
258                 long tmo = x->lft.hard_use_expires_seconds +
259                         (x->curlft.use_time ? : now) - now;
260                 if (tmo <= 0)
261                         goto expired;
262                 if (tmo < next)
263                         next = tmo;
264         }
265         if (x->km.dying)
266                 goto resched;
267         if (x->lft.soft_add_expires_seconds) {
268                 long tmo = x->lft.soft_add_expires_seconds +
269                         x->curlft.add_time - now;
270                 if (tmo <= 0)
271                         warn = 1;
272                 else if (tmo < next)
273                         next = tmo;
274         }
275         if (x->lft.soft_use_expires_seconds) {
276                 long tmo = x->lft.soft_use_expires_seconds +
277                         (x->curlft.use_time ? : now) - now;
278                 if (tmo <= 0)
279                         warn = 1;
280                 else if (tmo < next)
281                         next = tmo;
282         }
283
284         x->km.dying = warn;
285         if (warn)
286                 km_state_expired(x, 0, 0);
287 resched:
288         if (next != LONG_MAX)
289                 mod_timer(&x->timer, jiffies + make_jiffies(next));
290
291         goto out;
292
293 expired:
294         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
295                 x->km.state = XFRM_STATE_EXPIRED;
296                 wake_up(&km_waitq);
297                 next = 2;
298                 goto resched;
299         }
300
301         err = __xfrm_state_delete(x);
302         if (!err && x->id.spi)
303                 km_state_expired(x, 1, 0);
304
305         xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
306                        AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);
307
308 out:
309         spin_unlock(&x->lock);
310 }
311
312 static void xfrm_replay_timer_handler(unsigned long data);
313
314 struct xfrm_state *xfrm_state_alloc(void)
315 {
316         struct xfrm_state *x;
317
318         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
319
320         if (x) {
321                 atomic_set(&x->refcnt, 1);
322                 atomic_set(&x->tunnel_users, 0);
323                 INIT_HLIST_NODE(&x->bydst);
324                 INIT_HLIST_NODE(&x->bysrc);
325                 INIT_HLIST_NODE(&x->byspi);
326                 init_timer(&x->timer);
327                 x->timer.function = xfrm_timer_handler;
328                 x->timer.data     = (unsigned long)x;
329                 init_timer(&x->rtimer);
330                 x->rtimer.function = xfrm_replay_timer_handler;
331                 x->rtimer.data     = (unsigned long)x;
332                 x->curlft.add_time = get_seconds();
333                 x->lft.soft_byte_limit = XFRM_INF;
334                 x->lft.soft_packet_limit = XFRM_INF;
335                 x->lft.hard_byte_limit = XFRM_INF;
336                 x->lft.hard_packet_limit = XFRM_INF;
337                 x->replay_maxage = 0;
338                 x->replay_maxdiff = 0;
339                 spin_lock_init(&x->lock);
340         }
341         return x;
342 }
343 EXPORT_SYMBOL(xfrm_state_alloc);
344
345 void __xfrm_state_destroy(struct xfrm_state *x)
346 {
347         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
348
349         spin_lock_bh(&xfrm_state_gc_lock);
350         hlist_add_head(&x->bydst, &xfrm_state_gc_list);
351         spin_unlock_bh(&xfrm_state_gc_lock);
352         schedule_work(&xfrm_state_gc_work);
353 }
354 EXPORT_SYMBOL(__xfrm_state_destroy);
355
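/*
 * Editorial note, illustrative only: __xfrm_state_destroy() merely queues the
 * dead state on xfrm_state_gc_list; the actual teardown (del_timer_sync(),
 * freeing the algorithms, dropping type/mode and the security context) is
 * done later by xfrm_state_gc_task() from a workqueue, so the final put can
 * safely happen in softirq/BH context.
 */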
356 int __xfrm_state_delete(struct xfrm_state *x)
357 {
358         int err = -ESRCH;
359
360         if (x->km.state != XFRM_STATE_DEAD) {
361                 x->km.state = XFRM_STATE_DEAD;
362                 spin_lock(&xfrm_state_lock);
363                 hlist_del(&x->bydst);
364                 hlist_del(&x->bysrc);
365                 if (x->id.spi)
366                         hlist_del(&x->byspi);
367                 xfrm_state_num--;
368                 spin_unlock(&xfrm_state_lock);
369
370                 /* All xfrm_state objects are created by xfrm_state_alloc.
371                  * The xfrm_state_alloc call gives a reference, and that
372                  * is what we are dropping here.
373                  */
374                 __xfrm_state_put(x);
375                 err = 0;
376         }
377
378         return err;
379 }
380 EXPORT_SYMBOL(__xfrm_state_delete);
381
382 int xfrm_state_delete(struct xfrm_state *x)
383 {
384         int err;
385
386         spin_lock_bh(&x->lock);
387         err = __xfrm_state_delete(x);
388         spin_unlock_bh(&x->lock);
389
390         return err;
391 }
392 EXPORT_SYMBOL(xfrm_state_delete);
393
394 #ifdef CONFIG_SECURITY_NETWORK_XFRM
395 static inline int
396 xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
397 {
398         int i, err = 0;
399
400         for (i = 0; i <= xfrm_state_hmask; i++) {
401                 struct hlist_node *entry;
402                 struct xfrm_state *x;
403
404                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
405                         if (xfrm_id_proto_match(x->id.proto, proto) &&
406                            (err = security_xfrm_state_delete(x)) != 0) {
407                                 xfrm_audit_log(audit_info->loginuid,
408                                                audit_info->secid,
409                                                AUDIT_MAC_IPSEC_DELSA,
410                                                0, NULL, x);
411
412                                 return err;
413                         }
414                 }
415         }
416
417         return err;
418 }
419 #else
420 static inline int
421 xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
422 {
423         return 0;
424 }
425 #endif
426
427 int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
428 {
429         int i, err = 0;
430
431         spin_lock_bh(&xfrm_state_lock);
432         err = xfrm_state_flush_secctx_check(proto, audit_info);
433         if (err)
434                 goto out;
435
436         for (i = 0; i <= xfrm_state_hmask; i++) {
437                 struct hlist_node *entry;
438                 struct xfrm_state *x;
439 restart:
440                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
441                         if (!xfrm_state_kern(x) &&
442                             xfrm_id_proto_match(x->id.proto, proto)) {
443                                 xfrm_state_hold(x);
444                                 spin_unlock_bh(&xfrm_state_lock);
445
446                                 err = xfrm_state_delete(x);
447                                 xfrm_audit_log(audit_info->loginuid,
448                                                audit_info->secid,
449                                                AUDIT_MAC_IPSEC_DELSA,
450                                                err ? 0 : 1, NULL, x);
451                                 xfrm_state_put(x);
452
453                                 spin_lock_bh(&xfrm_state_lock);
454                                 goto restart;
455                         }
456                 }
457         }
458         err = 0;
459
460 out:
461         spin_unlock_bh(&xfrm_state_lock);
462         wake_up(&km_waitq);
463         return err;
464 }
465 EXPORT_SYMBOL(xfrm_state_flush);
466
467 void xfrm_sad_getinfo(struct xfrmk_sadinfo *si)
468 {
469         spin_lock_bh(&xfrm_state_lock);
470         si->sadcnt = xfrm_state_num;
471         si->sadhcnt = xfrm_state_hmask;
472         si->sadhmcnt = xfrm_state_hashmax;
473         spin_unlock_bh(&xfrm_state_lock);
474 }
475 EXPORT_SYMBOL(xfrm_sad_getinfo);
476
477 static int
478 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
479                   struct xfrm_tmpl *tmpl,
480                   xfrm_address_t *daddr, xfrm_address_t *saddr,
481                   unsigned short family)
482 {
483         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
484         if (!afinfo)
485                 return -1;
486         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
487         xfrm_state_put_afinfo(afinfo);
488         return 0;
489 }
490
491 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
492 {
493         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
494         struct xfrm_state *x;
495         struct hlist_node *entry;
496
497         hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
498                 if (x->props.family != family ||
499                     x->id.spi       != spi ||
500                     x->id.proto     != proto)
501                         continue;
502
503                 switch (family) {
504                 case AF_INET:
505                         if (x->id.daddr.a4 != daddr->a4)
506                                 continue;
507                         break;
508                 case AF_INET6:
509                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
510                                              (struct in6_addr *)
511                                              x->id.daddr.a6))
512                                 continue;
513                         break;
514                 }
515
516                 xfrm_state_hold(x);
517                 return x;
518         }
519
520         return NULL;
521 }
522
523 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
524 {
525         unsigned int h = xfrm_src_hash(daddr, saddr, family);
526         struct xfrm_state *x;
527         struct hlist_node *entry;
528
529         hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
530                 if (x->props.family != family ||
531                     x->id.proto     != proto)
532                         continue;
533
534                 switch (family) {
535                 case AF_INET:
536                         if (x->id.daddr.a4 != daddr->a4 ||
537                             x->props.saddr.a4 != saddr->a4)
538                                 continue;
539                         break;
540                 case AF_INET6:
541                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
542                                              (struct in6_addr *)
543                                              x->id.daddr.a6) ||
544                             !ipv6_addr_equal((struct in6_addr *)saddr,
545                                              (struct in6_addr *)
546                                              x->props.saddr.a6))
547                                 continue;
548                         break;
549                 }
550
551                 xfrm_state_hold(x);
552                 return x;
553         }
554
555         return NULL;
556 }
557
558 static inline struct xfrm_state *
559 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
560 {
561         if (use_spi)
562                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
563                                            x->id.proto, family);
564         else
565                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
566                                                   &x->props.saddr,
567                                                   x->id.proto, family);
568 }
569
570 static void xfrm_hash_grow_check(int have_hash_collision)
571 {
572         if (have_hash_collision &&
573             (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
574             xfrm_state_num > xfrm_state_hmask)
575                 schedule_work(&xfrm_hash_work);
576 }
577
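/*
 * Editorial worked example, illustrative only: with xfrm_state_hmask == 63
 * the table has 64 buckets.  Once xfrm_state_num exceeds 63 and an insertion
 * lands on a non-empty chain (have_hash_collision), xfrm_hash_work is
 * scheduled and xfrm_hash_resize() doubles the table to 128 buckets,
 * rehashing every state, until xfrm_state_hashmax is reached.
 */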
578 struct xfrm_state *
579 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
580                 struct flowi *fl, struct xfrm_tmpl *tmpl,
581                 struct xfrm_policy *pol, int *err,
582                 unsigned short family)
583 {
584         unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
585         struct hlist_node *entry;
586         struct xfrm_state *x, *x0;
587         int acquire_in_progress = 0;
588         int error = 0;
589         struct xfrm_state *best = NULL;
590
591         spin_lock_bh(&xfrm_state_lock);
592         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
593                 if (x->props.family == family &&
594                     x->props.reqid == tmpl->reqid &&
595                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
596                     xfrm_state_addr_check(x, daddr, saddr, family) &&
597                     tmpl->mode == x->props.mode &&
598                     tmpl->id.proto == x->id.proto &&
599                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
600                         /* Resolution logic:
601                            1. There is a valid state with matching selector.
602                               Done.
603                            2. Valid state with inappropriate selector. Skip.
604
605                            Entering area of "sysdeps".
606
607                            3. If the state is not valid, its selector is
608                               temporary; it matches only the session which
609                               triggered the previous resolution. The key
610                               manager will install a state with the proper
611                               selector.
612                          */
613                         if (x->km.state == XFRM_STATE_VALID) {
614                                 if (!xfrm_selector_match(&x->sel, fl, family) ||
615                                     !security_xfrm_state_pol_flow_match(x, pol, fl))
616                                         continue;
617                                 if (!best ||
618                                     best->km.dying > x->km.dying ||
619                                     (best->km.dying == x->km.dying &&
620                                      best->curlft.add_time < x->curlft.add_time))
621                                         best = x;
622                         } else if (x->km.state == XFRM_STATE_ACQ) {
623                                 acquire_in_progress = 1;
624                         } else if (x->km.state == XFRM_STATE_ERROR ||
625                                    x->km.state == XFRM_STATE_EXPIRED) {
626                                 if (xfrm_selector_match(&x->sel, fl, family) &&
627                                     security_xfrm_state_pol_flow_match(x, pol, fl))
628                                         error = -ESRCH;
629                         }
630                 }
631         }
632
633         x = best;
634         if (!x && !error && !acquire_in_progress) {
635                 if (tmpl->id.spi &&
636                     (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
637                                               tmpl->id.proto, family)) != NULL) {
638                         xfrm_state_put(x0);
639                         error = -EEXIST;
640                         goto out;
641                 }
642                 x = xfrm_state_alloc();
643                 if (x == NULL) {
644                         error = -ENOMEM;
645                         goto out;
646                 }
647                 /* Initialize temporary selector matching only
648                  * to current session. */
649                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
650
651                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
652                 if (error) {
653                         x->km.state = XFRM_STATE_DEAD;
654                         xfrm_state_put(x);
655                         x = NULL;
656                         goto out;
657                 }
658
659                 if (km_query(x, tmpl, pol) == 0) {
660                         x->km.state = XFRM_STATE_ACQ;
661                         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
662                         h = xfrm_src_hash(daddr, saddr, family);
663                         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
664                         if (x->id.spi) {
665                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
666                                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
667                         }
668                         x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
669                         x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
670                         add_timer(&x->timer);
671                         xfrm_state_num++;
672                         xfrm_hash_grow_check(x->bydst.next != NULL);
673                 } else {
674                         x->km.state = XFRM_STATE_DEAD;
675                         xfrm_state_put(x);
676                         x = NULL;
677                         error = -ESRCH;
678                 }
679         }
680 out:
681         if (x)
682                 xfrm_state_hold(x);
683         else
684                 *err = acquire_in_progress ? -EAGAIN : error;
685         spin_unlock_bh(&xfrm_state_lock);
686         return x;
687 }
688
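/*
 * Editorial summary of xfrm_state_find(), derived from the code above
 * (illustrative only): a matching VALID state is returned with a reference
 * held; a pending ACQ state makes the caller retry (*err = -EAGAIN);
 * otherwise a temporary ACQ state is created, km_query() asks the registered
 * key managers to negotiate a real SA, and the larval entry times out after
 * sysctl_xfrm_acq_expires seconds if nothing is installed.
 */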
689 static void __xfrm_state_insert(struct xfrm_state *x)
690 {
691         unsigned int h;
692
693         x->genid = ++xfrm_state_genid;
694
695         h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
696                           x->props.reqid, x->props.family);
697         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
698
699         h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
700         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
701
702         if (x->id.spi) {
703                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
704                                   x->props.family);
705
706                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
707         }
708
709         mod_timer(&x->timer, jiffies + HZ);
710         if (x->replay_maxage)
711                 mod_timer(&x->rtimer, jiffies + x->replay_maxage);
712
713         wake_up(&km_waitq);
714
715         xfrm_state_num++;
716
717         xfrm_hash_grow_check(x->bydst.next != NULL);
718 }
719
720 /* xfrm_state_lock is held */
721 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
722 {
723         unsigned short family = xnew->props.family;
724         u32 reqid = xnew->props.reqid;
725         struct xfrm_state *x;
726         struct hlist_node *entry;
727         unsigned int h;
728
729         h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
730         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
731                 if (x->props.family     == family &&
732                     x->props.reqid      == reqid &&
733                     !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
734                     !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
735                         x->genid = xfrm_state_genid;
736         }
737 }
738
739 void xfrm_state_insert(struct xfrm_state *x)
740 {
741         spin_lock_bh(&xfrm_state_lock);
742         __xfrm_state_bump_genids(x);
743         __xfrm_state_insert(x);
744         spin_unlock_bh(&xfrm_state_lock);
745 }
746 EXPORT_SYMBOL(xfrm_state_insert);
747
748 /* xfrm_state_lock is held */
749 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
750 {
751         unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
752         struct hlist_node *entry;
753         struct xfrm_state *x;
754
755         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
756                 if (x->props.reqid  != reqid ||
757                     x->props.mode   != mode ||
758                     x->props.family != family ||
759                     x->km.state     != XFRM_STATE_ACQ ||
760                     x->id.spi       != 0 ||
761                     x->id.proto     != proto)
762                         continue;
763
764                 switch (family) {
765                 case AF_INET:
766                         if (x->id.daddr.a4    != daddr->a4 ||
767                             x->props.saddr.a4 != saddr->a4)
768                                 continue;
769                         break;
770                 case AF_INET6:
771                         if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
772                                              (struct in6_addr *)daddr) ||
773                             !ipv6_addr_equal((struct in6_addr *)
774                                              x->props.saddr.a6,
775                                              (struct in6_addr *)saddr))
776                                 continue;
777                         break;
778                 }
779
780                 xfrm_state_hold(x);
781                 return x;
782         }
783
784         if (!create)
785                 return NULL;
786
787         x = xfrm_state_alloc();
788         if (likely(x)) {
789                 switch (family) {
790                 case AF_INET:
791                         x->sel.daddr.a4 = daddr->a4;
792                         x->sel.saddr.a4 = saddr->a4;
793                         x->sel.prefixlen_d = 32;
794                         x->sel.prefixlen_s = 32;
795                         x->props.saddr.a4 = saddr->a4;
796                         x->id.daddr.a4 = daddr->a4;
797                         break;
798
799                 case AF_INET6:
800                         ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
801                                        (struct in6_addr *)daddr);
802                         ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
803                                        (struct in6_addr *)saddr);
804                         x->sel.prefixlen_d = 128;
805                         x->sel.prefixlen_s = 128;
806                         ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
807                                        (struct in6_addr *)saddr);
808                         ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
809                                        (struct in6_addr *)daddr);
810                         break;
811                 }
812
813                 x->km.state = XFRM_STATE_ACQ;
814                 x->id.proto = proto;
815                 x->props.family = family;
816                 x->props.mode = mode;
817                 x->props.reqid = reqid;
818                 x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
819                 xfrm_state_hold(x);
820                 x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
821                 add_timer(&x->timer);
822                 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
823                 h = xfrm_src_hash(daddr, saddr, family);
824                 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
825                 wake_up(&km_waitq);
826
827                 xfrm_state_num++;
828
829                 xfrm_hash_grow_check(x->bydst.next != NULL);
830         }
831
832         return x;
833 }
834
835 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
836
837 int xfrm_state_add(struct xfrm_state *x)
838 {
839         struct xfrm_state *x1;
840         int family;
841         int err;
842         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
843
844         family = x->props.family;
845
846         spin_lock_bh(&xfrm_state_lock);
847
848         x1 = __xfrm_state_locate(x, use_spi, family);
849         if (x1) {
850                 xfrm_state_put(x1);
851                 x1 = NULL;
852                 err = -EEXIST;
853                 goto out;
854         }
855
856         if (use_spi && x->km.seq) {
857                 x1 = __xfrm_find_acq_byseq(x->km.seq);
858                 if (x1 && ((x1->id.proto != x->id.proto) ||
859                     xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) {
860                         xfrm_state_put(x1);
861                         x1 = NULL;
862                 }
863         }
864
865         if (use_spi && !x1)
866                 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
867                                      x->id.proto,
868                                      &x->id.daddr, &x->props.saddr, 0);
869
870         __xfrm_state_bump_genids(x);
871         __xfrm_state_insert(x);
872         err = 0;
873
874 out:
875         spin_unlock_bh(&xfrm_state_lock);
876
877         if (x1) {
878                 xfrm_state_delete(x1);
879                 xfrm_state_put(x1);
880         }
881
882         return err;
883 }
884 EXPORT_SYMBOL(xfrm_state_add);
885
886 #ifdef CONFIG_XFRM_MIGRATE
887 struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
888 {
889         int err = -ENOMEM;
890         struct xfrm_state *x = xfrm_state_alloc();
891         if (!x)
892                 goto error;
893
894         memcpy(&x->id, &orig->id, sizeof(x->id));
895         memcpy(&x->sel, &orig->sel, sizeof(x->sel));
896         memcpy(&x->lft, &orig->lft, sizeof(x->lft));
897         x->props.mode = orig->props.mode;
898         x->props.replay_window = orig->props.replay_window;
899         x->props.reqid = orig->props.reqid;
900         x->props.family = orig->props.family;
901         x->props.saddr = orig->props.saddr;
902
903         if (orig->aalg) {
904                 x->aalg = xfrm_algo_clone(orig->aalg);
905                 if (!x->aalg)
906                         goto error;
907         }
908         x->props.aalgo = orig->props.aalgo;
909
910         if (orig->ealg) {
911                 x->ealg = xfrm_algo_clone(orig->ealg);
912                 if (!x->ealg)
913                         goto error;
914         }
915         x->props.ealgo = orig->props.ealgo;
916
917         if (orig->calg) {
918                 x->calg = xfrm_algo_clone(orig->calg);
919                 if (!x->calg)
920                         goto error;
921         }
922         x->props.calgo = orig->props.calgo;
923
924         if (orig->encap) {
925                 x->encap = kmemdup(orig->encap, sizeof(*x->encap), GFP_KERNEL);
926                 if (!x->encap)
927                         goto error;
928         }
929
930         if (orig->coaddr) {
931                 x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
932                                     GFP_KERNEL);
933                 if (!x->coaddr)
934                         goto error;
935         }
936
937         err = xfrm_init_state(x);
938         if (err)
939                 goto error;
940
941         x->props.flags = orig->props.flags;
942
943         x->curlft.add_time = orig->curlft.add_time;
944         x->km.state = orig->km.state;
945         x->km.seq = orig->km.seq;
946
947         return x;
948
949  error:
950         if (errp)
951                 *errp = err;
952         if (x) {
953                 kfree(x->aalg);
954                 kfree(x->ealg);
955                 kfree(x->calg);
956                 kfree(x->encap);
957                 kfree(x->coaddr);
958         }
959         kfree(x);
960         return NULL;
961 }
962 EXPORT_SYMBOL(xfrm_state_clone);
963
964 /* xfrm_state_lock is held */
965 struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
966 {
967         unsigned int h;
968         struct xfrm_state *x;
969         struct hlist_node *entry;
970
971         if (m->reqid) {
972                 h = xfrm_dst_hash(&m->old_daddr, &m->old_saddr,
973                                   m->reqid, m->old_family);
974                 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
975                         if (x->props.mode != m->mode ||
976                             x->id.proto != m->proto)
977                                 continue;
978                         if (m->reqid && x->props.reqid != m->reqid)
979                                 continue;
980                         if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
981                                           m->old_family) ||
982                             xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
983                                           m->old_family))
984                                 continue;
985                         xfrm_state_hold(x);
986                         return x;
987                 }
988         } else {
989                 h = xfrm_src_hash(&m->old_daddr, &m->old_saddr,
990                                   m->old_family);
991                 hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
992                         if (x->props.mode != m->mode ||
993                             x->id.proto != m->proto)
994                                 continue;
995                         if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
996                                           m->old_family) ||
997                             xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
998                                           m->old_family))
999                                 continue;
1000                         xfrm_state_hold(x);
1001                         return x;
1002                 }
1003         }
1004
1005         return NULL;
1006 }
1007 EXPORT_SYMBOL(xfrm_migrate_state_find);
1008
1009 struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x,
1010                                        struct xfrm_migrate *m)
1011 {
1012         struct xfrm_state *xc;
1013         int err;
1014
1015         xc = xfrm_state_clone(x, &err);
1016         if (!xc)
1017                 return NULL;
1018
1019         memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
1020         memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
1021
1022         /* add state */
1023         if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) {
1024                 /* care is needed when the destination address of the
1025                    state is updated, as it is part of the lookup triplet */
1026                 xfrm_state_insert(xc);
1027         } else {
1028                 if ((err = xfrm_state_add(xc)) < 0)
1029                         goto error;
1030         }
1031
1032         return xc;
1033 error:
1034         kfree(xc);
1035         return NULL;
1036 }
1037 EXPORT_SYMBOL(xfrm_state_migrate);
1038 #endif
1039
1040 int xfrm_state_update(struct xfrm_state *x)
1041 {
1042         struct xfrm_state *x1;
1043         int err;
1044         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
1045
1046         spin_lock_bh(&xfrm_state_lock);
1047         x1 = __xfrm_state_locate(x, use_spi, x->props.family);
1048
1049         err = -ESRCH;
1050         if (!x1)
1051                 goto out;
1052
1053         if (xfrm_state_kern(x1)) {
1054                 xfrm_state_put(x1);
1055                 err = -EEXIST;
1056                 goto out;
1057         }
1058
1059         if (x1->km.state == XFRM_STATE_ACQ) {
1060                 __xfrm_state_insert(x);
1061                 x = NULL;
1062         }
1063         err = 0;
1064
1065 out:
1066         spin_unlock_bh(&xfrm_state_lock);
1067
1068         if (err)
1069                 return err;
1070
1071         if (!x) {
1072                 xfrm_state_delete(x1);
1073                 xfrm_state_put(x1);
1074                 return 0;
1075         }
1076
1077         err = -EINVAL;
1078         spin_lock_bh(&x1->lock);
1079         if (likely(x1->km.state == XFRM_STATE_VALID)) {
1080                 if (x->encap && x1->encap)
1081                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
1082                 if (x->coaddr && x1->coaddr) {
1083                         memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
1084                 }
1085                 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
1086                         memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
1087                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
1088                 x1->km.dying = 0;
1089
1090                 mod_timer(&x1->timer, jiffies + HZ);
1091                 if (x1->curlft.use_time)
1092                         xfrm_state_check_expire(x1);
1093
1094                 err = 0;
1095         }
1096         spin_unlock_bh(&x1->lock);
1097
1098         xfrm_state_put(x1);
1099
1100         return err;
1101 }
1102 EXPORT_SYMBOL(xfrm_state_update);
1103
1104 int xfrm_state_check_expire(struct xfrm_state *x)
1105 {
1106         if (!x->curlft.use_time)
1107                 x->curlft.use_time = get_seconds();
1108
1109         if (x->km.state != XFRM_STATE_VALID)
1110                 return -EINVAL;
1111
1112         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
1113             x->curlft.packets >= x->lft.hard_packet_limit) {
1114                 x->km.state = XFRM_STATE_EXPIRED;
1115                 mod_timer(&x->timer, jiffies);
1116                 return -EINVAL;
1117         }
1118
1119         if (!x->km.dying &&
1120             (x->curlft.bytes >= x->lft.soft_byte_limit ||
1121              x->curlft.packets >= x->lft.soft_packet_limit)) {
1122                 x->km.dying = 1;
1123                 km_state_expired(x, 0, 0);
1124         }
1125         return 0;
1126 }
1127 EXPORT_SYMBOL(xfrm_state_check_expire);
1128
1129 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
1130 {
1131         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
1132                 - skb_headroom(skb);
1133
1134         if (nhead > 0)
1135                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
1136
1137         /* Check tail too... */
1138         return 0;
1139 }
1140
1141 int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
1142 {
1143         int err = xfrm_state_check_expire(x);
1144         if (err < 0)
1145                 goto err;
1146         err = xfrm_state_check_space(x, skb);
1147 err:
1148         return err;
1149 }
1150 EXPORT_SYMBOL(xfrm_state_check);
1151
1152 struct xfrm_state *
1153 xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
1154                   unsigned short family)
1155 {
1156         struct xfrm_state *x;
1157
1158         spin_lock_bh(&xfrm_state_lock);
1159         x = __xfrm_state_lookup(daddr, spi, proto, family);
1160         spin_unlock_bh(&xfrm_state_lock);
1161         return x;
1162 }
1163 EXPORT_SYMBOL(xfrm_state_lookup);
1164
1165 struct xfrm_state *
1166 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
1167                          u8 proto, unsigned short family)
1168 {
1169         struct xfrm_state *x;
1170
1171         spin_lock_bh(&xfrm_state_lock);
1172         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
1173         spin_unlock_bh(&xfrm_state_lock);
1174         return x;
1175 }
1176 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1177
1178 struct xfrm_state *
1179 xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
1180               xfrm_address_t *daddr, xfrm_address_t *saddr,
1181               int create, unsigned short family)
1182 {
1183         struct xfrm_state *x;
1184
1185         spin_lock_bh(&xfrm_state_lock);
1186         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1187         spin_unlock_bh(&xfrm_state_lock);
1188
1189         return x;
1190 }
1191 EXPORT_SYMBOL(xfrm_find_acq);
1192
1193 #ifdef CONFIG_XFRM_SUB_POLICY
1194 int
1195 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1196                unsigned short family)
1197 {
1198         int err = 0;
1199         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1200         if (!afinfo)
1201                 return -EAFNOSUPPORT;
1202
1203         spin_lock_bh(&xfrm_state_lock);
1204         if (afinfo->tmpl_sort)
1205                 err = afinfo->tmpl_sort(dst, src, n);
1206         spin_unlock_bh(&xfrm_state_lock);
1207         xfrm_state_put_afinfo(afinfo);
1208         return err;
1209 }
1210 EXPORT_SYMBOL(xfrm_tmpl_sort);
1211
1212 int
1213 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1214                 unsigned short family)
1215 {
1216         int err = 0;
1217         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1218         if (!afinfo)
1219                 return -EAFNOSUPPORT;
1220
1221         spin_lock_bh(&xfrm_state_lock);
1222         if (afinfo->state_sort)
1223                 err = afinfo->state_sort(dst, src, n);
1224         spin_unlock_bh(&xfrm_state_lock);
1225         xfrm_state_put_afinfo(afinfo);
1226         return err;
1227 }
1228 EXPORT_SYMBOL(xfrm_state_sort);
1229 #endif
1230
1231 /* Silly enough, but I'm too lazy to build a resolution list */
1232
1233 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1234 {
1235         int i;
1236
1237         for (i = 0; i <= xfrm_state_hmask; i++) {
1238                 struct hlist_node *entry;
1239                 struct xfrm_state *x;
1240
1241                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1242                         if (x->km.seq == seq &&
1243                             x->km.state == XFRM_STATE_ACQ) {
1244                                 xfrm_state_hold(x);
1245                                 return x;
1246                         }
1247                 }
1248         }
1249         return NULL;
1250 }
1251
1252 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1253 {
1254         struct xfrm_state *x;
1255
1256         spin_lock_bh(&xfrm_state_lock);
1257         x = __xfrm_find_acq_byseq(seq);
1258         spin_unlock_bh(&xfrm_state_lock);
1259         return x;
1260 }
1261 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1262
1263 u32 xfrm_get_acqseq(void)
1264 {
1265         u32 res;
1266         static u32 acqseq;
1267         static DEFINE_SPINLOCK(acqseq_lock);
1268
1269         spin_lock_bh(&acqseq_lock);
1270         res = (++acqseq ? : ++acqseq);
1271         spin_unlock_bh(&acqseq_lock);
1272         return res;
1273 }
1274 EXPORT_SYMBOL(xfrm_get_acqseq);
1275
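/*
 * Editorial note, illustrative only: the GNU "?:" idiom above skips zero on
 * wrap-around, so the acquire sequence number handed to key managers is never
 * 0; e.g. when acqseq wraps past 0xffffffff the returned value is 1.
 */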
1276 void
1277 xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
1278 {
1279         unsigned int h;
1280         struct xfrm_state *x0;
1281
1282         if (x->id.spi)
1283                 return;
1284
1285         if (minspi == maxspi) {
1286                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1287                 if (x0) {
1288                         xfrm_state_put(x0);
1289                         return;
1290                 }
1291                 x->id.spi = minspi;
1292         } else {
1293                 u32 spi = 0;
1294                 u32 low = ntohl(minspi);
1295                 u32 high = ntohl(maxspi);
1296                 for (h=0; h<high-low+1; h++) {
1297                         spi = low + net_random()%(high-low+1);
1298                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1299                         if (x0 == NULL) {
1300                                 x->id.spi = htonl(spi);
1301                                 break;
1302                         }
1303                         xfrm_state_put(x0);
1304                 }
1305         }
1306         if (x->id.spi) {
1307                 spin_lock_bh(&xfrm_state_lock);
1308                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1309                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1310                 spin_unlock_bh(&xfrm_state_lock);
1311                 wake_up(&km_waitq);
1312         }
1313 }
1314 EXPORT_SYMBOL(xfrm_alloc_spi);
1315
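/*
 * Editorial note, illustrative only: with minspi == maxspi the caller asks
 * for one specific SPI, which is taken only if no state with that
 * (daddr, spi, proto) triple exists.  Otherwise up to (high - low + 1) random
 * candidates in [low, high] are tried; if all of them collide, x->id.spi
 * stays 0 and the state is not added to the byspi table.
 */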
1316 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1317                     void *data)
1318 {
1319         int i;
1320         struct xfrm_state *x, *last = NULL;
1321         struct hlist_node *entry;
1322         int count = 0;
1323         int err = 0;
1324
1325         spin_lock_bh(&xfrm_state_lock);
1326         for (i = 0; i <= xfrm_state_hmask; i++) {
1327                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1328                         if (!xfrm_id_proto_match(x->id.proto, proto))
1329                                 continue;
1330                         if (last) {
1331                                 err = func(last, count, data);
1332                                 if (err)
1333                                         goto out;
1334                         }
1335                         last = x;
1336                         count++;
1337                 }
1338         }
1339         if (count == 0) {
1340                 err = -ENOENT;
1341                 goto out;
1342         }
1343         err = func(last, 0, data);
1344 out:
1345         spin_unlock_bh(&xfrm_state_lock);
1346         return err;
1347 }
1348 EXPORT_SYMBOL(xfrm_state_walk);
1349
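/*
 * Editorial usage sketch with a hypothetical callback (not part of this
 * file): xfrm_state_walk() invokes the callback once per matching state and
 * makes the final call with count == 0 so the dumper can mark its last
 * message, e.g.
 *
 *     static int dump_one(struct xfrm_state *x, int count, void *ptr)
 *     {
 *             ...     count == 0 means "this is the last state"
 *             return 0;
 *     }
 *
 *     err = xfrm_state_walk(IPSEC_PROTO_ANY, dump_one, &data);
 */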
1350
1351 void xfrm_replay_notify(struct xfrm_state *x, int event)
1352 {
1353         struct km_event c;
1354         /* We send notify messages when:
1355          *  1. we updated one of the sequence numbers and the seqno difference
1356          *     is at least x->replay_maxdiff; in this case we also update the
1357          *     timeout of our timer function
1358          *  2. x->replay_maxage has elapsed since the last update
1359          *     and there were changes
1360          *
1361          *  The state structure must be locked!
1362          */
1363
1364         switch (event) {
1365         case XFRM_REPLAY_UPDATE:
1366                 if (x->replay_maxdiff &&
1367                     (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1368                     (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1369                         if (x->xflags & XFRM_TIME_DEFER)
1370                                 event = XFRM_REPLAY_TIMEOUT;
1371                         else
1372                                 return;
1373                 }
1374
1375                 break;
1376
1377         case XFRM_REPLAY_TIMEOUT:
1378                 if ((x->replay.seq == x->preplay.seq) &&
1379                     (x->replay.bitmap == x->preplay.bitmap) &&
1380                     (x->replay.oseq == x->preplay.oseq)) {
1381                         x->xflags |= XFRM_TIME_DEFER;
1382                         return;
1383                 }
1384
1385                 break;
1386         }
1387
1388         memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1389         c.event = XFRM_MSG_NEWAE;
1390         c.data.aevent = event;
1391         km_state_notify(x, &c);
1392
1393         if (x->replay_maxage &&
1394             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
1395                 x->xflags &= ~XFRM_TIME_DEFER;
1396 }
1397 EXPORT_SYMBOL(xfrm_replay_notify);
1398
1399 static void xfrm_replay_timer_handler(unsigned long data)
1400 {
1401         struct xfrm_state *x = (struct xfrm_state*)data;
1402
1403         spin_lock(&x->lock);
1404
1405         if (x->km.state == XFRM_STATE_VALID) {
1406                 if (xfrm_aevent_is_on())
1407                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1408                 else
1409                         x->xflags |= XFRM_TIME_DEFER;
1410         }
1411
1412         spin_unlock(&x->lock);
1413 }
1414
1415 int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
1416 {
1417         u32 diff;
1418         u32 seq = ntohl(net_seq);
1419
1420         if (unlikely(seq == 0))
1421                 return -EINVAL;
1422
1423         if (likely(seq > x->replay.seq))
1424                 return 0;
1425
1426         diff = x->replay.seq - seq;
1427         if (diff >= min_t(unsigned int, x->props.replay_window,
1428                           sizeof(x->replay.bitmap) * 8)) {
1429                 x->stats.replay_window++;
1430                 return -EINVAL;
1431         }
1432
1433         if (x->replay.bitmap & (1U << diff)) {
1434                 x->stats.replay++;
1435                 return -EINVAL;
1436         }
1437         return 0;
1438 }
1439 EXPORT_SYMBOL(xfrm_replay_check);
1440
1441 void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
1442 {
1443         u32 diff;
1444         u32 seq = ntohl(net_seq);
1445
1446         if (seq > x->replay.seq) {
1447                 diff = seq - x->replay.seq;
1448                 if (diff < x->props.replay_window)
1449                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1450                 else
1451                         x->replay.bitmap = 1;
1452                 x->replay.seq = seq;
1453         } else {
1454                 diff = x->replay.seq - seq;
1455                 x->replay.bitmap |= (1U << diff);
1456         }
1457
1458         if (xfrm_aevent_is_on())
1459                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1460 }
1461 EXPORT_SYMBOL(xfrm_replay_advance);
1462
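/*
 * Editorial worked example, illustrative only: with x->replay.seq == 100 an
 * incoming seq of 98 gives diff == 2, so bit 2 of replay.bitmap decides
 * whether 98 was already seen (xfrm_replay_check() returns -EINVAL and bumps
 * stats.replay) or is now marked as seen by xfrm_replay_advance().  Anything
 * with diff >= replay_window (capped at the 32-bit bitmap size) is rejected
 * as outside the window and counted in stats.replay_window.
 */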
1463 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1464 static DEFINE_RWLOCK(xfrm_km_lock);
1465
1466 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1467 {
1468         struct xfrm_mgr *km;
1469
1470         read_lock(&xfrm_km_lock);
1471         list_for_each_entry(km, &xfrm_km_list, list)
1472                 if (km->notify_policy)
1473                         km->notify_policy(xp, dir, c);
1474         read_unlock(&xfrm_km_lock);
1475 }
1476
1477 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1478 {
1479         struct xfrm_mgr *km;
1480         read_lock(&xfrm_km_lock);
1481         list_for_each_entry(km, &xfrm_km_list, list)
1482                 if (km->notify)
1483                         km->notify(x, c);
1484         read_unlock(&xfrm_km_lock);
1485 }
1486
1487 EXPORT_SYMBOL(km_policy_notify);
1488 EXPORT_SYMBOL(km_state_notify);
1489
1490 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1491 {
1492         struct km_event c;
1493
1494         c.data.hard = hard;
1495         c.pid = pid;
1496         c.event = XFRM_MSG_EXPIRE;
1497         km_state_notify(x, &c);
1498
1499         if (hard)
1500                 wake_up(&km_waitq);
1501 }
1502
1503 EXPORT_SYMBOL(km_state_expired);
1504 /*
1505  * We send to all registered managers regardless of failure;
1506  * we are happy with one success.
1507  */
1508 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1509 {
1510         int err = -EINVAL, acqret;
1511         struct xfrm_mgr *km;
1512
1513         read_lock(&xfrm_km_lock);
1514         list_for_each_entry(km, &xfrm_km_list, list) {
1515                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1516                 if (!acqret)
1517                         err = acqret;
1518         }
1519         read_unlock(&xfrm_km_lock);
1520         return err;
1521 }
1522 EXPORT_SYMBOL(km_query);
1523
1524 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1525 {
1526         int err = -EINVAL;
1527         struct xfrm_mgr *km;
1528
1529         read_lock(&xfrm_km_lock);
1530         list_for_each_entry(km, &xfrm_km_list, list) {
1531                 if (km->new_mapping)
1532                         err = km->new_mapping(x, ipaddr, sport);
1533                 if (!err)
1534                         break;
1535         }
1536         read_unlock(&xfrm_km_lock);
1537         return err;
1538 }
1539 EXPORT_SYMBOL(km_new_mapping);
1540
/* Notify the key managers that a policy has expired; as with state expiry,
 * a hard expiry wakes up km_waitq sleepers.
 */
void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
{
        struct km_event c;

        c.data.hard = hard;
        c.pid = pid;
        c.event = XFRM_MSG_POLEXPIRE;
        km_policy_notify(pol, dir, &c);

        if (hard)
                wake_up(&km_waitq);
}
EXPORT_SYMBOL(km_policy_expired);

/* Hand an SA/policy migration request to the key managers; as with
 * km_query(), one success is enough.
 */
int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
               struct xfrm_migrate *m, int num_migrate)
{
        int err = -EINVAL;
        int ret;
        struct xfrm_mgr *km;

        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list) {
                if (km->migrate) {
                        ret = km->migrate(sel, dir, type, m, num_migrate);
                        if (!ret)
                                err = ret;
                }
        }
        read_unlock(&xfrm_km_lock);
        return err;
}
EXPORT_SYMBOL(km_migrate);

/* Report an event on (proto, sel, addr) to the key managers; one success
 * is enough.
 */
int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
{
        int err = -EINVAL;
        int ret;
        struct xfrm_mgr *km;

        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list) {
                if (km->report) {
                        ret = km->report(proto, sel, addr);
                        if (!ret)
                                err = ret;
                }
        }
        read_unlock(&xfrm_km_lock);
        return err;
}
EXPORT_SYMBOL(km_report);

/* Attach a per-socket policy supplied via setsockopt(): copy the opaque
 * policy blob from userspace and let each key manager try to compile it.
 * On success, compile_policy() returns the policy direction in err, which
 * is then used to insert the policy on the socket.
 */
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
        int err;
        u8 *data;
        struct xfrm_mgr *km;
        struct xfrm_policy *pol = NULL;

        if (optlen <= 0 || optlen > PAGE_SIZE)
                return -EMSGSIZE;

        data = kmalloc(optlen, GFP_KERNEL);
        if (!data)
                return -ENOMEM;

        err = -EFAULT;
        if (copy_from_user(data, optval, optlen))
                goto out;

        err = -EINVAL;
        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list) {
                pol = km->compile_policy(sk, optname, data,
                                         optlen, &err);
                if (err >= 0)
                        break;
        }
        read_unlock(&xfrm_km_lock);

        if (err >= 0) {
                xfrm_sk_policy_insert(sk, err, pol);
                xfrm_pol_put(pol);
                err = 0;
        }

out:
        kfree(data);
        return err;
}
EXPORT_SYMBOL(xfrm_user_policy);

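/*
 * Editor's note: a hedged sketch of how xfrm_user_policy() is typically
 * reached from userspace -- a setsockopt() call whose payload format is
 * whatever the accepting key manager's compile_policy() expects, so the
 * buffer is deliberately left opaque here.  IP_XFRM_POLICY and its fallback
 * value are assumptions taken from <linux/in.h>; attach_sk_policy() is a
 * hypothetical helper.
 */
#if 0   /* illustration only -- not built as part of xfrm_state.c */
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>

#ifndef IP_XFRM_POLICY
#define IP_XFRM_POLICY 17               /* assumed value from <linux/in.h> */
#endif

static int attach_sk_policy(int fd, const void *blob, socklen_t len)
{
        /* The kernel copies at most PAGE_SIZE bytes and then lets each
         * registered key manager try to compile the blob in turn.       */
        if (setsockopt(fd, IPPROTO_IP, IP_XFRM_POLICY, blob, len) < 0) {
                perror("setsockopt(IP_XFRM_POLICY)");
                return -1;
        }
        return 0;
}
#endif
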
/* Register/unregister a key manager (e.g. af_key or xfrm_user) so that it
 * receives the notifications generated above.
 */
int xfrm_register_km(struct xfrm_mgr *km)
{
        write_lock_bh(&xfrm_km_lock);
        list_add_tail(&km->list, &xfrm_km_list);
        write_unlock_bh(&xfrm_km_lock);
        return 0;
}
EXPORT_SYMBOL(xfrm_register_km);

int xfrm_unregister_km(struct xfrm_mgr *km)
{
        write_lock_bh(&xfrm_km_lock);
        list_del(&km->list);
        write_unlock_bh(&xfrm_km_lock);
        return 0;
}
EXPORT_SYMBOL(xfrm_unregister_km);

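/*
 * Editor's note: a minimal sketch of what a key manager registration could
 * look like, loosely modelled on the in-tree managers; the callback bodies
 * and names are hypothetical.  Note that km_query() and xfrm_user_policy()
 * above call ->acquire and ->compile_policy without NULL checks, so a real
 * manager must provide both; the remaining hooks are NULL-checked and may
 * be omitted.
 */
#if 0   /* illustration only -- not built as part of xfrm_state.c */
static int example_acquire(struct xfrm_state *x, struct xfrm_tmpl *t,
                           struct xfrm_policy *xp, int dir)
{
        /* Ask userspace to negotiate an SA; returning 0 satisfies km_query(). */
        return 0;
}

static struct xfrm_policy *example_compile_policy(struct sock *sk, int opt,
                                                  u8 *data, int len, int *dir)
{
        /* This manager does not parse per-socket policies. */
        *dir = -EINVAL;
        return NULL;
}

static struct xfrm_mgr example_km = {
        .id             = "example",
        .acquire        = example_acquire,
        .compile_policy = example_compile_policy,
        /* .notify, .notify_policy, .new_mapping, .migrate and .report
         * are optional.                                               */
};

static int __init example_km_init(void)
{
        return xfrm_register_km(&example_km);
}

static void __exit example_km_exit(void)
{
        xfrm_unregister_km(&example_km);
}
#endif
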
/* Register the address-family-specific state operations for a family;
 * only one afinfo may be registered per family at a time.
 */
int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
{
        int err = 0;
        if (unlikely(afinfo == NULL))
                return -EINVAL;
        if (unlikely(afinfo->family >= NPROTO))
                return -EAFNOSUPPORT;
        write_lock_bh(&xfrm_state_afinfo_lock);
        if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
                err = -ENOBUFS;
        else
                xfrm_state_afinfo[afinfo->family] = afinfo;
        write_unlock_bh(&xfrm_state_afinfo_lock);
        return err;
}
EXPORT_SYMBOL(xfrm_state_register_afinfo);

int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
{
        int err = 0;
        if (unlikely(afinfo == NULL))
                return -EINVAL;
        if (unlikely(afinfo->family >= NPROTO))
                return -EAFNOSUPPORT;
        write_lock_bh(&xfrm_state_afinfo_lock);
        if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
                if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
                        err = -EINVAL;
                else
                        xfrm_state_afinfo[afinfo->family] = NULL;
        }
        write_unlock_bh(&xfrm_state_afinfo_lock);
        return err;
}
EXPORT_SYMBOL(xfrm_state_unregister_afinfo);

/* Look up the afinfo for a family.  On success the afinfo read lock is
 * left held and must be dropped with xfrm_state_put_afinfo(); on failure
 * the lock is released here and NULL is returned.
 */
struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
        struct xfrm_state_afinfo *afinfo;
        if (unlikely(family >= NPROTO))
                return NULL;
        read_lock(&xfrm_state_afinfo_lock);
        afinfo = xfrm_state_afinfo[family];
        if (unlikely(!afinfo))
                read_unlock(&xfrm_state_afinfo_lock);
        return afinfo;
}

void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
        read_unlock(&xfrm_state_afinfo_lock);
}

EXPORT_SYMBOL(xfrm_state_get_afinfo);
EXPORT_SYMBOL(xfrm_state_put_afinfo);

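/*
 * Editor's note: a hedged sketch of per-family registration, loosely
 * modelled on what the IPv4/IPv6 xfrm state modules do.  Only the two
 * members this file actually dereferences (.family and .init_flags) are
 * filled in; the real struct xfrm_state_afinfo carries further af-specific
 * hooks not reproduced here, and all names below are hypothetical.
 */
#if 0   /* illustration only -- not built as part of xfrm_state.c */
static int example_init_flags(struct xfrm_state *x)
{
        /* Set any af-specific x->props flags; 0 means nothing to do. */
        return 0;
}

static struct xfrm_state_afinfo example_state_afinfo = {
        .family         = AF_INET,
        .init_flags     = example_init_flags,
};

static int __init example_state_afinfo_init(void)
{
        /* Fails with -ENOBUFS if another afinfo already owns the family. */
        return xfrm_state_register_afinfo(&example_state_afinfo);
}

static void __exit example_state_afinfo_exit(void)
{
        xfrm_state_unregister_afinfo(&example_state_afinfo);
}
#endif
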
/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
        if (x->tunnel) {
                struct xfrm_state *t = x->tunnel;

                if (atomic_read(&t->tunnel_users) == 2)
                        xfrm_state_delete(t);
                atomic_dec(&t->tunnel_users);
                xfrm_state_put(t);
                x->tunnel = NULL;
        }
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);

/* Return the largest payload that fits in mtu once this state's transform
 * overhead is taken into account.
 */
int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
        int res;

        spin_lock_bh(&x->lock);
        if (x->km.state == XFRM_STATE_VALID &&
            x->type && x->type->get_mtu)
                res = x->type->get_mtu(x, mtu);
        else
                res = mtu - x->props.header_len;
        spin_unlock_bh(&x->lock);
        return res;
}

/* Bind a freshly created state to its address-family, protocol (type) and
 * mode operations, then mark it valid.
 */
int xfrm_init_state(struct xfrm_state *x)
{
        struct xfrm_state_afinfo *afinfo;
        int family = x->props.family;
        int err;

        err = -EAFNOSUPPORT;
        afinfo = xfrm_state_get_afinfo(family);
        if (!afinfo)
                goto error;

        err = 0;
        if (afinfo->init_flags)
                err = afinfo->init_flags(x);

        xfrm_state_put_afinfo(afinfo);

        if (err)
                goto error;

        err = -EPROTONOSUPPORT;
        x->type = xfrm_get_type(x->id.proto, family);
        if (x->type == NULL)
                goto error;

        err = x->type->init_state(x);
        if (err)
                goto error;

        x->mode = xfrm_get_mode(x->props.mode, family);
        if (x->mode == NULL)
                goto error;

        x->km.state = XFRM_STATE_VALID;

error:
        return err;
}

EXPORT_SYMBOL(xfrm_init_state);

/* Boot-time initialisation: allocate the initial eight-bucket hash tables
 * (they are grown later as the number of states increases) and set up the
 * garbage-collection work.
 */
void __init xfrm_state_init(void)
{
        unsigned int sz;

        sz = sizeof(struct hlist_head) * 8;

        xfrm_state_bydst = xfrm_hash_alloc(sz);
        xfrm_state_bysrc = xfrm_hash_alloc(sz);
        xfrm_state_byspi = xfrm_hash_alloc(sz);
        if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
                panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
        xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);

        INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
}