/*
 * Object id: 57c8ee66491eb8a66944afa3250396c36b11b218
 * Origin: sfrench/cifs-2.6.git, net/netfilter/ipvs/ip_vs_proto_sctp.c
 */
1 #include <linux/kernel.h>
2 #include <linux/ip.h>
3 #include <linux/sctp.h>
4 #include <net/ip.h>
5 #include <net/ip6_checksum.h>
6 #include <linux/netfilter.h>
7 #include <linux/netfilter_ipv4.h>
8 #include <net/sctp/checksum.h>
9 #include <net/ip_vs.h>
10
11 static int
12 sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
13                    struct ip_vs_proto_data *pd,
14                    int *verdict, struct ip_vs_conn **cpp,
15                    struct ip_vs_iphdr *iph)
16 {
17         struct ip_vs_service *svc;
18         struct sctp_chunkhdr _schunkh, *sch;
19         struct sctphdr *sh, _sctph;
20         __be16 _ports[2], *ports = NULL;
21
22         if (likely(!ip_vs_iph_icmp(iph))) {
23                 sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
24                 if (sh) {
25                         sch = skb_header_pointer(skb, iph->len + sizeof(_sctph),
26                                                  sizeof(_schunkh), &_schunkh);
27                         if (sch) {
28                                 if (sch->type == SCTP_CID_ABORT ||
29                                     !(sysctl_sloppy_sctp(ipvs) ||
30                                       sch->type == SCTP_CID_INIT))
31                                         return 1;
32                                 ports = &sh->source;
33                         }
34                 }
35         } else {
36                 ports = skb_header_pointer(
37                         skb, iph->len, sizeof(_ports), &_ports);
38         }
39
40         if (!ports) {
41                 *verdict = NF_DROP;
42                 return 0;
43         }
44
45         if (likely(!ip_vs_iph_inverse(iph)))
46                 svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
47                                          &iph->daddr, ports[1]);
48         else
49                 svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
50                                          &iph->saddr, ports[0]);
51         if (svc) {
52                 int ignored;
53
54                 if (ip_vs_todrop(ipvs)) {
55                         /*
56                          * It seems that we are very loaded.
57                          * We have to drop this packet :(
58                          */
59                         *verdict = NF_DROP;
60                         return 0;
61                 }
62                 /*
63                  * Let the virtual server select a real server for the
64                  * incoming connection, and create a connection entry.
65                  */
66                 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
67                 if (!*cpp && ignored <= 0) {
68                         if (!ignored)
69                                 *verdict = ip_vs_leave(svc, skb, pd, iph);
70                         else
71                                 *verdict = NF_DROP;
72                         return 0;
73                 }
74         }
75         /* NF_ACCEPT */
76         return 1;
77 }
78
79 static void sctp_nat_csum(struct sk_buff *skb, struct sctphdr *sctph,
80                           unsigned int sctphoff)
81 {
82         sctph->checksum = sctp_compute_cksum(skb, sctphoff);
83         skb->ip_summed = CHECKSUM_UNNECESSARY;
84 }
85
86 static int
87 sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
88                   struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
89 {
90         struct sctphdr *sctph;
91         unsigned int sctphoff = iph->len;
92         bool payload_csum = false;
93
94 #ifdef CONFIG_IP_VS_IPV6
95         if (cp->af == AF_INET6 && iph->fragoffs)
96                 return 1;
97 #endif
98
99         /* csum_check requires unshared skb */
100         if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
101                 return 0;
102
103         if (unlikely(cp->app != NULL)) {
104                 int ret;
105
106                 /* Some checks before mangling */
107                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
108                         return 0;
109
110                 /* Call application helper if needed */
111                 ret = ip_vs_app_pkt_out(cp, skb);
112                 if (ret == 0)
113                         return 0;
114                 /* ret=2: csum update is needed after payload mangling */
115                 if (ret == 2)
116                         payload_csum = true;
117         }
118
119         sctph = (void *) skb_network_header(skb) + sctphoff;
120
121         /* Only update csum if we really have to */
122         if (sctph->source != cp->vport || payload_csum ||
123             skb->ip_summed == CHECKSUM_PARTIAL) {
124                 sctph->source = cp->vport;
125                 sctp_nat_csum(skb, sctph, sctphoff);
126         } else {
127                 skb->ip_summed = CHECKSUM_UNNECESSARY;
128         }
129
130         return 1;
131 }
132
133 static int
134 sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
135                   struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
136 {
137         struct sctphdr *sctph;
138         unsigned int sctphoff = iph->len;
139         bool payload_csum = false;
140
141 #ifdef CONFIG_IP_VS_IPV6
142         if (cp->af == AF_INET6 && iph->fragoffs)
143                 return 1;
144 #endif
145
146         /* csum_check requires unshared skb */
147         if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
148                 return 0;
149
150         if (unlikely(cp->app != NULL)) {
151                 int ret;
152
153                 /* Some checks before mangling */
154                 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
155                         return 0;
156
157                 /* Call application helper if needed */
158                 ret = ip_vs_app_pkt_in(cp, skb);
159                 if (ret == 0)
160                         return 0;
161                 /* ret=2: csum update is needed after payload mangling */
162                 if (ret == 2)
163                         payload_csum = true;
164         }
165
166         sctph = (void *) skb_network_header(skb) + sctphoff;
167
168         /* Only update csum if we really have to */
169         if (sctph->dest != cp->dport || payload_csum ||
170             (skb->ip_summed == CHECKSUM_PARTIAL &&
171              !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) {
172                 sctph->dest = cp->dport;
173                 sctp_nat_csum(skb, sctph, sctphoff);
174         } else if (skb->ip_summed != CHECKSUM_PARTIAL) {
175                 skb->ip_summed = CHECKSUM_UNNECESSARY;
176         }
177
178         return 1;
179 }
180
181 static int
182 sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
183 {
184         unsigned int sctphoff;
185         struct sctphdr *sh, _sctph;
186         __le32 cmp, val;
187
188 #ifdef CONFIG_IP_VS_IPV6
189         if (af == AF_INET6)
190                 sctphoff = sizeof(struct ipv6hdr);
191         else
192 #endif
193                 sctphoff = ip_hdrlen(skb);
194
195         sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph);
196         if (sh == NULL)
197                 return 0;
198
199         cmp = sh->checksum;
200         val = sctp_compute_cksum(skb, sctphoff);
201
202         if (val != cmp) {
203                 /* CRC failure, dump it. */
204                 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
205                                 "Failed checksum for");
206                 return 0;
207         }
208         return 1;
209 }
210
/*
 * Events driving the SCTP state machine.  The numeric value of each
 * event selects a row of sctp_states[direction][event][state].
 */
enum ipvs_sctp_event_t {
	IP_VS_SCTP_DATA			= 0,	/* DATA, SACK, HEARTBEATs */
	IP_VS_SCTP_INIT			= 1,
	IP_VS_SCTP_INIT_ACK		= 2,
	IP_VS_SCTP_COOKIE_ECHO		= 3,
	IP_VS_SCTP_COOKIE_ACK		= 4,
	IP_VS_SCTP_SHUTDOWN		= 5,
	IP_VS_SCTP_SHUTDOWN_ACK		= 6,
	IP_VS_SCTP_SHUTDOWN_COMPLETE	= 7,
	IP_VS_SCTP_ERROR		= 8,
	IP_VS_SCTP_ABORT		= 9,
	IP_VS_SCTP_EVENT_LAST		= 10,	/* number of events */
};
224
225 /* RFC 2960, 3.2 Chunk Field Descriptions */
226 static __u8 sctp_events[] = {
227         [SCTP_CID_DATA]                 = IP_VS_SCTP_DATA,
228         [SCTP_CID_INIT]                 = IP_VS_SCTP_INIT,
229         [SCTP_CID_INIT_ACK]             = IP_VS_SCTP_INIT_ACK,
230         [SCTP_CID_SACK]                 = IP_VS_SCTP_DATA,
231         [SCTP_CID_HEARTBEAT]            = IP_VS_SCTP_DATA,
232         [SCTP_CID_HEARTBEAT_ACK]        = IP_VS_SCTP_DATA,
233         [SCTP_CID_ABORT]                = IP_VS_SCTP_ABORT,
234         [SCTP_CID_SHUTDOWN]             = IP_VS_SCTP_SHUTDOWN,
235         [SCTP_CID_SHUTDOWN_ACK]         = IP_VS_SCTP_SHUTDOWN_ACK,
236         [SCTP_CID_ERROR]                = IP_VS_SCTP_ERROR,
237         [SCTP_CID_COOKIE_ECHO]          = IP_VS_SCTP_COOKIE_ECHO,
238         [SCTP_CID_COOKIE_ACK]           = IP_VS_SCTP_COOKIE_ACK,
239         [SCTP_CID_ECN_ECNE]             = IP_VS_SCTP_DATA,
240         [SCTP_CID_ECN_CWR]              = IP_VS_SCTP_DATA,
241         [SCTP_CID_SHUTDOWN_COMPLETE]    = IP_VS_SCTP_SHUTDOWN_COMPLETE,
242 };
243
244 /* SCTP States:
245  * See RFC 2960, 4. SCTP Association State Diagram
246  *
247  * New states (not in diagram):
248  * - INIT1 state: use shorter timeout for dropped INIT packets
249  * - REJECTED state: use shorter timeout if INIT is rejected with ABORT
250  * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging
251  *
252  * The states are as seen in real server. In the diagram, INIT1, INIT,
253  * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state.
254  *
255  * States as per packets from client (C) and server (S):
256  *
257  * Setup of client connection:
258  * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK
259  * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK
260  * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO
261  * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK
262  *
263  * Setup of server connection:
264  * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK
265  * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO
266  * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK
267  */
268
269 #define sNO IP_VS_SCTP_S_NONE
270 #define sI1 IP_VS_SCTP_S_INIT1
271 #define sIN IP_VS_SCTP_S_INIT
272 #define sCS IP_VS_SCTP_S_COOKIE_SENT
273 #define sCR IP_VS_SCTP_S_COOKIE_REPLIED
274 #define sCW IP_VS_SCTP_S_COOKIE_WAIT
275 #define sCO IP_VS_SCTP_S_COOKIE
276 #define sCE IP_VS_SCTP_S_COOKIE_ECHOED
277 #define sES IP_VS_SCTP_S_ESTABLISHED
278 #define sSS IP_VS_SCTP_S_SHUTDOWN_SENT
279 #define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED
280 #define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT
281 #define sRJ IP_VS_SCTP_S_REJECTED
282 #define sCL IP_VS_SCTP_S_CLOSED
283
284 static const __u8 sctp_states
285         [IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = {
286         { /* INPUT */
287 /*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
288 /* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
289 /* i   */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
290 /* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
291 /* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
292 /* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL},
293 /* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
294 /* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL},
295 /* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL},
296 /* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL},
297 /* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
298         },
299         { /* OUTPUT */
300 /*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
301 /* d   */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
302 /* i   */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW},
303 /* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
304 /* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
305 /* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
306 /* s   */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL},
307 /* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL},
308 /* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
309 /* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
310 /* ab  */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
311         },
312         { /* INPUT-ONLY */
313 /*        sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
314 /* d   */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
315 /* i   */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
316 /* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
317 /* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
318 /* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
319 /* s   */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
320 /* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL},
321 /* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL},
322 /* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
323 /* ab  */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
324         },
325 };
326
327 #define IP_VS_SCTP_MAX_RTO      ((60 + 1) * HZ)
328
329 /* Timeout table[state] */
330 static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
331         [IP_VS_SCTP_S_NONE]                     = 2 * HZ,
332         [IP_VS_SCTP_S_INIT1]                    = (0 + 3 + 1) * HZ,
333         [IP_VS_SCTP_S_INIT]                     = IP_VS_SCTP_MAX_RTO,
334         [IP_VS_SCTP_S_COOKIE_SENT]              = IP_VS_SCTP_MAX_RTO,
335         [IP_VS_SCTP_S_COOKIE_REPLIED]           = IP_VS_SCTP_MAX_RTO,
336         [IP_VS_SCTP_S_COOKIE_WAIT]              = IP_VS_SCTP_MAX_RTO,
337         [IP_VS_SCTP_S_COOKIE]                   = IP_VS_SCTP_MAX_RTO,
338         [IP_VS_SCTP_S_COOKIE_ECHOED]            = IP_VS_SCTP_MAX_RTO,
339         [IP_VS_SCTP_S_ESTABLISHED]              = 15 * 60 * HZ,
340         [IP_VS_SCTP_S_SHUTDOWN_SENT]            = IP_VS_SCTP_MAX_RTO,
341         [IP_VS_SCTP_S_SHUTDOWN_RECEIVED]        = IP_VS_SCTP_MAX_RTO,
342         [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT]        = IP_VS_SCTP_MAX_RTO,
343         [IP_VS_SCTP_S_REJECTED]                 = (0 + 3 + 1) * HZ,
344         [IP_VS_SCTP_S_CLOSED]                   = IP_VS_SCTP_MAX_RTO,
345         [IP_VS_SCTP_S_LAST]                     = 2 * HZ,
346 };
347
348 static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = {
349         [IP_VS_SCTP_S_NONE]                     = "NONE",
350         [IP_VS_SCTP_S_INIT1]                    = "INIT1",
351         [IP_VS_SCTP_S_INIT]                     = "INIT",
352         [IP_VS_SCTP_S_COOKIE_SENT]              = "C-SENT",
353         [IP_VS_SCTP_S_COOKIE_REPLIED]           = "C-REPLIED",
354         [IP_VS_SCTP_S_COOKIE_WAIT]              = "C-WAIT",
355         [IP_VS_SCTP_S_COOKIE]                   = "COOKIE",
356         [IP_VS_SCTP_S_COOKIE_ECHOED]            = "C-ECHOED",
357         [IP_VS_SCTP_S_ESTABLISHED]              = "ESTABLISHED",
358         [IP_VS_SCTP_S_SHUTDOWN_SENT]            = "S-SENT",
359         [IP_VS_SCTP_S_SHUTDOWN_RECEIVED]        = "S-RECEIVED",
360         [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT]        = "S-ACK-SENT",
361         [IP_VS_SCTP_S_REJECTED]                 = "REJECTED",
362         [IP_VS_SCTP_S_CLOSED]                   = "CLOSED",
363         [IP_VS_SCTP_S_LAST]                     = "BUG!",
364 };
365
366
367 static const char *sctp_state_name(int state)
368 {
369         if (state >= IP_VS_SCTP_S_LAST)
370                 return "ERR!";
371         if (sctp_state_name_table[state])
372                 return sctp_state_name_table[state];
373         return "?";
374 }
375
376 static inline void
377 set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
378                 int direction, const struct sk_buff *skb)
379 {
380         struct sctp_chunkhdr _sctpch, *sch;
381         unsigned char chunk_type;
382         int event, next_state;
383         int ihl, cofs;
384
385 #ifdef CONFIG_IP_VS_IPV6
386         ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
387 #else
388         ihl = ip_hdrlen(skb);
389 #endif
390
391         cofs = ihl + sizeof(struct sctphdr);
392         sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch);
393         if (sch == NULL)
394                 return;
395
396         chunk_type = sch->type;
397         /*
398          * Section 3: Multiple chunks can be bundled into one SCTP packet
399          * up to the MTU size, except for the INIT, INIT ACK, and
400          * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with
401          * any other chunk in a packet.
402          *
403          * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control
404          * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be
405          * bundled with an ABORT, but they MUST be placed before the ABORT
406          * in the SCTP packet or they will be ignored by the receiver.
407          */
408         if ((sch->type == SCTP_CID_COOKIE_ECHO) ||
409             (sch->type == SCTP_CID_COOKIE_ACK)) {
410                 int clen = ntohs(sch->length);
411
412                 if (clen >= sizeof(_sctpch)) {
413                         sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4),
414                                                  sizeof(_sctpch), &_sctpch);
415                         if (sch && sch->type == SCTP_CID_ABORT)
416                                 chunk_type = sch->type;
417                 }
418         }
419
420         event = (chunk_type < sizeof(sctp_events)) ?
421                 sctp_events[chunk_type] : IP_VS_SCTP_DATA;
422
423         /* Update direction to INPUT_ONLY if necessary
424          * or delete NO_OUTPUT flag if output packet detected
425          */
426         if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
427                 if (direction == IP_VS_DIR_OUTPUT)
428                         cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
429                 else
430                         direction = IP_VS_DIR_INPUT_ONLY;
431         }
432
433         next_state = sctp_states[direction][event][cp->state];
434
435         if (next_state != cp->state) {
436                 struct ip_vs_dest *dest = cp->dest;
437
438                 IP_VS_DBG_BUF(8, "%s %s  %s:%d->"
439                                 "%s:%d state: %s->%s conn->refcnt:%d\n",
440                                 pd->pp->name,
441                                 ((direction == IP_VS_DIR_OUTPUT) ?
442                                  "output " : "input "),
443                                 IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
444                                 ntohs(cp->dport),
445                                 IP_VS_DBG_ADDR(cp->af, &cp->caddr),
446                                 ntohs(cp->cport),
447                                 sctp_state_name(cp->state),
448                                 sctp_state_name(next_state),
449                                 refcount_read(&cp->refcnt));
450                 if (dest) {
451                         if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
452                                 (next_state != IP_VS_SCTP_S_ESTABLISHED)) {
453                                 atomic_dec(&dest->activeconns);
454                                 atomic_inc(&dest->inactconns);
455                                 cp->flags |= IP_VS_CONN_F_INACTIVE;
456                         } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
457                                    (next_state == IP_VS_SCTP_S_ESTABLISHED)) {
458                                 atomic_inc(&dest->activeconns);
459                                 atomic_dec(&dest->inactconns);
460                                 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
461                         }
462                 }
463         }
464         if (likely(pd))
465                 cp->timeout = pd->timeout_table[cp->state = next_state];
466         else    /* What to do ? */
467                 cp->timeout = sctp_timeouts[cp->state = next_state];
468 }
469
470 static void
471 sctp_state_transition(struct ip_vs_conn *cp, int direction,
472                 const struct sk_buff *skb, struct ip_vs_proto_data *pd)
473 {
474         spin_lock_bh(&cp->lock);
475         set_sctp_state(pd, cp, direction, skb);
476         spin_unlock_bh(&cp->lock);
477 }
478
479 static inline __u16 sctp_app_hashkey(__be16 port)
480 {
481         return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
482                 & SCTP_APP_TAB_MASK;
483 }
484
485 static int sctp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
486 {
487         struct ip_vs_app *i;
488         __u16 hash;
489         __be16 port = inc->port;
490         int ret = 0;
491         struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
492
493         hash = sctp_app_hashkey(port);
494
495         list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
496                 if (i->port == port) {
497                         ret = -EEXIST;
498                         goto out;
499                 }
500         }
501         list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]);
502         atomic_inc(&pd->appcnt);
503 out:
504
505         return ret;
506 }
507
508 static void sctp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
509 {
510         struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
511
512         atomic_dec(&pd->appcnt);
513         list_del_rcu(&inc->p_list);
514 }
515
516 static int sctp_app_conn_bind(struct ip_vs_conn *cp)
517 {
518         struct netns_ipvs *ipvs = cp->ipvs;
519         int hash;
520         struct ip_vs_app *inc;
521         int result = 0;
522
523         /* Default binding: bind app only for NAT */
524         if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
525                 return 0;
526         /* Lookup application incarnations and bind the right one */
527         hash = sctp_app_hashkey(cp->vport);
528
529         list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
530                 if (inc->port == cp->vport) {
531                         if (unlikely(!ip_vs_app_inc_get(inc)))
532                                 break;
533
534                         IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
535                                         "%s:%u to app %s on port %u\n",
536                                         __func__,
537                                         IP_VS_DBG_ADDR(cp->af, &cp->caddr),
538                                         ntohs(cp->cport),
539                                         IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
540                                         ntohs(cp->vport),
541                                         inc->name, ntohs(inc->port));
542                         cp->app = inc;
543                         if (inc->init_conn)
544                                 result = inc->init_conn(inc, cp);
545                         break;
546                 }
547         }
548
549         return result;
550 }
551
552 /* ---------------------------------------------
553  *   timeouts is netns related now.
554  * ---------------------------------------------
555  */
556 static int __ip_vs_sctp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
557 {
558         ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
559         pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
560                                                         sizeof(sctp_timeouts));
561         if (!pd->timeout_table)
562                 return -ENOMEM;
563         return 0;
564 }
565
566 static void __ip_vs_sctp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
567 {
568         kfree(pd->timeout_table);
569 }
570
571 struct ip_vs_protocol ip_vs_protocol_sctp = {
572         .name           = "SCTP",
573         .protocol       = IPPROTO_SCTP,
574         .num_states     = IP_VS_SCTP_S_LAST,
575         .dont_defrag    = 0,
576         .init           = NULL,
577         .exit           = NULL,
578         .init_netns     = __ip_vs_sctp_init,
579         .exit_netns     = __ip_vs_sctp_exit,
580         .register_app   = sctp_register_app,
581         .unregister_app = sctp_unregister_app,
582         .conn_schedule  = sctp_conn_schedule,
583         .conn_in_get    = ip_vs_conn_in_get_proto,
584         .conn_out_get   = ip_vs_conn_out_get_proto,
585         .snat_handler   = sctp_snat_handler,
586         .dnat_handler   = sctp_dnat_handler,
587         .csum_check     = sctp_csum_check,
588         .state_name     = sctp_state_name,
589         .state_transition = sctp_state_transition,
590         .app_conn_bind  = sctp_app_conn_bind,
591         .debug_packet   = ip_vs_tcpudp_debug_packet,
592         .timeout_change = NULL,
593 };