Merge tag 'usb-3.18-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
[sfrench/cifs-2.6.git] / net / netfilter / nf_conntrack_proto_tcp.c
1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3  * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 #include <linux/types.h>
12 #include <linux/timer.h>
13 #include <linux/module.h>
14 #include <linux/in.h>
15 #include <linux/tcp.h>
16 #include <linux/spinlock.h>
17 #include <linux/skbuff.h>
18 #include <linux/ipv6.h>
19 #include <net/ip6_checksum.h>
20 #include <asm/unaligned.h>
21
22 #include <net/tcp.h>
23
24 #include <linux/netfilter.h>
25 #include <linux/netfilter_ipv4.h>
26 #include <linux/netfilter_ipv6.h>
27 #include <net/netfilter/nf_conntrack.h>
28 #include <net/netfilter/nf_conntrack_l4proto.h>
29 #include <net/netfilter/nf_conntrack_ecache.h>
30 #include <net/netfilter/nf_conntrack_seqadj.h>
31 #include <net/netfilter/nf_conntrack_synproxy.h>
32 #include <net/netfilter/nf_log.h>
33 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
34 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
35
36 /* "Be conservative in what you do,
37     be liberal in what you accept from others."
38     If it's non-zero, we mark only out of window RST segments as INVALID.
       Initialised to 0. */
39 static int nf_ct_tcp_be_liberal __read_mostly = 0;
40
41 /* If it is set to zero, we disable picking up already established
42    connections.
       Initialised to 1. */
43 static int nf_ct_tcp_loose __read_mostly = 1;
44
45 /* Max number of the retransmitted packets without receiving an (acceptable)
46    ACK from the destination. If this number is reached, a shorter timer
47    will be started.
       Initialised to 3. */
48 static int nf_ct_tcp_max_retrans __read_mostly = 3;
49
50   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
51      closely.  They're more complex. --RR */
52
/*
 * Printable state names.  tcp_print_conntrack() indexes this table with the
 * numeric value of ct->proto.tcp.state.
 * NOTE(review): the order presumably mirrors enum tcp_conntrack, which is
 * declared outside this file - keep the two in sync.
 */
53 static const char *const tcp_conntrack_names[] = {
54         "NONE",
55         "SYN_SENT",
56         "SYN_RECV",
57         "ESTABLISHED",
58         "FIN_WAIT",
59         "CLOSE_WAIT",
60         "LAST_ACK",
61         "TIME_WAIT",
62         "CLOSE",
63         "SYN_SENT2",
64 };
65
/*
 * Postfix unit macros: they expand to a multiplication, so "2 MINS" reads
 * as 2 * 60 * HZ.  They only make sense directly after a number.
 */
66 #define SECS * HZ
67 #define MINS * 60 SECS
68 #define HOURS * 60 MINS
69 #define DAYS * 24 HOURS
70
/*
 * Initial timeout for each tracked state, plus two extra slots
 * (RETRANS and UNACK).  Values are multiples of HZ.
 */
71 static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
72         [TCP_CONNTRACK_SYN_SENT]        = 2 MINS,
73         [TCP_CONNTRACK_SYN_RECV]        = 60 SECS,
74         [TCP_CONNTRACK_ESTABLISHED]     = 5 DAYS,
75         [TCP_CONNTRACK_FIN_WAIT]        = 2 MINS,
76         [TCP_CONNTRACK_CLOSE_WAIT]      = 60 SECS,
77         [TCP_CONNTRACK_LAST_ACK]        = 30 SECS,
78         [TCP_CONNTRACK_TIME_WAIT]       = 2 MINS,
79         [TCP_CONNTRACK_CLOSE]           = 10 SECS,
80         [TCP_CONNTRACK_SYN_SENT2]       = 2 MINS,
81 /* RFC1122 says the R2 limit should be at least 100 seconds.
82    Linux uses 15 packets as limit, which corresponds
83    to ~13-30min depending on RTO. */
84         [TCP_CONNTRACK_RETRANS]         = 5 MINS,
85         [TCP_CONNTRACK_UNACK]           = 5 MINS,
86 };
87
/*
 * Three-letter aliases for the conntrack states, used to keep the
 * tcp_conntracks transition table readable.  sIV and sIG are not real
 * states: the table uses them to mark a packet as invalid or to be ignored.
 */
88 #define sNO TCP_CONNTRACK_NONE
89 #define sSS TCP_CONNTRACK_SYN_SENT
90 #define sSR TCP_CONNTRACK_SYN_RECV
91 #define sES TCP_CONNTRACK_ESTABLISHED
92 #define sFW TCP_CONNTRACK_FIN_WAIT
93 #define sCW TCP_CONNTRACK_CLOSE_WAIT
94 #define sLA TCP_CONNTRACK_LAST_ACK
95 #define sTW TCP_CONNTRACK_TIME_WAIT
96 #define sCL TCP_CONNTRACK_CLOSE
97 #define sS2 TCP_CONNTRACK_SYN_SENT2
98 #define sIV TCP_CONNTRACK_MAX
99 #define sIG TCP_CONNTRACK_IGNORE
100
101 /* What TCP flags are set from RST/SYN/FIN/ACK.
     *
     * This is the classification returned by get_conntrack_index().  The
     * rows of each tcp_conntracks sub-table (syn, synack, fin, ack, rst,
     * none) are listed in the same order as these enumerators.
     */
102 enum tcp_bit_set {
103         TCP_SYN_SET,
104         TCP_SYNACK_SET,
105         TCP_FIN_SET,
106         TCP_ACK_SET,
107         TCP_RST_SET,
108         TCP_NONE_SET,
109 };
110
111 /*
112  * The TCP state transition table needs a few words...
113  *
114  * We are the man in the middle. All the packets go through us
115  * but might get lost in transit to the destination.
116  * It is assumed that the destinations can't receive segments
117  * we haven't seen.
118  *
119  * The checked segment is in window, but our windows are *not*
120  * equivalent with the ones of the sender/receiver. We always
121  * try to guess the state of the current sender.
122  *
123  * The meanings of the states are:
124  *
125  * NONE:        initial state
126  * SYN_SENT:    SYN-only packet seen
127  * SYN_SENT2:   SYN-only packet seen from reply dir, simultaneous open
128  * SYN_RECV:    SYN-ACK packet seen
129  * ESTABLISHED: ACK packet seen
130  * FIN_WAIT:    FIN packet seen
131  * CLOSE_WAIT:  ACK seen (after FIN)
132  * LAST_ACK:    FIN seen (after FIN)
133  * TIME_WAIT:   last ACK seen
134  * CLOSE:       closed connection (RST)
135  *
136  * Packets marked as IGNORED (sIG):
137  *      if they may be either invalid or valid
138  *      and the receiver may send back a connection
139  *      closing RST or a SYN/ACK.
140  *
141  * Packets marked as INVALID (sIV):
142  *      if we regard them as truly invalid packets
     *
     * Table layout: tcp_conntracks[direction][flag class][current state]
     * gives the new state.  The first sub-table is for the ORIGINAL
     * direction, the second for REPLY; rows are syn, synack, fin, ack, rst,
     * none; columns follow the "sNO, sSS, ..." ruler printed above each row.
143  */
144 static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
145         {
146 /* ORIGINAL */
147 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
148 /*syn*/    { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
149 /*
150  *      sNO -> sSS      Initialize a new connection
151  *      sSS -> sSS      Retransmitted SYN
152  *      sS2 -> sS2      Late retransmitted SYN
153  *      sSR -> sIG
154  *      sES -> sIG      Error: SYNs in window outside the SYN_SENT state
155  *                      are errors. Receiver will reply with RST
156  *                      and close the connection.
157  *                      Or we are not in sync and hold a dead connection.
158  *      sFW -> sIG
159  *      sCW -> sIG
160  *      sLA -> sIG
161  *      sTW -> sSS      Reopened connection (RFC 1122).
162  *      sCL -> sSS
163  */
164 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
165 /*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
166 /*
167  *      sNO -> sIV      Too late and no reason to do anything
168  *      sSS -> sIV      Client can't send SYN and then SYN/ACK
169  *      sS2 -> sSR      SYN/ACK sent to SYN2 in simultaneous open
170  *      sSR -> sSR      Late retransmitted SYN/ACK in simultaneous open
171  *      sES -> sIV      Invalid SYN/ACK packets sent by the client
172  *      sFW -> sIV
173  *      sCW -> sIV
174  *      sLA -> sIV
175  *      sTW -> sIV
176  *      sCL -> sIV
177  */
178 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
179 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
180 /*
181  *      sNO -> sIV      Too late and no reason to do anything...
182  *      sSS -> sIV      Client might not send FIN in this state:
183  *                      we enforce waiting for a SYN/ACK reply first.
184  *      sS2 -> sIV
185  *      sSR -> sFW      Close started.
186  *      sES -> sFW
187  *      sFW -> sLA      FIN seen in both directions, waiting for
188  *                      the last ACK.
189  *                      Might be a retransmitted FIN as well...
190  *      sCW -> sLA
191  *      sLA -> sLA      Retransmitted FIN. Remain in the same state.
192  *      sTW -> sTW
193  *      sCL -> sCL
194  */
195 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
196 /*ack*/    { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
197 /*
198  *      sNO -> sES      Assumed.
199  *      sSS -> sIV      ACK is invalid: we haven't seen a SYN/ACK yet.
200  *      sS2 -> sIV
201  *      sSR -> sES      Established state is reached.
202  *      sES -> sES      :-)
203  *      sFW -> sCW      Normal close request answered by ACK.
204  *      sCW -> sCW
205  *      sLA -> sTW      Last ACK detected.
206  *      sTW -> sTW      Retransmitted last ACK. Remain in the same state.
207  *      sCL -> sCL
208  */
209 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
210 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
211 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
212         },
213         {
214 /* REPLY */
215 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
216 /*syn*/    { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
217 /*
218  *      sNO -> sIV      Never reached.
219  *      sSS -> sS2      Simultaneous open
220  *      sS2 -> sS2      Retransmitted simultaneous SYN
221  *      sSR -> sIV      Invalid SYN packets sent by the server
222  *      sES -> sIV
223  *      sFW -> sIV
224  *      sCW -> sIV
225  *      sLA -> sIV
226  *      sTW -> sSS      Reopened connection, but server may have switched role
227  *      sCL -> sIV
228  */
229 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
230 /*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
231 /*
232  *      sSS -> sSR      Standard open.
233  *      sS2 -> sSR      Simultaneous open
234  *      sSR -> sIG      Retransmitted SYN/ACK, ignore it.
235  *      sES -> sIG      Late retransmitted SYN/ACK?
236  *      sFW -> sIG      Might be SYN/ACK answering ignored SYN
237  *      sCW -> sIG
238  *      sLA -> sIG
239  *      sTW -> sIG
240  *      sCL -> sIG
241  */
242 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
243 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
244 /*
245  *      sSS -> sIV      Server might not send FIN in this state.
246  *      sS2 -> sIV
247  *      sSR -> sFW      Close started.
248  *      sES -> sFW
249  *      sFW -> sLA      FIN seen in both directions.
250  *      sCW -> sLA
251  *      sLA -> sLA      Retransmitted FIN.
252  *      sTW -> sTW
253  *      sCL -> sCL
254  */
255 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
256 /*ack*/    { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
257 /*
258  *      sSS -> sIG      Might be a half-open connection.
259  *      sS2 -> sIG
260  *      sSR -> sSR      Might answer late resent SYN.
261  *      sES -> sES      :-)
262  *      sFW -> sCW      Normal close request answered by ACK.
263  *      sCW -> sCW
264  *      sLA -> sTW      Last ACK detected.
265  *      sTW -> sTW      Retransmitted last ACK.
266  *      sCL -> sCL
267  */
268 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
269 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
270 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
271         }
272 };
273
274 static inline struct nf_tcp_net *tcp_pernet(struct net *net)
275 {
276         return &net->ct.nf_ct_proto.tcp;
277 }
278
279 static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
280                              struct nf_conntrack_tuple *tuple)
281 {
282         const struct tcphdr *hp;
283         struct tcphdr _hdr;
284
285         /* Actually only need first 8 bytes. */
286         hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
287         if (hp == NULL)
288                 return false;
289
290         tuple->src.u.tcp.port = hp->source;
291         tuple->dst.u.tcp.port = hp->dest;
292
293         return true;
294 }
295
/*
 * Build in @tuple the TCP part of the tuple for the opposite direction:
 * the destination port of @orig becomes the source port and vice versa.
 * Only the two port fields of @tuple are written.  Always returns true.
 */
296 static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
297                              const struct nf_conntrack_tuple *orig)
298 {
299         tuple->src.u.tcp.port = orig->dst.u.tcp.port;
300         tuple->dst.u.tcp.port = orig->src.u.tcp.port;
301         return true;
302 }
303
304 /* Print out the per-protocol part of the tuple. */
305 static int tcp_print_tuple(struct seq_file *s,
306                            const struct nf_conntrack_tuple *tuple)
307 {
308         return seq_printf(s, "sport=%hu dport=%hu ",
309                           ntohs(tuple->src.u.tcp.port),
310                           ntohs(tuple->dst.u.tcp.port));
311 }
312
313 /* Print out the private part of the conntrack. */
314 static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
315 {
316         enum tcp_conntrack state;
317
318         spin_lock_bh(&ct->lock);
319         state = ct->proto.tcp.state;
320         spin_unlock_bh(&ct->lock);
321
322         return seq_printf(s, "%s ", tcp_conntrack_names[state]);
323 }
324
325 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
326 {
327         if (tcph->rst) return TCP_RST_SET;
328         else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
329         else if (tcph->fin) return TCP_FIN_SET;
330         else if (tcph->ack) return TCP_ACK_SET;
331         else return TCP_NONE_SET;
332 }
333
334 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
335    in IP Filter' by Guido van Rooij.
336
337    http://www.sane.nl/events/sane2000/papers.html
338    http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
339
340    The boundaries and the conditions are changed according to RFC793:
341    the packet must intersect the window (i.e. segments may be
342    after the right or before the left edge) and thus receivers may ACK
343    segments after the right edge of the window.
344
345         td_maxend = max(sack + max(win,1)) seen in reply packets
346         td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
347         td_maxwin += seq + len - sender.td_maxend
348                         if seq + len > sender.td_maxend
349         td_end    = max(seq + len) seen in sent packets
350
351    I.   Upper bound for valid data:     seq <= sender.td_maxend
352    II.  Lower bound for valid data:     seq + len >= sender.td_end - receiver.td_maxwin
353    III. Upper bound for valid (s)ack:   sack <= receiver.td_end
354    IV.  Lower bound for valid (s)ack:   sack >= receiver.td_end - MAXACKWINDOW
355
356    where sack is the highest right edge of sack block found in the packet
357    or ack in the case of packet without SACK option.
358
359    The upper bound limit for a valid (s)ack is not ignored -
360    we don't have to deal with fragments.
361 */
362
363 static inline __u32 segment_seq_plus_len(__u32 seq,
364                                          size_t len,
365                                          unsigned int dataoff,
366                                          const struct tcphdr *tcph)
367 {
368         /* XXX Should I use payload length field in IP/IPv6 header ?
369          * - YK */
370         return (seq + len - dataoff - tcph->doff*4
371                 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
372 }
373
374 /* Fixme: what about big packets? */
375 #define MAXACKWINCONST                  66000
/*
 * Width of the window used for the lower bound of an acceptable (s)ack
 * (condition IV in tcp_in_window()): the larger of the given state's
 * td_maxwin and MAXACKWINCONST.  Note that the argument is evaluated twice.
 */
376 #define MAXACKWINDOW(sender)                                            \
377         ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin     \
378                                               : MAXACKWINCONST)
379
380 /*
381  * Simplified tcp_parse_options routine from tcp_input.c
382  */
383 static void tcp_options(const struct sk_buff *skb,
384                         unsigned int dataoff,
385                         const struct tcphdr *tcph,
386                         struct ip_ct_tcp_state *state)
387 {
388         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
389         const unsigned char *ptr;
390         int length = (tcph->doff*4) - sizeof(struct tcphdr);
391
392         if (!length)
393                 return;
394
395         ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
396                                  length, buff);
397         BUG_ON(ptr == NULL);
398
399         state->td_scale =
400         state->flags = 0;
401
402         while (length > 0) {
403                 int opcode=*ptr++;
404                 int opsize;
405
406                 switch (opcode) {
407                 case TCPOPT_EOL:
408                         return;
409                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
410                         length--;
411                         continue;
412                 default:
413                         opsize=*ptr++;
414                         if (opsize < 2) /* "silly options" */
415                                 return;
416                         if (opsize > length)
417                                 return; /* don't parse partial options */
418
419                         if (opcode == TCPOPT_SACK_PERM
420                             && opsize == TCPOLEN_SACK_PERM)
421                                 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
422                         else if (opcode == TCPOPT_WINDOW
423                                  && opsize == TCPOLEN_WINDOW) {
424                                 state->td_scale = *(u_int8_t *)ptr;
425
426                                 if (state->td_scale > 14) {
427                                         /* See RFC1323 */
428                                         state->td_scale = 14;
429                                 }
430                                 state->flags |=
431                                         IP_CT_TCP_FLAG_WINDOW_SCALE;
432                         }
433                         ptr += opsize - 2;
434                         length -= opsize;
435                 }
436         }
437 }
438
439 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
440                      const struct tcphdr *tcph, __u32 *sack)
441 {
442         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
443         const unsigned char *ptr;
444         int length = (tcph->doff*4) - sizeof(struct tcphdr);
445         __u32 tmp;
446
447         if (!length)
448                 return;
449
450         ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
451                                  length, buff);
452         BUG_ON(ptr == NULL);
453
454         /* Fast path for timestamp-only option */
455         if (length == TCPOLEN_TSTAMP_ALIGNED
456             && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
457                                        | (TCPOPT_NOP << 16)
458                                        | (TCPOPT_TIMESTAMP << 8)
459                                        | TCPOLEN_TIMESTAMP))
460                 return;
461
462         while (length > 0) {
463                 int opcode = *ptr++;
464                 int opsize, i;
465
466                 switch (opcode) {
467                 case TCPOPT_EOL:
468                         return;
469                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
470                         length--;
471                         continue;
472                 default:
473                         opsize = *ptr++;
474                         if (opsize < 2) /* "silly options" */
475                                 return;
476                         if (opsize > length)
477                                 return; /* don't parse partial options */
478
479                         if (opcode == TCPOPT_SACK
480                             && opsize >= (TCPOLEN_SACK_BASE
481                                           + TCPOLEN_SACK_PERBLOCK)
482                             && !((opsize - TCPOLEN_SACK_BASE)
483                                  % TCPOLEN_SACK_PERBLOCK)) {
484                                 for (i = 0;
485                                      i < (opsize - TCPOLEN_SACK_BASE);
486                                      i += TCPOLEN_SACK_PERBLOCK) {
487                                         tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
488
489                                         if (after(tmp, *sack))
490                                                 *sack = tmp;
491                                 }
492                                 return;
493                         }
494                         ptr += opsize - 2;
495                         length -= opsize;
496                 }
497         }
498 }
499
/*
 * Check whether a segment fits the tracked windows of a connection and, if
 * it does, update the per-direction window state.
 *
 * @ct:      conntrack entry the packet belongs to
 * @state:   TCP tracking state; seen[dir] is treated as the sender and
 *           seen[!dir] as the receiver of this packet
 * @dir:     direction of the packet
 * @index:   flag class of the packet; only TCP_ACK_SET segments take part
 *           in the retransmission counting
 * @skb:     the packet
 * @dataoff: offset of the TCP header in @skb
 * @tcph:    TCP header of the packet
 * @pf:      protocol family, only passed on to nf_log_packet()
 *
 * Returns true when the four window conditions (I-IV, described in the
 * comment block further up) all hold, or when they do not but liberal mode
 * is enabled for the sender or the netns.  Also returns true early for a
 * SYN without ACK seen while the sender state is still uninitialised.
 * Returns false otherwise, logging the reason if LOG_INVALID() says so.
 */
500 static bool tcp_in_window(const struct nf_conn *ct,
501                           struct ip_ct_tcp *state,
502                           enum ip_conntrack_dir dir,
503                           unsigned int index,
504                           const struct sk_buff *skb,
505                           unsigned int dataoff,
506                           const struct tcphdr *tcph,
507                           u_int8_t pf)
508 {
509         struct net *net = nf_ct_net(ct);
510         struct nf_tcp_net *tn = tcp_pernet(net);
511         struct ip_ct_tcp_state *sender = &state->seen[dir];
512         struct ip_ct_tcp_state *receiver = &state->seen[!dir];
513         const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
514         __u32 seq, ack, sack, end, win, swin;
515         s32 receiver_offset;
516         bool res, in_recv_win;
517
518         /*
519          * Get the required data from the packet.
520          */
521         seq = ntohl(tcph->seq);
522         ack = sack = ntohl(tcph->ack_seq);
523         win = ntohs(tcph->window);
524         end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
525
            /* sack starts as the plain ACK; tcp_sack() may raise it. */
526         if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
527                 tcp_sack(skb, dataoff, tcph, &sack);
528
529         /* Take into account NAT sequence number mangling */
530         receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
531         ack -= receiver_offset;
532         sack -= receiver_offset;
533
534         pr_debug("tcp_in_window: START\n");
535         pr_debug("tcp_in_window: ");
536         nf_ct_dump_tuple(tuple);
537         pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
538                  seq, ack, receiver_offset, sack, receiver_offset, win, end);
539         pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
540                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
541                  sender->td_end, sender->td_maxend, sender->td_maxwin,
542                  sender->td_scale,
543                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
544                  receiver->td_scale);
545
            /* td_maxwin == 0 is used as "no data recorded for this side yet". */
546         if (sender->td_maxwin == 0) {
547                 /*
548                  * Initialize sender data.
549                  */
550                 if (tcph->syn) {
551                         /*
552                          * SYN-ACK in reply to a SYN
553                          * or SYN from reply direction in simultaneous open.
554                          */
555                         sender->td_end =
556                         sender->td_maxend = end;
557                         sender->td_maxwin = (win == 0 ? 1 : win);
558
559                         tcp_options(skb, dataoff, tcph, sender);
560                         /*
561                          * RFC 1323:
562                          * Both sides must send the Window Scale option
563                          * to enable window scaling in either direction.
564                          */
565                         if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
566                               && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
567                                 sender->td_scale =
568                                 receiver->td_scale = 0;
569                         if (!tcph->ack)
570                                 /* Simultaneous open */
571                                 return true;
572                 } else {
573                         /*
574                          * We are in the middle of a connection,
575                          * its history is lost for us.
576                          * Let's try to use the data from the packet.
577                          */
578                         sender->td_end = end;
579                         swin = win << sender->td_scale;
580                         sender->td_maxwin = (swin == 0 ? 1 : swin);
581                         sender->td_maxend = end + sender->td_maxwin;
582                         /*
583                          * We haven't seen traffic in the other direction yet
584                          * but we have to tweak window tracking to pass III
585                          * and IV until that happens.
586                          */
587                         if (receiver->td_maxwin == 0)
588                                 receiver->td_end = receiver->td_maxend = sack;
589                 }
590         } else if (((state->state == TCP_CONNTRACK_SYN_SENT
591                      && dir == IP_CT_DIR_ORIGINAL)
592                    || (state->state == TCP_CONNTRACK_SYN_RECV
593                      && dir == IP_CT_DIR_REPLY))
594                    && after(end, sender->td_end)) {
595                 /*
596                  * RFC 793: "if a TCP is reinitialized ... then it need
597                  * not wait at all; it must only be sure to use sequence
598                  * numbers larger than those recently used."
599                  */
600                 sender->td_end =
601                 sender->td_maxend = end;
602                 sender->td_maxwin = (win == 0 ? 1 : win);
603
604                 tcp_options(skb, dataoff, tcph, sender);
605         }
606
607         if (!(tcph->ack)) {
608                 /*
609                  * If there is no ACK, just pretend it was set and OK.
610                  */
611                 ack = sack = receiver->td_end;
612         } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
613                     (TCP_FLAG_ACK|TCP_FLAG_RST))
614                    && (ack == 0)) {
615                 /*
616                  * Broken TCP stacks, that set ACK in RST packets as well
617                  * with zero ack value.
618                  */
619                 ack = sack = receiver->td_end;
620         }
621
622         if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
623                 /*
624                  * RST sent answering SYN.
625                  */
626                 seq = end = sender->td_end;
627
628         pr_debug("tcp_in_window: ");
629         nf_ct_dump_tuple(tuple);
630         pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
631                  seq, ack, receiver_offset, sack, receiver_offset, win, end);
632         pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
633                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
634                  sender->td_end, sender->td_maxend, sender->td_maxwin,
635                  sender->td_scale,
636                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
637                  receiver->td_scale);
638
639         /* Is the ending sequence in the receive window (if available)? */
640         in_recv_win = !receiver->td_maxwin ||
641                       after(end, sender->td_end - receiver->td_maxwin - 1);
642
643         pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
644                  before(seq, sender->td_maxend + 1),
645                  (in_recv_win ? 1 : 0),
646                  before(sack, receiver->td_end + 1),
647                  after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
648
            /* Conditions I, II, III and IV, in that order. */
649         if (before(seq, sender->td_maxend + 1) &&
650             in_recv_win &&
651             before(sack, receiver->td_end + 1) &&
652             after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
653                 /*
654                  * Take into account window scaling (RFC 1323).
655                  */
656                 if (!tcph->syn)
657                         win <<= sender->td_scale;
658
659                 /*
660                  * Update sender data.
661                  */
662                 swin = win + (sack - ack);
663                 if (sender->td_maxwin < swin)
664                         sender->td_maxwin = swin;
665                 if (after(end, sender->td_end)) {
666                         sender->td_end = end;
667                         sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
668                 }
669                 if (tcph->ack) {
670                         if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
671                                 sender->td_maxack = ack;
672                                 sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
673                         } else if (after(ack, sender->td_maxack))
674                                 sender->td_maxack = ack;
675                 }
676
677                 /*
678                  * Update receiver data.
679                  */
680                 if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
681                         receiver->td_maxwin += end - sender->td_maxend;
682                 if (after(sack + win, receiver->td_maxend - 1)) {
683                         receiver->td_maxend = sack + win;
684                         if (win == 0)
685                                 receiver->td_maxend++;
686                 }
687                 if (ack == receiver->td_end)
688                         receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
689
690                 /*
691                  * Check retransmissions.
692                  */
693                 if (index == TCP_ACK_SET) {
                            /* Same dir/seq/ack/end/win as the last recorded ACK: count it. */
694                         if (state->last_dir == dir
695                             && state->last_seq == seq
696                             && state->last_ack == ack
697                             && state->last_end == end
698                             && state->last_win == win)
699                                 state->retrans++;
700                         else {
701                                 state->last_dir = dir;
702                                 state->last_seq = seq;
703                                 state->last_ack = ack;
704                                 state->last_end = end;
705                                 state->last_win = win;
706                                 state->retrans = 0;
707                         }
708                 }
709                 res = true;
710         } else {
                    /*
                     * Out of window.  The packet is still accepted when
                     * liberal mode is on for this sender or for the netns.
                     */
711                 res = false;
712                 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
713                     tn->tcp_be_liberal)
714                         res = true;
                    /*
                     * The nested ?: selects the message for the first of
                     * conditions I-IV that failed; "BUG" would mean that
                     * none of them did.
                     */
715                 if (!res && LOG_INVALID(net, IPPROTO_TCP))
716                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
717                         "nf_ct_tcp: %s ",
718                         before(seq, sender->td_maxend + 1) ?
719                         in_recv_win ?
720                         before(sack, receiver->td_end + 1) ?
721                         after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
722                         : "ACK is under the lower bound (possible overly delayed ACK)"
723                         : "ACK is over the upper bound (ACKed data not seen yet)"
724                         : "SEQ is under the lower bound (already ACKed data retransmitted)"
725                         : "SEQ is over the upper bound (over the window of the receiver)");
726         }
727
728         pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
729                  "receiver end=%u maxend=%u maxwin=%u\n",
730                  res, sender->td_end, sender->td_maxend, sender->td_maxwin,
731                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
732
733         return res;
734 }
735
736 /* table of valid flag combinations - PUSH, ECE and CWR are always valid
     *
     * The array is sized so that any combination of FIN, SYN, RST, ACK and
     * URG is a valid index.  Listed combinations are 1 (valid); every other
     * entry is implicitly zero-initialised, i.e. not valid.
     * NOTE(review): the lookup lives outside this section; presumably the
     * caller masks PUSH, ECE and CWR off before indexing - confirm.
     */
737 static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
738                                  TCPHDR_URG) + 1] =
739 {
740         [TCPHDR_SYN]                            = 1,
741         [TCPHDR_SYN|TCPHDR_URG]                 = 1,
742         [TCPHDR_SYN|TCPHDR_ACK]                 = 1,
743         [TCPHDR_RST]                            = 1,
744         [TCPHDR_RST|TCPHDR_ACK]                 = 1,
745         [TCPHDR_FIN|TCPHDR_ACK]                 = 1,
746         [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]      = 1,
747         [TCPHDR_ACK]                            = 1,
748         [TCPHDR_ACK|TCPHDR_URG]                 = 1,
749 };
750
751 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
752 static int tcp_error(struct net *net, struct nf_conn *tmpl,
753                      struct sk_buff *skb,
754                      unsigned int dataoff,
755                      enum ip_conntrack_info *ctinfo,
756                      u_int8_t pf,
757                      unsigned int hooknum)
758 {
759         const struct tcphdr *th;
760         struct tcphdr _tcph;
761         unsigned int tcplen = skb->len - dataoff;
762         u_int8_t tcpflags;
763
764         /* Smaller that minimal TCP header? */
765         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
766         if (th == NULL) {
767                 if (LOG_INVALID(net, IPPROTO_TCP))
768                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
769                                 "nf_ct_tcp: short packet ");
770                 return -NF_ACCEPT;
771         }
772
773         /* Not whole TCP header or malformed packet */
774         if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
775                 if (LOG_INVALID(net, IPPROTO_TCP))
776                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
777                                 "nf_ct_tcp: truncated/malformed packet ");
778                 return -NF_ACCEPT;
779         }
780
781         /* Checksum invalid? Ignore.
782          * We skip checking packets on the outgoing path
783          * because the checksum is assumed to be correct.
784          */
785         /* FIXME: Source route IP option packets --RR */
786         if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
787             nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
788                 if (LOG_INVALID(net, IPPROTO_TCP))
789                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
790                                   "nf_ct_tcp: bad TCP checksum ");
791                 return -NF_ACCEPT;
792         }
793
794         /* Check TCP flags. */
795         tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
796         if (!tcp_valid_flags[tcpflags]) {
797                 if (LOG_INVALID(net, IPPROTO_TCP))
798                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
799                                   "nf_ct_tcp: invalid TCP flag combination ");
800                 return -NF_ACCEPT;
801         }
802
803         return NF_ACCEPT;
804 }
805
806 static unsigned int *tcp_get_timeouts(struct net *net)
807 {
808         return tcp_pernet(net)->timeouts;
809 }
810
811 /* Returns verdict for packet, or -1 for invalid. */
812 static int tcp_packet(struct nf_conn *ct,
813                       const struct sk_buff *skb,
814                       unsigned int dataoff,
815                       enum ip_conntrack_info ctinfo,
816                       u_int8_t pf,
817                       unsigned int hooknum,
818                       unsigned int *timeouts)
819 {
820         struct net *net = nf_ct_net(ct);
821         struct nf_tcp_net *tn = tcp_pernet(net);
822         struct nf_conntrack_tuple *tuple;
823         enum tcp_conntrack new_state, old_state;
824         enum ip_conntrack_dir dir;
825         const struct tcphdr *th;
826         struct tcphdr _tcph;
827         unsigned long timeout;
828         unsigned int index;
829
830         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
831         BUG_ON(th == NULL);
832
833         spin_lock_bh(&ct->lock);
834         old_state = ct->proto.tcp.state;
835         dir = CTINFO2DIR(ctinfo);
836         index = get_conntrack_index(th);
837         new_state = tcp_conntracks[dir][index][old_state];
838         tuple = &ct->tuplehash[dir].tuple;
839
840         switch (new_state) {
841         case TCP_CONNTRACK_SYN_SENT:
842                 if (old_state < TCP_CONNTRACK_TIME_WAIT)
843                         break;
844                 /* RFC 1122: "When a connection is closed actively,
845                  * it MUST linger in TIME-WAIT state for a time 2xMSL
846                  * (Maximum Segment Lifetime). However, it MAY accept
847                  * a new SYN from the remote TCP to reopen the connection
848                  * directly from TIME-WAIT state, if..."
849                  * We ignore the conditions because we are in the
850                  * TIME-WAIT state anyway.
851                  *
852                  * Handle aborted connections: we and the server
853                  * think there is an existing connection but the client
854                  * aborts it and starts a new one.
855                  */
856                 if (((ct->proto.tcp.seen[dir].flags
857                       | ct->proto.tcp.seen[!dir].flags)
858                      & IP_CT_TCP_FLAG_CLOSE_INIT)
859                     || (ct->proto.tcp.last_dir == dir
860                         && ct->proto.tcp.last_index == TCP_RST_SET)) {
861                         /* Attempt to reopen a closed/aborted connection.
862                          * Delete this connection and look up again. */
863                         spin_unlock_bh(&ct->lock);
864
865                         /* Only repeat if we can actually remove the timer.
866                          * Destruction may already be in progress in process
867                          * context and we must give it a chance to terminate.
868                          */
869                         if (nf_ct_kill(ct))
870                                 return -NF_REPEAT;
871                         return NF_DROP;
872                 }
873                 /* Fall through */
874         case TCP_CONNTRACK_IGNORE:
875                 /* Ignored packets:
876                  *
877                  * Our connection entry may be out of sync, so ignore
878                  * packets which may signal the real connection between
879                  * the client and the server.
880                  *
881                  * a) SYN in ORIGINAL
882                  * b) SYN/ACK in REPLY
883                  * c) ACK in reply direction after initial SYN in original.
884                  *
885                  * If the ignored packet is invalid, the receiver will send
886                  * a RST we'll catch below.
887                  */
888                 if (index == TCP_SYNACK_SET
889                     && ct->proto.tcp.last_index == TCP_SYN_SET
890                     && ct->proto.tcp.last_dir != dir
891                     && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
892                         /* b) This SYN/ACK acknowledges a SYN that we earlier
893                          * ignored as invalid. This means that the client and
894                          * the server are both in sync, while the firewall is
895                          * not. We get in sync from the previously annotated
896                          * values.
897                          */
898                         old_state = TCP_CONNTRACK_SYN_SENT;
899                         new_state = TCP_CONNTRACK_SYN_RECV;
900                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
901                                 ct->proto.tcp.last_end;
902                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
903                                 ct->proto.tcp.last_end;
904                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
905                                 ct->proto.tcp.last_win == 0 ?
906                                         1 : ct->proto.tcp.last_win;
907                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
908                                 ct->proto.tcp.last_wscale;
909                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
910                                 ct->proto.tcp.last_flags;
911                         memset(&ct->proto.tcp.seen[dir], 0,
912                                sizeof(struct ip_ct_tcp_state));
913                         break;
914                 }
915                 ct->proto.tcp.last_index = index;
916                 ct->proto.tcp.last_dir = dir;
917                 ct->proto.tcp.last_seq = ntohl(th->seq);
918                 ct->proto.tcp.last_end =
919                     segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
920                 ct->proto.tcp.last_win = ntohs(th->window);
921
922                 /* a) This is a SYN in ORIGINAL. The client and the server
923                  * may be in sync but we are not. In that case, we annotate
924                  * the TCP options and let the packet go through. If it is a
925                  * valid SYN packet, the server will reply with a SYN/ACK, and
926                  * then we'll get in sync. Otherwise, the server ignores it. */
927                 if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
928                         struct ip_ct_tcp_state seen = {};
929
930                         ct->proto.tcp.last_flags =
931                         ct->proto.tcp.last_wscale = 0;
932                         tcp_options(skb, dataoff, th, &seen);
933                         if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
934                                 ct->proto.tcp.last_flags |=
935                                         IP_CT_TCP_FLAG_WINDOW_SCALE;
936                                 ct->proto.tcp.last_wscale = seen.td_scale;
937                         }
938                         if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
939                                 ct->proto.tcp.last_flags |=
940                                         IP_CT_TCP_FLAG_SACK_PERM;
941                         }
942                 }
943                 spin_unlock_bh(&ct->lock);
944                 if (LOG_INVALID(net, IPPROTO_TCP))
945                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
946                                   "nf_ct_tcp: invalid packet ignored in "
947                                   "state %s ", tcp_conntrack_names[old_state]);
948                 return NF_ACCEPT;
949         case TCP_CONNTRACK_MAX:
950                 /* Special case for SYN proxy: when the SYN to the server or
951                  * the SYN/ACK from the server is lost, the client may transmit
952                  * a keep-alive packet while in SYN_SENT state. This needs to
953                  * be associated with the original conntrack entry in order to
954                  * generate a new SYN with the correct sequence number.
955                  */
956                 if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
957                     index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
958                     ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
959                     ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
960                         pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
961                         spin_unlock_bh(&ct->lock);
962                         return NF_ACCEPT;
963                 }
964
965                 /* Invalid packet */
966                 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
967                          dir, get_conntrack_index(th), old_state);
968                 spin_unlock_bh(&ct->lock);
969                 if (LOG_INVALID(net, IPPROTO_TCP))
970                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
971                                   "nf_ct_tcp: invalid state ");
972                 return -NF_ACCEPT;
973         case TCP_CONNTRACK_CLOSE:
974                 if (index == TCP_RST_SET
975                     && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
976                     && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
977                         /* Invalid RST  */
978                         spin_unlock_bh(&ct->lock);
979                         if (LOG_INVALID(net, IPPROTO_TCP))
980                                 nf_log_packet(net, pf, 0, skb, NULL, NULL,
981                                               NULL, "nf_ct_tcp: invalid RST ");
982                         return -NF_ACCEPT;
983                 }
984                 if (index == TCP_RST_SET
985                     && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
986                          && ct->proto.tcp.last_index == TCP_SYN_SET)
987                         || (!test_bit(IPS_ASSURED_BIT, &ct->status)
988                             && ct->proto.tcp.last_index == TCP_ACK_SET))
989                     && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
990                         /* RST sent to invalid SYN or ACK we had let through
991                          * at a) and c) above:
992                          *
993                          * a) SYN was in window then
994                          * c) we hold a half-open connection.
995                          *
996                          * Delete our connection entry.
997                          * We skip window checking, because packet might ACK
998                          * segments we ignored. */
999                         goto in_window;
1000                 }
1001                 /* Just fall through */
1002         default:
1003                 /* Keep compilers happy. */
1004                 break;
1005         }
1006
1007         if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1008                            skb, dataoff, th, pf)) {
1009                 spin_unlock_bh(&ct->lock);
1010                 return -NF_ACCEPT;
1011         }
1012      in_window:
1013         /* From now on we have got in-window packets */
1014         ct->proto.tcp.last_index = index;
1015         ct->proto.tcp.last_dir = dir;
1016
1017         pr_debug("tcp_conntracks: ");
1018         nf_ct_dump_tuple(tuple);
1019         pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1020                  (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1021                  (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1022                  old_state, new_state);
1023
1024         ct->proto.tcp.state = new_state;
1025         if (old_state != new_state
1026             && new_state == TCP_CONNTRACK_FIN_WAIT)
1027                 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1028
1029         if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1030             timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1031                 timeout = timeouts[TCP_CONNTRACK_RETRANS];
1032         else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1033                  IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1034                  timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1035                 timeout = timeouts[TCP_CONNTRACK_UNACK];
1036         else
1037                 timeout = timeouts[new_state];
1038         spin_unlock_bh(&ct->lock);
1039
1040         if (new_state != old_state)
1041                 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1042
1043         if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1044                 /* If only reply is a RST, we can consider ourselves not to
1045                    have an established connection: this is a fairly common
1046                    problem case, so we can delete the conntrack
1047                    immediately.  --RR */
1048                 if (th->rst) {
1049                         nf_ct_kill_acct(ct, ctinfo, skb);
1050                         return NF_ACCEPT;
1051                 }
1052                 /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
1053                  * pickup with loose=1. Avoid large ESTABLISHED timeout.
1054                  */
1055                 if (new_state == TCP_CONNTRACK_ESTABLISHED &&
1056                     timeout > timeouts[TCP_CONNTRACK_UNACK])
1057                         timeout = timeouts[TCP_CONNTRACK_UNACK];
1058         } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1059                    && (old_state == TCP_CONNTRACK_SYN_RECV
1060                        || old_state == TCP_CONNTRACK_ESTABLISHED)
1061                    && new_state == TCP_CONNTRACK_ESTABLISHED) {
1062                 /* Set ASSURED if we see see valid ack in ESTABLISHED
1063                    after SYN_RECV or a valid answer for a picked up
1064                    connection. */
1065                 set_bit(IPS_ASSURED_BIT, &ct->status);
1066                 nf_conntrack_event_cache(IPCT_ASSURED, ct);
1067         }
1068         nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1069
1070         return NF_ACCEPT;
1071 }
1072
1073 /* Called when a new connection for this protocol found. */
1074 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1075                     unsigned int dataoff, unsigned int *timeouts)
1076 {
1077         enum tcp_conntrack new_state;
1078         const struct tcphdr *th;
1079         struct tcphdr _tcph;
1080         struct net *net = nf_ct_net(ct);
1081         struct nf_tcp_net *tn = tcp_pernet(net);
1082         const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1083         const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1084
1085         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1086         BUG_ON(th == NULL);
1087
1088         /* Don't need lock here: this conntrack not in circulation yet */
1089         new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1090
1091         /* Invalid: delete conntrack */
1092         if (new_state >= TCP_CONNTRACK_MAX) {
1093                 pr_debug("nf_ct_tcp: invalid new deleting.\n");
1094                 return false;
1095         }
1096
1097         if (new_state == TCP_CONNTRACK_SYN_SENT) {
1098                 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1099                 /* SYN packet */
1100                 ct->proto.tcp.seen[0].td_end =
1101                         segment_seq_plus_len(ntohl(th->seq), skb->len,
1102                                              dataoff, th);
1103                 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1104                 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1105                         ct->proto.tcp.seen[0].td_maxwin = 1;
1106                 ct->proto.tcp.seen[0].td_maxend =
1107                         ct->proto.tcp.seen[0].td_end;
1108
1109                 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1110         } else if (tn->tcp_loose == 0) {
1111                 /* Don't try to pick up connections. */
1112                 return false;
1113         } else {
1114                 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1115                 /*
1116                  * We are in the middle of a connection,
1117                  * its history is lost for us.
1118                  * Let's try to use the data from the packet.
1119                  */
1120                 ct->proto.tcp.seen[0].td_end =
1121                         segment_seq_plus_len(ntohl(th->seq), skb->len,
1122                                              dataoff, th);
1123                 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1124                 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1125                         ct->proto.tcp.seen[0].td_maxwin = 1;
1126                 ct->proto.tcp.seen[0].td_maxend =
1127                         ct->proto.tcp.seen[0].td_end +
1128                         ct->proto.tcp.seen[0].td_maxwin;
1129
1130                 /* We assume SACK and liberal window checking to handle
1131                  * window scaling */
1132                 ct->proto.tcp.seen[0].flags =
1133                 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1134                                               IP_CT_TCP_FLAG_BE_LIBERAL;
1135         }
1136
1137         /* tcp_packet will set them */
1138         ct->proto.tcp.last_index = TCP_NONE_SET;
1139
1140         pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1141                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1142                  sender->td_end, sender->td_maxend, sender->td_maxwin,
1143                  sender->td_scale,
1144                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1145                  receiver->td_scale);
1146         return true;
1147 }
1148
1149 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1150
1151 #include <linux/netfilter/nfnetlink.h>
1152 #include <linux/netfilter/nfnetlink_conntrack.h>
1153
1154 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1155                          struct nf_conn *ct)
1156 {
1157         struct nlattr *nest_parms;
1158         struct nf_ct_tcp_flags tmp = {};
1159
1160         spin_lock_bh(&ct->lock);
1161         nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1162         if (!nest_parms)
1163                 goto nla_put_failure;
1164
1165         if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1166             nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1167                        ct->proto.tcp.seen[0].td_scale) ||
1168             nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1169                        ct->proto.tcp.seen[1].td_scale))
1170                 goto nla_put_failure;
1171
1172         tmp.flags = ct->proto.tcp.seen[0].flags;
1173         if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1174                     sizeof(struct nf_ct_tcp_flags), &tmp))
1175                 goto nla_put_failure;
1176
1177         tmp.flags = ct->proto.tcp.seen[1].flags;
1178         if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1179                     sizeof(struct nf_ct_tcp_flags), &tmp))
1180                 goto nla_put_failure;
1181         spin_unlock_bh(&ct->lock);
1182
1183         nla_nest_end(skb, nest_parms);
1184
1185         return 0;
1186
1187 nla_put_failure:
1188         spin_unlock_bh(&ct->lock);
1189         return -1;
1190 }
1191
1192 static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1193         [CTA_PROTOINFO_TCP_STATE]           = { .type = NLA_U8 },
1194         [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1195         [CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
1196         [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
1197         [CTA_PROTOINFO_TCP_FLAGS_REPLY]     = { .len =  sizeof(struct nf_ct_tcp_flags) },
1198 };
1199
1200 static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1201 {
1202         struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1203         struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1204         int err;
1205
1206         /* updates could not contain anything about the private
1207          * protocol info, in that case skip the parsing */
1208         if (!pattr)
1209                 return 0;
1210
1211         err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
1212         if (err < 0)
1213                 return err;
1214
1215         if (tb[CTA_PROTOINFO_TCP_STATE] &&
1216             nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1217                 return -EINVAL;
1218
1219         spin_lock_bh(&ct->lock);
1220         if (tb[CTA_PROTOINFO_TCP_STATE])
1221                 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1222
1223         if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1224                 struct nf_ct_tcp_flags *attr =
1225                         nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1226                 ct->proto.tcp.seen[0].flags &= ~attr->mask;
1227                 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1228         }
1229
1230         if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1231                 struct nf_ct_tcp_flags *attr =
1232                         nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1233                 ct->proto.tcp.seen[1].flags &= ~attr->mask;
1234                 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1235         }
1236
1237         if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1238             tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1239             ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1240             ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1241                 ct->proto.tcp.seen[0].td_scale =
1242                         nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1243                 ct->proto.tcp.seen[1].td_scale =
1244                         nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1245         }
1246         spin_unlock_bh(&ct->lock);
1247
1248         return 0;
1249 }
1250
1251 static int tcp_nlattr_size(void)
1252 {
1253         return nla_total_size(0)           /* CTA_PROTOINFO_TCP */
1254                 + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1255 }
1256
1257 static int tcp_nlattr_tuple_size(void)
1258 {
1259         return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1260 }
1261 #endif
1262
1263 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1264
1265 #include <linux/netfilter/nfnetlink.h>
1266 #include <linux/netfilter/nfnetlink_cttimeout.h>
1267
1268 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1269                                      struct net *net, void *data)
1270 {
1271         unsigned int *timeouts = data;
1272         struct nf_tcp_net *tn = tcp_pernet(net);
1273         int i;
1274
1275         /* set default TCP timeouts. */
1276         for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1277                 timeouts[i] = tn->timeouts[i];
1278
1279         if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1280                 timeouts[TCP_CONNTRACK_SYN_SENT] =
1281                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1282         }
1283         if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1284                 timeouts[TCP_CONNTRACK_SYN_RECV] =
1285                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1286         }
1287         if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1288                 timeouts[TCP_CONNTRACK_ESTABLISHED] =
1289                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1290         }
1291         if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1292                 timeouts[TCP_CONNTRACK_FIN_WAIT] =
1293                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1294         }
1295         if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1296                 timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1297                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1298         }
1299         if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1300                 timeouts[TCP_CONNTRACK_LAST_ACK] =
1301                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1302         }
1303         if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1304                 timeouts[TCP_CONNTRACK_TIME_WAIT] =
1305                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1306         }
1307         if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1308                 timeouts[TCP_CONNTRACK_CLOSE] =
1309                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1310         }
1311         if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1312                 timeouts[TCP_CONNTRACK_SYN_SENT2] =
1313                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1314         }
1315         if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1316                 timeouts[TCP_CONNTRACK_RETRANS] =
1317                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1318         }
1319         if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1320                 timeouts[TCP_CONNTRACK_UNACK] =
1321                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1322         }
1323         return 0;
1324 }
1325
1326 static int
1327 tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1328 {
1329         const unsigned int *timeouts = data;
1330
1331         if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1332                         htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1333             nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1334                          htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1335             nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1336                          htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1337             nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1338                          htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1339             nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1340                          htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1341             nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1342                          htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1343             nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1344                          htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1345             nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1346                          htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1347             nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1348                          htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1349             nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1350                          htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1351             nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1352                          htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1353                 goto nla_put_failure;
1354         return 0;
1355
1356 nla_put_failure:
1357         return -ENOSPC;
1358 }
1359
1360 static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1361         [CTA_TIMEOUT_TCP_SYN_SENT]      = { .type = NLA_U32 },
1362         [CTA_TIMEOUT_TCP_SYN_RECV]      = { .type = NLA_U32 },
1363         [CTA_TIMEOUT_TCP_ESTABLISHED]   = { .type = NLA_U32 },
1364         [CTA_TIMEOUT_TCP_FIN_WAIT]      = { .type = NLA_U32 },
1365         [CTA_TIMEOUT_TCP_CLOSE_WAIT]    = { .type = NLA_U32 },
1366         [CTA_TIMEOUT_TCP_LAST_ACK]      = { .type = NLA_U32 },
1367         [CTA_TIMEOUT_TCP_TIME_WAIT]     = { .type = NLA_U32 },
1368         [CTA_TIMEOUT_TCP_CLOSE]         = { .type = NLA_U32 },
1369         [CTA_TIMEOUT_TCP_SYN_SENT2]     = { .type = NLA_U32 },
1370         [CTA_TIMEOUT_TCP_RETRANS]       = { .type = NLA_U32 },
1371         [CTA_TIMEOUT_TCP_UNACK]         = { .type = NLA_U32 },
1372 };
1373 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1374
1375 #ifdef CONFIG_SYSCTL
1376 static struct ctl_table tcp_sysctl_table[] = {
1377         {
1378                 .procname       = "nf_conntrack_tcp_timeout_syn_sent",
1379                 .maxlen         = sizeof(unsigned int),
1380                 .mode           = 0644,
1381                 .proc_handler   = proc_dointvec_jiffies,
1382         },
1383         {
1384                 .procname       = "nf_conntrack_tcp_timeout_syn_recv",
1385                 .maxlen         = sizeof(unsigned int),
1386                 .mode           = 0644,
1387                 .proc_handler   = proc_dointvec_jiffies,
1388         },
1389         {
1390                 .procname       = "nf_conntrack_tcp_timeout_established",
1391                 .maxlen         = sizeof(unsigned int),
1392                 .mode           = 0644,
1393                 .proc_handler   = proc_dointvec_jiffies,
1394         },
1395         {
1396                 .procname       = "nf_conntrack_tcp_timeout_fin_wait",
1397                 .maxlen         = sizeof(unsigned int),
1398                 .mode           = 0644,
1399                 .proc_handler   = proc_dointvec_jiffies,
1400         },
1401         {
1402                 .procname       = "nf_conntrack_tcp_timeout_close_wait",
1403                 .maxlen         = sizeof(unsigned int),
1404                 .mode           = 0644,
1405                 .proc_handler   = proc_dointvec_jiffies,
1406         },
1407         {
1408                 .procname       = "nf_conntrack_tcp_timeout_last_ack",
1409                 .maxlen         = sizeof(unsigned int),
1410                 .mode           = 0644,
1411                 .proc_handler   = proc_dointvec_jiffies,
1412         },
1413         {
1414                 .procname       = "nf_conntrack_tcp_timeout_time_wait",
1415                 .maxlen         = sizeof(unsigned int),
1416                 .mode           = 0644,
1417                 .proc_handler   = proc_dointvec_jiffies,
1418         },
1419         {
1420                 .procname       = "nf_conntrack_tcp_timeout_close",
1421                 .maxlen         = sizeof(unsigned int),
1422                 .mode           = 0644,
1423                 .proc_handler   = proc_dointvec_jiffies,
1424         },
1425         {
1426                 .procname       = "nf_conntrack_tcp_timeout_max_retrans",
1427                 .maxlen         = sizeof(unsigned int),
1428                 .mode           = 0644,
1429                 .proc_handler   = proc_dointvec_jiffies,
1430         },
1431         {
1432                 .procname       = "nf_conntrack_tcp_timeout_unacknowledged",
1433                 .maxlen         = sizeof(unsigned int),
1434                 .mode           = 0644,
1435                 .proc_handler   = proc_dointvec_jiffies,
1436         },
1437         {
1438                 .procname       = "nf_conntrack_tcp_loose",
1439                 .maxlen         = sizeof(unsigned int),
1440                 .mode           = 0644,
1441                 .proc_handler   = proc_dointvec,
1442         },
1443         {
1444                 .procname       = "nf_conntrack_tcp_be_liberal",
1445                 .maxlen         = sizeof(unsigned int),
1446                 .mode           = 0644,
1447                 .proc_handler   = proc_dointvec,
1448         },
1449         {
1450                 .procname       = "nf_conntrack_tcp_max_retrans",
1451                 .maxlen         = sizeof(unsigned int),
1452                 .mode           = 0644,
1453                 .proc_handler   = proc_dointvec,
1454         },
1455         { }
1456 };
1457
1458 #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1459 static struct ctl_table tcp_compat_sysctl_table[] = {
1460         {
1461                 .procname       = "ip_conntrack_tcp_timeout_syn_sent",
1462                 .maxlen         = sizeof(unsigned int),
1463                 .mode           = 0644,
1464                 .proc_handler   = proc_dointvec_jiffies,
1465         },
1466         {
1467                 .procname       = "ip_conntrack_tcp_timeout_syn_sent2",
1468                 .maxlen         = sizeof(unsigned int),
1469                 .mode           = 0644,
1470                 .proc_handler   = proc_dointvec_jiffies,
1471         },
1472         {
1473                 .procname       = "ip_conntrack_tcp_timeout_syn_recv",
1474                 .maxlen         = sizeof(unsigned int),
1475                 .mode           = 0644,
1476                 .proc_handler   = proc_dointvec_jiffies,
1477         },
1478         {
1479                 .procname       = "ip_conntrack_tcp_timeout_established",
1480                 .maxlen         = sizeof(unsigned int),
1481                 .mode           = 0644,
1482                 .proc_handler   = proc_dointvec_jiffies,
1483         },
1484         {
1485                 .procname       = "ip_conntrack_tcp_timeout_fin_wait",
1486                 .maxlen         = sizeof(unsigned int),
1487                 .mode           = 0644,
1488                 .proc_handler   = proc_dointvec_jiffies,
1489         },
1490         {
1491                 .procname       = "ip_conntrack_tcp_timeout_close_wait",
1492                 .maxlen         = sizeof(unsigned int),
1493                 .mode           = 0644,
1494                 .proc_handler   = proc_dointvec_jiffies,
1495         },
1496         {
1497                 .procname       = "ip_conntrack_tcp_timeout_last_ack",
1498                 .maxlen         = sizeof(unsigned int),
1499                 .mode           = 0644,
1500                 .proc_handler   = proc_dointvec_jiffies,
1501         },
1502         {
1503                 .procname       = "ip_conntrack_tcp_timeout_time_wait",
1504                 .maxlen         = sizeof(unsigned int),
1505                 .mode           = 0644,
1506                 .proc_handler   = proc_dointvec_jiffies,
1507         },
1508         {
1509                 .procname       = "ip_conntrack_tcp_timeout_close",
1510                 .maxlen         = sizeof(unsigned int),
1511                 .mode           = 0644,
1512                 .proc_handler   = proc_dointvec_jiffies,
1513         },
1514         {
1515                 .procname       = "ip_conntrack_tcp_timeout_max_retrans",
1516                 .maxlen         = sizeof(unsigned int),
1517                 .mode           = 0644,
1518                 .proc_handler   = proc_dointvec_jiffies,
1519         },
1520         {
1521                 .procname       = "ip_conntrack_tcp_loose",
1522                 .maxlen         = sizeof(unsigned int),
1523                 .mode           = 0644,
1524                 .proc_handler   = proc_dointvec,
1525         },
1526         {
1527                 .procname       = "ip_conntrack_tcp_be_liberal",
1528                 .maxlen         = sizeof(unsigned int),
1529                 .mode           = 0644,
1530                 .proc_handler   = proc_dointvec,
1531         },
1532         {
1533                 .procname       = "ip_conntrack_tcp_max_retrans",
1534                 .maxlen         = sizeof(unsigned int),
1535                 .mode           = 0644,
1536                 .proc_handler   = proc_dointvec,
1537         },
1538         { }
1539 };
1540 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
1541 #endif /* CONFIG_SYSCTL */
1542
/*
 * Give @pn its own copy of tcp_sysctl_table with every entry's .data
 * pointing into @tn.
 *
 * Does nothing if @pn already has a table. Returns 0 on success (or when
 * CONFIG_SYSCTL is disabled) and -ENOMEM if the copy cannot be allocated.
 */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
				    struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
	/* Slot i of tcp_sysctl_table controls tn->timeouts[timeout_slot[i]]. */
	static const unsigned int timeout_slot[] = {
		TCP_CONNTRACK_SYN_SENT,
		TCP_CONNTRACK_SYN_RECV,
		TCP_CONNTRACK_ESTABLISHED,
		TCP_CONNTRACK_FIN_WAIT,
		TCP_CONNTRACK_CLOSE_WAIT,
		TCP_CONNTRACK_LAST_ACK,
		TCP_CONNTRACK_TIME_WAIT,
		TCP_CONNTRACK_CLOSE,
		TCP_CONNTRACK_RETRANS,
		TCP_CONNTRACK_UNACK,
	};
	struct ctl_table *table;
	unsigned int slot;

	if (pn->ctl_table)
		return 0;

	table = kmemdup(tcp_sysctl_table, sizeof(tcp_sysctl_table),
			GFP_KERNEL);
	if (!table)
		return -ENOMEM;

	for (slot = 0; slot < ARRAY_SIZE(timeout_slot); slot++)
		table[slot].data = &tn->timeouts[timeout_slot[slot]];

	/* The three non-timeout knobs follow the timeout entries. */
	table[10].data = &tn->tcp_loose;
	table[11].data = &tn->tcp_be_liberal;
	table[12].data = &tn->tcp_max_retrans;

	pn->ctl_table = table;
#endif
	return 0;
}
1572
/*
 * Give @pn its own copy of tcp_compat_sysctl_table with every entry's
 * .data pointing into @tn.
 *
 * Compiled to a plain "return 0" unless both CONFIG_SYSCTL and
 * CONFIG_NF_CONNTRACK_PROC_COMPAT are enabled. Returns -ENOMEM if the copy
 * cannot be allocated.
 */
static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
					   struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	/* Slot i of the compat table controls tn->timeouts[timeout_slot[i]]. */
	static const unsigned int timeout_slot[] = {
		TCP_CONNTRACK_SYN_SENT,
		TCP_CONNTRACK_SYN_SENT2,
		TCP_CONNTRACK_SYN_RECV,
		TCP_CONNTRACK_ESTABLISHED,
		TCP_CONNTRACK_FIN_WAIT,
		TCP_CONNTRACK_CLOSE_WAIT,
		TCP_CONNTRACK_LAST_ACK,
		TCP_CONNTRACK_TIME_WAIT,
		TCP_CONNTRACK_CLOSE,
		TCP_CONNTRACK_RETRANS,
	};
	struct ctl_table *table;
	unsigned int slot;

	table = kmemdup(tcp_compat_sysctl_table,
			sizeof(tcp_compat_sysctl_table), GFP_KERNEL);
	if (!table)
		return -ENOMEM;

	for (slot = 0; slot < ARRAY_SIZE(timeout_slot); slot++)
		table[slot].data = &tn->timeouts[timeout_slot[slot]];

	/* The three non-timeout knobs follow the timeout entries. */
	table[10].data = &tn->tcp_loose;
	table[11].data = &tn->tcp_be_liberal;
	table[12].data = &tn->tcp_max_retrans;

	pn->ctl_compat_table = table;
#endif
#endif
	return 0;
}
1601
1602 static int tcp_init_net(struct net *net, u_int16_t proto)
1603 {
1604         int ret;
1605         struct nf_tcp_net *tn = tcp_pernet(net);
1606         struct nf_proto_net *pn = &tn->pn;
1607
1608         if (!pn->users) {
1609                 int i;
1610
1611                 for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1612                         tn->timeouts[i] = tcp_timeouts[i];
1613
1614                 tn->tcp_loose = nf_ct_tcp_loose;
1615                 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1616                 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1617         }
1618
1619         if (proto == AF_INET) {
1620                 ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
1621                 if (ret < 0)
1622                         return ret;
1623
1624                 ret = tcp_kmemdup_sysctl_table(pn, tn);
1625                 if (ret < 0)
1626                         nf_ct_kfree_compat_sysctl_table(pn);
1627         } else
1628                 ret = tcp_kmemdup_sysctl_table(pn, tn);
1629
1630         return ret;
1631 }
1632
1633 static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1634 {
1635         return &net->ct.nf_ct_proto.tcp.pn;
1636 }
1637
1638 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
1639 {
1640         .l3proto                = PF_INET,
1641         .l4proto                = IPPROTO_TCP,
1642         .name                   = "tcp",
1643         .pkt_to_tuple           = tcp_pkt_to_tuple,
1644         .invert_tuple           = tcp_invert_tuple,
1645         .print_tuple            = tcp_print_tuple,
1646         .print_conntrack        = tcp_print_conntrack,
1647         .packet                 = tcp_packet,
1648         .get_timeouts           = tcp_get_timeouts,
1649         .new                    = tcp_new,
1650         .error                  = tcp_error,
1651 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1652         .to_nlattr              = tcp_to_nlattr,
1653         .nlattr_size            = tcp_nlattr_size,
1654         .from_nlattr            = nlattr_to_tcp,
1655         .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1656         .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1657         .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1658         .nla_policy             = nf_ct_port_nla_policy,
1659 #endif
1660 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1661         .ctnl_timeout           = {
1662                 .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1663                 .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1664                 .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1665                 .obj_size       = sizeof(unsigned int) *
1666                                         TCP_CONNTRACK_TIMEOUT_MAX,
1667                 .nla_policy     = tcp_timeout_nla_policy,
1668         },
1669 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1670         .init_net               = tcp_init_net,
1671         .get_net_proto          = tcp_get_net_proto,
1672 };
1673 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1674
1675 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
1676 {
1677         .l3proto                = PF_INET6,
1678         .l4proto                = IPPROTO_TCP,
1679         .name                   = "tcp",
1680         .pkt_to_tuple           = tcp_pkt_to_tuple,
1681         .invert_tuple           = tcp_invert_tuple,
1682         .print_tuple            = tcp_print_tuple,
1683         .print_conntrack        = tcp_print_conntrack,
1684         .packet                 = tcp_packet,
1685         .get_timeouts           = tcp_get_timeouts,
1686         .new                    = tcp_new,
1687         .error                  = tcp_error,
1688 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1689         .to_nlattr              = tcp_to_nlattr,
1690         .nlattr_size            = tcp_nlattr_size,
1691         .from_nlattr            = nlattr_to_tcp,
1692         .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1693         .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1694         .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1695         .nla_policy             = nf_ct_port_nla_policy,
1696 #endif
1697 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1698         .ctnl_timeout           = {
1699                 .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1700                 .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1701                 .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1702                 .obj_size       = sizeof(unsigned int) *
1703                                         TCP_CONNTRACK_TIMEOUT_MAX,
1704                 .nla_policy     = tcp_timeout_nla_policy,
1705         },
1706 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1707         .init_net               = tcp_init_net,
1708         .get_net_proto          = tcp_get_net_proto,
1709 };
1710 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);