Merge tag 'v4.14-rc2' into k.o/for-next
[sfrench/cifs-2.6.git] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <linux/slab.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_sock.h>
27 #include <net/inet_common.h>
28 #include <net/sock.h>
29 #include <net/xfrm.h>
30
31 #include <asm/ioctls.h>
32 #include <linux/spinlock.h>
33 #include <linux/timer.h>
34 #include <linux/delay.h>
35 #include <linux/poll.h>
36
37 #include "ccid.h"
38 #include "dccp.h"
39 #include "feat.h"
40
/*
 * DCCP statistics block of type struct dccp_mib, defined through
 * DEFINE_SNMP_STAT and exported to GPL modules.
 */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/*
 * Per-CPU counter exported to GPL modules.
 * NOTE(review): presumably counts orphaned DCCP sockets - confirm at users.
 */
struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* struct inet_hashinfo instance for DCCP, exported to GPL modules. */
struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/*
 * The maximum queue length for tx in packets. 0 is no limit.
 * dccp_init_sock() copies this value into each socket's dccps_tx_qlen.
 */
int sysctl_dccp_tx_qlen __read_mostly = 5;
53
#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * dccp_state_name  -  printable name of a DCCP socket state (debug builds only)
 * @state: state value, e.g. taken from sk->sk_state
 *
 * Returns the matching entry of the name table, or "INVALID STATE!" when
 * @state is outside [0, DCCP_MAX_STATES). @state is a signed int, so negative
 * values are rejected explicitly instead of indexing before the table.
 */
static const char *dccp_state_name(const int state)
{
        static const char *const dccp_state_names[] = {
        [DCCP_OPEN]             = "OPEN",
        [DCCP_REQUESTING]       = "REQUESTING",
        [DCCP_PARTOPEN]         = "PARTOPEN",
        [DCCP_LISTEN]           = "LISTEN",
        [DCCP_RESPOND]          = "RESPOND",
        [DCCP_CLOSING]          = "CLOSING",
        [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
        [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
        [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
        [DCCP_TIME_WAIT]        = "TIME_WAIT",
        [DCCP_CLOSED]           = "CLOSED",
        };

        if (state < 0 || state >= DCCP_MAX_STATES)
                return "INVALID STATE!";

        return dccp_state_names[state];
}
#endif
77
78 void dccp_set_state(struct sock *sk, const int state)
79 {
80         const int oldstate = sk->sk_state;
81
82         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
83                       dccp_state_name(oldstate), dccp_state_name(state));
84         WARN_ON(state == oldstate);
85
86         switch (state) {
87         case DCCP_OPEN:
88                 if (oldstate != DCCP_OPEN)
89                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
90                 /* Client retransmits all Confirm options until entering OPEN */
91                 if (oldstate == DCCP_PARTOPEN)
92                         dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
93                 break;
94
95         case DCCP_CLOSED:
96                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
97                     oldstate == DCCP_CLOSING)
98                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
99
100                 sk->sk_prot->unhash(sk);
101                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
102                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
103                         inet_put_port(sk);
104                 /* fall through */
105         default:
106                 if (oldstate == DCCP_OPEN)
107                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
108         }
109
110         /* Change state AFTER socket is unhashed to avoid closed
111          * socket sitting in hash tables.
112          */
113         sk->sk_state = state;
114 }
115
116 EXPORT_SYMBOL_GPL(dccp_set_state);
117
118 static void dccp_finish_passive_close(struct sock *sk)
119 {
120         switch (sk->sk_state) {
121         case DCCP_PASSIVE_CLOSE:
122                 /* Node (client or server) has received Close packet. */
123                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
124                 dccp_set_state(sk, DCCP_CLOSED);
125                 break;
126         case DCCP_PASSIVE_CLOSEREQ:
127                 /*
128                  * Client received CloseReq. We set the `active' flag so that
129                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
130                  */
131                 dccp_send_close(sk, 1);
132                 dccp_set_state(sk, DCCP_CLOSING);
133         }
134 }
135
136 void dccp_done(struct sock *sk)
137 {
138         dccp_set_state(sk, DCCP_CLOSED);
139         dccp_clear_xmit_timers(sk);
140
141         sk->sk_shutdown = SHUTDOWN_MASK;
142
143         if (!sock_flag(sk, SOCK_DEAD))
144                 sk->sk_state_change(sk);
145         else
146                 inet_csk_destroy_sock(sk);
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_done);
150
151 const char *dccp_packet_name(const int type)
152 {
153         static const char *const dccp_packet_names[] = {
154                 [DCCP_PKT_REQUEST]  = "REQUEST",
155                 [DCCP_PKT_RESPONSE] = "RESPONSE",
156                 [DCCP_PKT_DATA]     = "DATA",
157                 [DCCP_PKT_ACK]      = "ACK",
158                 [DCCP_PKT_DATAACK]  = "DATAACK",
159                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
160                 [DCCP_PKT_CLOSE]    = "CLOSE",
161                 [DCCP_PKT_RESET]    = "RESET",
162                 [DCCP_PKT_SYNC]     = "SYNC",
163                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
164         };
165
166         if (type >= DCCP_NR_PKT_TYPES)
167                 return "INVALID";
168         else
169                 return dccp_packet_names[type];
170 }
171
172 EXPORT_SYMBOL_GPL(dccp_packet_name);
173
174 static void dccp_sk_destruct(struct sock *sk)
175 {
176         struct dccp_sock *dp = dccp_sk(sk);
177
178         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
179         dp->dccps_hc_tx_ccid = NULL;
180         inet_sock_destruct(sk);
181 }
182
183 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
184 {
185         struct dccp_sock *dp = dccp_sk(sk);
186         struct inet_connection_sock *icsk = inet_csk(sk);
187
188         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
189         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
190         sk->sk_state            = DCCP_CLOSED;
191         sk->sk_write_space      = dccp_write_space;
192         sk->sk_destruct         = dccp_sk_destruct;
193         icsk->icsk_sync_mss     = dccp_sync_mss;
194         dp->dccps_mss_cache     = 536;
195         dp->dccps_rate_last     = jiffies;
196         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
197         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
198         dp->dccps_tx_qlen       = sysctl_dccp_tx_qlen;
199
200         dccp_init_xmit_timers(sk);
201
202         INIT_LIST_HEAD(&dp->dccps_featneg);
203         /* control socket doesn't need feat nego */
204         if (likely(ctl_sock_initialized))
205                 return dccp_feat_init(sk);
206         return 0;
207 }
208
209 EXPORT_SYMBOL_GPL(dccp_init_sock);
210
211 void dccp_destroy_sock(struct sock *sk)
212 {
213         struct dccp_sock *dp = dccp_sk(sk);
214
215         __skb_queue_purge(&sk->sk_write_queue);
216         if (sk->sk_send_head != NULL) {
217                 kfree_skb(sk->sk_send_head);
218                 sk->sk_send_head = NULL;
219         }
220
221         /* Clean up a referenced DCCP bind bucket. */
222         if (inet_csk(sk)->icsk_bind_hash != NULL)
223                 inet_put_port(sk);
224
225         kfree(dp->dccps_service_list);
226         dp->dccps_service_list = NULL;
227
228         if (dp->dccps_hc_rx_ackvec != NULL) {
229                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
230                 dp->dccps_hc_rx_ackvec = NULL;
231         }
232         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
233         dp->dccps_hc_rx_ccid = NULL;
234
235         /* clean up feature negotiation state */
236         dccp_feat_list_purge(&dp->dccps_featneg);
237 }
238
239 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
240
241 static inline int dccp_listen_start(struct sock *sk, int backlog)
242 {
243         struct dccp_sock *dp = dccp_sk(sk);
244
245         dp->dccps_role = DCCP_ROLE_LISTEN;
246         /* do not start to listen if feature negotiation setup fails */
247         if (dccp_feat_finalise_settings(dp))
248                 return -EPROTO;
249         return inet_csk_listen_start(sk, backlog);
250 }
251
252 static inline int dccp_need_reset(int state)
253 {
254         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
255                state != DCCP_REQUESTING;
256 }
257
258 int dccp_disconnect(struct sock *sk, int flags)
259 {
260         struct inet_connection_sock *icsk = inet_csk(sk);
261         struct inet_sock *inet = inet_sk(sk);
262         int err = 0;
263         const int old_state = sk->sk_state;
264
265         if (old_state != DCCP_CLOSED)
266                 dccp_set_state(sk, DCCP_CLOSED);
267
268         /*
269          * This corresponds to the ABORT function of RFC793, sec. 3.8
270          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
271          */
272         if (old_state == DCCP_LISTEN) {
273                 inet_csk_listen_stop(sk);
274         } else if (dccp_need_reset(old_state)) {
275                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
276                 sk->sk_err = ECONNRESET;
277         } else if (old_state == DCCP_REQUESTING)
278                 sk->sk_err = ECONNRESET;
279
280         dccp_clear_xmit_timers(sk);
281
282         __skb_queue_purge(&sk->sk_receive_queue);
283         __skb_queue_purge(&sk->sk_write_queue);
284         if (sk->sk_send_head != NULL) {
285                 __kfree_skb(sk->sk_send_head);
286                 sk->sk_send_head = NULL;
287         }
288
289         inet->inet_dport = 0;
290
291         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
292                 inet_reset_saddr(sk);
293
294         sk->sk_shutdown = 0;
295         sock_reset_flag(sk, SOCK_DONE);
296
297         icsk->icsk_backoff = 0;
298         inet_csk_delack_init(sk);
299         __sk_dst_reset(sk);
300
301         WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
302
303         sk->sk_error_report(sk);
304         return err;
305 }
306
307 EXPORT_SYMBOL_GPL(dccp_disconnect);
308
309 /*
310  *      Wait for a DCCP event.
311  *
312  *      Note that we don't need to lock the socket, as the upper poll layers
313  *      take care of normal races (between the test and the event) and we don't
314  *      go look at any of the socket buffers directly.
315  */
316 unsigned int dccp_poll(struct file *file, struct socket *sock,
317                        poll_table *wait)
318 {
319         unsigned int mask;
320         struct sock *sk = sock->sk;
321
322         sock_poll_wait(file, sk_sleep(sk), wait);
323         if (sk->sk_state == DCCP_LISTEN)
324                 return inet_csk_listen_poll(sk);
325
326         /* Socket is not locked. We are protected from async events
327            by poll logic and correct handling of state changes
328            made by another threads is impossible in any case.
329          */
330
331         mask = 0;
332         if (sk->sk_err)
333                 mask = POLLERR;
334
335         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
336                 mask |= POLLHUP;
337         if (sk->sk_shutdown & RCV_SHUTDOWN)
338                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
339
340         /* Connected? */
341         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
342                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
343                         mask |= POLLIN | POLLRDNORM;
344
345                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
346                         if (sk_stream_is_writeable(sk)) {
347                                 mask |= POLLOUT | POLLWRNORM;
348                         } else {  /* send SIGIO later */
349                                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
350                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
351
352                                 /* Race breaker. If space is freed after
353                                  * wspace test but before the flags are set,
354                                  * IO signal will be lost.
355                                  */
356                                 if (sk_stream_is_writeable(sk))
357                                         mask |= POLLOUT | POLLWRNORM;
358                         }
359                 }
360         }
361         return mask;
362 }
363
364 EXPORT_SYMBOL_GPL(dccp_poll);
365
366 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
367 {
368         int rc = -ENOTCONN;
369
370         lock_sock(sk);
371
372         if (sk->sk_state == DCCP_LISTEN)
373                 goto out;
374
375         switch (cmd) {
376         case SIOCINQ: {
377                 struct sk_buff *skb;
378                 unsigned long amount = 0;
379
380                 skb = skb_peek(&sk->sk_receive_queue);
381                 if (skb != NULL) {
382                         /*
383                          * We will only return the amount of this packet since
384                          * that is all that will be read.
385                          */
386                         amount = skb->len;
387                 }
388                 rc = put_user(amount, (int __user *)arg);
389         }
390                 break;
391         default:
392                 rc = -ENOIOCTLCMD;
393                 break;
394         }
395 out:
396         release_sock(sk);
397         return rc;
398 }
399
400 EXPORT_SYMBOL_GPL(dccp_ioctl);
401
402 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
403                                    char __user *optval, unsigned int optlen)
404 {
405         struct dccp_sock *dp = dccp_sk(sk);
406         struct dccp_service_list *sl = NULL;
407
408         if (service == DCCP_SERVICE_INVALID_VALUE ||
409             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
410                 return -EINVAL;
411
412         if (optlen > sizeof(service)) {
413                 sl = kmalloc(optlen, GFP_KERNEL);
414                 if (sl == NULL)
415                         return -ENOMEM;
416
417                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
418                 if (copy_from_user(sl->dccpsl_list,
419                                    optval + sizeof(service),
420                                    optlen - sizeof(service)) ||
421                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
422                         kfree(sl);
423                         return -EFAULT;
424                 }
425         }
426
427         lock_sock(sk);
428         dp->dccps_service = service;
429
430         kfree(dp->dccps_service_list);
431
432         dp->dccps_service_list = sl;
433         release_sock(sk);
434         return 0;
435 }
436
437 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
438 {
439         u8 *list, len;
440         int i, rc;
441
442         if (cscov < 0 || cscov > 15)
443                 return -EINVAL;
444         /*
445          * Populate a list of permissible values, in the range cscov...15. This
446          * is necessary since feature negotiation of single values only works if
447          * both sides incidentally choose the same value. Since the list starts
448          * lowest-value first, negotiation will pick the smallest shared value.
449          */
450         if (cscov == 0)
451                 return 0;
452         len = 16 - cscov;
453
454         list = kmalloc(len, GFP_KERNEL);
455         if (list == NULL)
456                 return -ENOBUFS;
457
458         for (i = 0; i < len; i++)
459                 list[i] = cscov++;
460
461         rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
462
463         if (rc == 0) {
464                 if (rx)
465                         dccp_sk(sk)->dccps_pcrlen = cscov;
466                 else
467                         dccp_sk(sk)->dccps_pcslen = cscov;
468         }
469         kfree(list);
470         return rc;
471 }
472
473 static int dccp_setsockopt_ccid(struct sock *sk, int type,
474                                 char __user *optval, unsigned int optlen)
475 {
476         u8 *val;
477         int rc = 0;
478
479         if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
480                 return -EINVAL;
481
482         val = memdup_user(optval, optlen);
483         if (IS_ERR(val))
484                 return PTR_ERR(val);
485
486         lock_sock(sk);
487         if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
488                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
489
490         if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
491                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
492         release_sock(sk);
493
494         kfree(val);
495         return rc;
496 }
497
498 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
499                 char __user *optval, unsigned int optlen)
500 {
501         struct dccp_sock *dp = dccp_sk(sk);
502         int val, err = 0;
503
504         switch (optname) {
505         case DCCP_SOCKOPT_PACKET_SIZE:
506                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
507                 return 0;
508         case DCCP_SOCKOPT_CHANGE_L:
509         case DCCP_SOCKOPT_CHANGE_R:
510                 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
511                 return 0;
512         case DCCP_SOCKOPT_CCID:
513         case DCCP_SOCKOPT_RX_CCID:
514         case DCCP_SOCKOPT_TX_CCID:
515                 return dccp_setsockopt_ccid(sk, optname, optval, optlen);
516         }
517
518         if (optlen < (int)sizeof(int))
519                 return -EINVAL;
520
521         if (get_user(val, (int __user *)optval))
522                 return -EFAULT;
523
524         if (optname == DCCP_SOCKOPT_SERVICE)
525                 return dccp_setsockopt_service(sk, val, optval, optlen);
526
527         lock_sock(sk);
528         switch (optname) {
529         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
530                 if (dp->dccps_role != DCCP_ROLE_SERVER)
531                         err = -EOPNOTSUPP;
532                 else
533                         dp->dccps_server_timewait = (val != 0);
534                 break;
535         case DCCP_SOCKOPT_SEND_CSCOV:
536                 err = dccp_setsockopt_cscov(sk, val, false);
537                 break;
538         case DCCP_SOCKOPT_RECV_CSCOV:
539                 err = dccp_setsockopt_cscov(sk, val, true);
540                 break;
541         case DCCP_SOCKOPT_QPOLICY_ID:
542                 if (sk->sk_state != DCCP_CLOSED)
543                         err = -EISCONN;
544                 else if (val < 0 || val >= DCCPQ_POLICY_MAX)
545                         err = -EINVAL;
546                 else
547                         dp->dccps_qpolicy = val;
548                 break;
549         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
550                 if (val < 0)
551                         err = -EINVAL;
552                 else
553                         dp->dccps_tx_qlen = val;
554                 break;
555         default:
556                 err = -ENOPROTOOPT;
557                 break;
558         }
559         release_sock(sk);
560
561         return err;
562 }
563
564 int dccp_setsockopt(struct sock *sk, int level, int optname,
565                     char __user *optval, unsigned int optlen)
566 {
567         if (level != SOL_DCCP)
568                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
569                                                              optname, optval,
570                                                              optlen);
571         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
572 }
573
574 EXPORT_SYMBOL_GPL(dccp_setsockopt);
575
#ifdef CONFIG_COMPAT
/*
 * compat_dccp_setsockopt  -  CONFIG_COMPAT variant of dccp_setsockopt()
 *
 * SOL_DCCP options are handled by do_dccp_setsockopt(); every other level
 * is forwarded to inet_csk_compat_setsockopt().
 */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, unsigned int optlen)
{
        if (level == SOL_DCCP)
                return do_dccp_setsockopt(sk, level, optname, optval, optlen);

        return inet_csk_compat_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
588
589 static int dccp_getsockopt_service(struct sock *sk, int len,
590                                    __be32 __user *optval,
591                                    int __user *optlen)
592 {
593         const struct dccp_sock *dp = dccp_sk(sk);
594         const struct dccp_service_list *sl;
595         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
596
597         lock_sock(sk);
598         if ((sl = dp->dccps_service_list) != NULL) {
599                 slen = sl->dccpsl_nr * sizeof(u32);
600                 total_len += slen;
601         }
602
603         err = -EINVAL;
604         if (total_len > len)
605                 goto out;
606
607         err = 0;
608         if (put_user(total_len, optlen) ||
609             put_user(dp->dccps_service, optval) ||
610             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
611                 err = -EFAULT;
612 out:
613         release_sock(sk);
614         return err;
615 }
616
617 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
618                     char __user *optval, int __user *optlen)
619 {
620         struct dccp_sock *dp;
621         int val, len;
622
623         if (get_user(len, optlen))
624                 return -EFAULT;
625
626         if (len < (int)sizeof(int))
627                 return -EINVAL;
628
629         dp = dccp_sk(sk);
630
631         switch (optname) {
632         case DCCP_SOCKOPT_PACKET_SIZE:
633                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
634                 return 0;
635         case DCCP_SOCKOPT_SERVICE:
636                 return dccp_getsockopt_service(sk, len,
637                                                (__be32 __user *)optval, optlen);
638         case DCCP_SOCKOPT_GET_CUR_MPS:
639                 val = dp->dccps_mss_cache;
640                 break;
641         case DCCP_SOCKOPT_AVAILABLE_CCIDS:
642                 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
643         case DCCP_SOCKOPT_TX_CCID:
644                 val = ccid_get_current_tx_ccid(dp);
645                 if (val < 0)
646                         return -ENOPROTOOPT;
647                 break;
648         case DCCP_SOCKOPT_RX_CCID:
649                 val = ccid_get_current_rx_ccid(dp);
650                 if (val < 0)
651                         return -ENOPROTOOPT;
652                 break;
653         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
654                 val = dp->dccps_server_timewait;
655                 break;
656         case DCCP_SOCKOPT_SEND_CSCOV:
657                 val = dp->dccps_pcslen;
658                 break;
659         case DCCP_SOCKOPT_RECV_CSCOV:
660                 val = dp->dccps_pcrlen;
661                 break;
662         case DCCP_SOCKOPT_QPOLICY_ID:
663                 val = dp->dccps_qpolicy;
664                 break;
665         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
666                 val = dp->dccps_tx_qlen;
667                 break;
668         case 128 ... 191:
669                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
670                                              len, (u32 __user *)optval, optlen);
671         case 192 ... 255:
672                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
673                                              len, (u32 __user *)optval, optlen);
674         default:
675                 return -ENOPROTOOPT;
676         }
677
678         len = sizeof(val);
679         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
680                 return -EFAULT;
681
682         return 0;
683 }
684
685 int dccp_getsockopt(struct sock *sk, int level, int optname,
686                     char __user *optval, int __user *optlen)
687 {
688         if (level != SOL_DCCP)
689                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
690                                                              optname, optval,
691                                                              optlen);
692         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
693 }
694
695 EXPORT_SYMBOL_GPL(dccp_getsockopt);
696
#ifdef CONFIG_COMPAT
/*
 * compat_dccp_getsockopt  -  CONFIG_COMPAT variant of dccp_getsockopt()
 *
 * SOL_DCCP options are handled by do_dccp_getsockopt(); every other level
 * is forwarded to inet_csk_compat_getsockopt().
 */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        if (level == SOL_DCCP)
                return do_dccp_getsockopt(sk, level, optname, optval, optlen);

        return inet_csk_compat_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
709
710 static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
711 {
712         struct cmsghdr *cmsg;
713
714         /*
715          * Assign an (opaque) qpolicy priority value to skb->priority.
716          *
717          * We are overloading this skb field for use with the qpolicy subystem.
718          * The skb->priority is normally used for the SO_PRIORITY option, which
719          * is initialised from sk_priority. Since the assignment of sk_priority
720          * to skb->priority happens later (on layer 3), we overload this field
721          * for use with queueing priorities as long as the skb is on layer 4.
722          * The default priority value (if nothing is set) is 0.
723          */
724         skb->priority = 0;
725
726         for_each_cmsghdr(cmsg, msg) {
727                 if (!CMSG_OK(msg, cmsg))
728                         return -EINVAL;
729
730                 if (cmsg->cmsg_level != SOL_DCCP)
731                         continue;
732
733                 if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
734                     !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
735                         return -EINVAL;
736
737                 switch (cmsg->cmsg_type) {
738                 case DCCP_SCM_PRIORITY:
739                         if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
740                                 return -EINVAL;
741                         skb->priority = *(__u32 *)CMSG_DATA(cmsg);
742                         break;
743                 default:
744                         return -EINVAL;
745                 }
746         }
747         return 0;
748 }
749
750 int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
751 {
752         const struct dccp_sock *dp = dccp_sk(sk);
753         const int flags = msg->msg_flags;
754         const int noblock = flags & MSG_DONTWAIT;
755         struct sk_buff *skb;
756         int rc, size;
757         long timeo;
758
759         if (len > dp->dccps_mss_cache)
760                 return -EMSGSIZE;
761
762         lock_sock(sk);
763
764         if (dccp_qpolicy_full(sk)) {
765                 rc = -EAGAIN;
766                 goto out_release;
767         }
768
769         timeo = sock_sndtimeo(sk, noblock);
770
771         /*
772          * We have to use sk_stream_wait_connect here to set sk_write_pending,
773          * so that the trick in dccp_rcv_request_sent_state_process.
774          */
775         /* Wait for a connection to finish. */
776         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
777                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
778                         goto out_release;
779
780         size = sk->sk_prot->max_header + len;
781         release_sock(sk);
782         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
783         lock_sock(sk);
784         if (skb == NULL)
785                 goto out_release;
786
787         skb_reserve(skb, sk->sk_prot->max_header);
788         rc = memcpy_from_msg(skb_put(skb, len), msg, len);
789         if (rc != 0)
790                 goto out_discard;
791
792         rc = dccp_msghdr_parse(msg, skb);
793         if (rc != 0)
794                 goto out_discard;
795
796         dccp_qpolicy_push(sk, skb);
797         /*
798          * The xmit_timer is set if the TX CCID is rate-based and will expire
799          * when congestion control permits to release further packets into the
800          * network. Window-based CCIDs do not use this timer.
801          */
802         if (!timer_pending(&dp->dccps_xmit_timer))
803                 dccp_write_xmit(sk);
804 out_release:
805         release_sock(sk);
806         return rc ? : len;
807 out_discard:
808         kfree_skb(skb);
809         goto out_release;
810 }
811
812 EXPORT_SYMBOL_GPL(dccp_sendmsg);
813
/*
 * dccp_recvmsg  -  receive one datagram from the socket's receive queue
 * @sk:       receiving socket
 * @msg:      message header the payload is copied into
 * @len:      size of the caller's buffer in bytes
 * @nonblock: passed to sock_rcvtimeo() to select the receive timeout
 * @flags:    MSG_* flags; MSG_PEEK and MSG_TRUNC are examined here
 * @addr_len: not used by this function
 *
 * At most one packet is delivered per call. Returns the number of bytes
 * copied (or the full packet length if MSG_TRUNC was passed in @flags),
 * 0 at end of connection, or a negative errno. Note that error codes are
 * stored in the size_t @len and converted back by the int return.
 */
int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
                 int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                /* Look at the head of the queue without removing it. */
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                switch (dh->dccph_type) {
                case DCCP_PKT_DATA:
                case DCCP_PKT_DATAACK:
                        goto found_ok_skb;

                case DCCP_PKT_CLOSE:
                case DCCP_PKT_CLOSEREQ:
                        /* A peek must not advance the closing handshake. */
                        if (!(flags & MSG_PEEK))
                                dccp_finish_passive_close(sk);
                        /* fall through */
                case DCCP_PKT_RESET:
                        dccp_pr_debug("found fin (%s) ok!\n",
                                      dccp_packet_name(dh->dccph_type));
                        /* Terminating packets are reported as 0 bytes read. */
                        len = 0;
                        goto found_fin_ok;
                default:
                        /* Any other packet type is dropped from the queue. */
                        dccp_pr_debug("packet_type=%s\n",
                                      dccp_packet_name(dh->dccph_type));
                        sk_eat_skb(sk, skb);
                }
verify_sock_status:
                /* Nothing deliverable: decide whether to stop or to wait. */
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when user tries to read
                                 * from never connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                /* Timeout of zero: do not wait for data. */
                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                /* Wait for data, then re-examine the queue from the top. */
                sk_wait_data(sk, &timeo, NULL);
                continue;
        found_ok_skb:
                /* Clamp to the packet size, or flag truncation to the caller. */
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_msg(skb, 0, msg, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
                /* With MSG_TRUNC the full packet length is reported. */
                if (flags & MSG_TRUNC)
                        len = skb->len;
        found_fin_ok:
                /* The packet is consumed unless the caller only peeked. */
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
921
922 int inet_dccp_listen(struct socket *sock, int backlog)
923 {
924         struct sock *sk = sock->sk;
925         unsigned char old_state;
926         int err;
927
928         lock_sock(sk);
929
930         err = -EINVAL;
931         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
932                 goto out;
933
934         old_state = sk->sk_state;
935         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
936                 goto out;
937
938         /* Really, if the socket is already in listen state
939          * we can only allow the backlog to be adjusted.
940          */
941         if (old_state != DCCP_LISTEN) {
942                 /*
943                  * FIXME: here it probably should be sk->sk_prot->listen_start
944                  * see tcp_listen_start
945                  */
946                 err = dccp_listen_start(sk, backlog);
947                 if (err)
948                         goto out;
949         }
950         sk->sk_max_ack_backlog = backlog;
951         err = 0;
952
953 out:
954         release_sock(sk);
955         return err;
956 }
957
958 EXPORT_SYMBOL_GPL(inet_dccp_listen);
959
960 static void dccp_terminate_connection(struct sock *sk)
961 {
962         u8 next_state = DCCP_CLOSED;
963
964         switch (sk->sk_state) {
965         case DCCP_PASSIVE_CLOSE:
966         case DCCP_PASSIVE_CLOSEREQ:
967                 dccp_finish_passive_close(sk);
968                 break;
969         case DCCP_PARTOPEN:
970                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
971                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
972                 /* fall through */
973         case DCCP_OPEN:
974                 dccp_send_close(sk, 1);
975
976                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
977                     !dccp_sk(sk)->dccps_server_timewait)
978                         next_state = DCCP_ACTIVE_CLOSEREQ;
979                 else
980                         next_state = DCCP_CLOSING;
981                 /* fall through */
982         default:
983                 dccp_set_state(sk, next_state);
984         }
985 }
986
987 void dccp_close(struct sock *sk, long timeout)
988 {
989         struct dccp_sock *dp = dccp_sk(sk);
990         struct sk_buff *skb;
991         u32 data_was_unread = 0;
992         int state;
993
994         lock_sock(sk);
995
996         sk->sk_shutdown = SHUTDOWN_MASK;
997
998         if (sk->sk_state == DCCP_LISTEN) {
999                 dccp_set_state(sk, DCCP_CLOSED);
1000
1001                 /* Special case. */
1002                 inet_csk_listen_stop(sk);
1003
1004                 goto adjudge_to_death;
1005         }
1006
1007         sk_stop_timer(sk, &dp->dccps_xmit_timer);
1008
1009         /*
1010          * We need to flush the recv. buffs.  We do this only on the
1011          * descriptor close, not protocol-sourced closes, because the
1012           *reader process may not have drained the data yet!
1013          */
1014         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1015                 data_was_unread += skb->len;
1016                 __kfree_skb(skb);
1017         }
1018
1019         /* If socket has been already reset kill it. */
1020         if (sk->sk_state == DCCP_CLOSED)
1021                 goto adjudge_to_death;
1022
1023         if (data_was_unread) {
1024                 /* Unread data was tossed, send an appropriate Reset Code */
1025                 DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
1026                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
1027                 dccp_set_state(sk, DCCP_CLOSED);
1028         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
1029                 /* Check zero linger _after_ checking for unread data. */
1030                 sk->sk_prot->disconnect(sk, 0);
1031         } else if (sk->sk_state != DCCP_CLOSED) {
1032                 /*
1033                  * Normal connection termination. May need to wait if there are
1034                  * still packets in the TX queue that are delayed by the CCID.
1035                  */
1036                 dccp_flush_write_queue(sk, &timeout);
1037                 dccp_terminate_connection(sk);
1038         }
1039
1040         /*
1041          * Flush write queue. This may be necessary in several cases:
1042          * - we have been closed by the peer but still have application data;
1043          * - abortive termination (unread data or zero linger time),
1044          * - normal termination but queue could not be flushed within time limit
1045          */
1046         __skb_queue_purge(&sk->sk_write_queue);
1047
1048         sk_stream_wait_close(sk, timeout);
1049
1050 adjudge_to_death:
1051         state = sk->sk_state;
1052         sock_hold(sk);
1053         sock_orphan(sk);
1054
1055         /*
1056          * It is the last release_sock in its life. It will remove backlog.
1057          */
1058         release_sock(sk);
1059         /*
1060          * Now socket is owned by kernel and we acquire BH lock
1061          * to finish close. No need to check for user refs.
1062          */
1063         local_bh_disable();
1064         bh_lock_sock(sk);
1065         WARN_ON(sock_owned_by_user(sk));
1066
1067         percpu_counter_inc(sk->sk_prot->orphan_count);
1068
1069         /* Have we already been destroyed by a softirq or backlog? */
1070         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1071                 goto out;
1072
1073         if (sk->sk_state == DCCP_CLOSED)
1074                 inet_csk_destroy_sock(sk);
1075
1076         /* Otherwise, socket is reprieved until protocol close. */
1077
1078 out:
1079         bh_unlock_sock(sk);
1080         local_bh_enable();
1081         sock_put(sk);
1082 }
1083
1084 EXPORT_SYMBOL_GPL(dccp_close);
1085
/*
 * dccp_shutdown - shutdown hook for DCCP sockets
 * @sk:  socket being shut down (not referenced by the current body)
 * @how: requested shutdown mode; only printed
 *
 * The current implementation does nothing beyond emitting a debug message.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1092
1093 static inline int __init dccp_mib_init(void)
1094 {
1095         dccp_statistics = alloc_percpu(struct dccp_mib);
1096         if (!dccp_statistics)
1097                 return -ENOMEM;
1098         return 0;
1099 }
1100
1101 static inline void dccp_mib_exit(void)
1102 {
1103         free_percpu(dccp_statistics);
1104 }
1105
1106 static int thash_entries;
1107 module_param(thash_entries, int, 0444);
1108 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1109
1110 #ifdef CONFIG_IP_DCCP_DEBUG
1111 bool dccp_debug;
1112 module_param(dccp_debug, bool, 0644);
1113 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1114
1115 EXPORT_SYMBOL_GPL(dccp_debug);
1116 #endif
1117
1118 static int __init dccp_init(void)
1119 {
1120         unsigned long goal;
1121         int ehash_order, bhash_order, i;
1122         int rc;
1123
1124         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1125                      FIELD_SIZEOF(struct sk_buff, cb));
1126         rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
1127         if (rc)
1128                 goto out_fail;
1129         rc = -ENOBUFS;
1130         inet_hashinfo_init(&dccp_hashinfo);
1131         dccp_hashinfo.bind_bucket_cachep =
1132                 kmem_cache_create("dccp_bind_bucket",
1133                                   sizeof(struct inet_bind_bucket), 0,
1134                                   SLAB_HWCACHE_ALIGN, NULL);
1135         if (!dccp_hashinfo.bind_bucket_cachep)
1136                 goto out_free_percpu;
1137
1138         /*
1139          * Size and allocate the main established and bind bucket
1140          * hash tables.
1141          *
1142          * The methodology is similar to that of the buffer cache.
1143          */
1144         if (totalram_pages >= (128 * 1024))
1145                 goal = totalram_pages >> (21 - PAGE_SHIFT);
1146         else
1147                 goal = totalram_pages >> (23 - PAGE_SHIFT);
1148
1149         if (thash_entries)
1150                 goal = (thash_entries *
1151                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1152         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1153                 ;
1154         do {
1155                 unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
1156                                         sizeof(struct inet_ehash_bucket);
1157
1158                 while (hash_size & (hash_size - 1))
1159                         hash_size--;
1160                 dccp_hashinfo.ehash_mask = hash_size - 1;
1161                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1162                         __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
1163         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1164
1165         if (!dccp_hashinfo.ehash) {
1166                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1167                 goto out_free_bind_bucket_cachep;
1168         }
1169
1170         for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
1171                 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
1172
1173         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1174                         goto out_free_dccp_ehash;
1175
1176         bhash_order = ehash_order;
1177
1178         do {
1179                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1180                                         sizeof(struct inet_bind_hashbucket);
1181                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1182                     bhash_order > 0)
1183                         continue;
1184                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1185                         __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
1186         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1187
1188         if (!dccp_hashinfo.bhash) {
1189                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1190                 goto out_free_dccp_locks;
1191         }
1192
1193         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1194                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1195                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1196         }
1197
1198         rc = dccp_mib_init();
1199         if (rc)
1200                 goto out_free_dccp_bhash;
1201
1202         rc = dccp_ackvec_init();
1203         if (rc)
1204                 goto out_free_dccp_mib;
1205
1206         rc = dccp_sysctl_init();
1207         if (rc)
1208                 goto out_ackvec_exit;
1209
1210         rc = ccid_initialize_builtins();
1211         if (rc)
1212                 goto out_sysctl_exit;
1213
1214         dccp_timestamping_init();
1215
1216         return 0;
1217
1218 out_sysctl_exit:
1219         dccp_sysctl_exit();
1220 out_ackvec_exit:
1221         dccp_ackvec_exit();
1222 out_free_dccp_mib:
1223         dccp_mib_exit();
1224 out_free_dccp_bhash:
1225         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1226 out_free_dccp_locks:
1227         inet_ehash_locks_free(&dccp_hashinfo);
1228 out_free_dccp_ehash:
1229         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1230 out_free_bind_bucket_cachep:
1231         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1232 out_free_percpu:
1233         percpu_counter_destroy(&dccp_orphan_count);
1234 out_fail:
1235         dccp_hashinfo.bhash = NULL;
1236         dccp_hashinfo.ehash = NULL;
1237         dccp_hashinfo.bind_bucket_cachep = NULL;
1238         return rc;
1239 }
1240
1241 static void __exit dccp_fini(void)
1242 {
1243         ccid_cleanup_builtins();
1244         dccp_mib_exit();
1245         free_pages((unsigned long)dccp_hashinfo.bhash,
1246                    get_order(dccp_hashinfo.bhash_size *
1247                              sizeof(struct inet_bind_hashbucket)));
1248         free_pages((unsigned long)dccp_hashinfo.ehash,
1249                    get_order((dccp_hashinfo.ehash_mask + 1) *
1250                              sizeof(struct inet_ehash_bucket)));
1251         inet_ehash_locks_free(&dccp_hashinfo);
1252         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1253         dccp_ackvec_exit();
1254         dccp_sysctl_exit();
1255         percpu_counter_destroy(&dccp_orphan_count);
1256 }
1257
1258 module_init(dccp_init);
1259 module_exit(dccp_fini);
1260
1261 MODULE_LICENSE("GPL");
1262 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1263 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");