rxrpc: Add keepalive for a call
[sfrench/cifs-2.6.git] / net / rxrpc / output.c
1 /* RxRPC packet transmission
2  *
3  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4  * Written by David Howells (dhowells@redhat.com)
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11
12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
14 #include <linux/net.h>
15 #include <linux/gfp.h>
16 #include <linux/skbuff.h>
17 #include <linux/export.h>
18 #include <net/sock.h>
19 #include <net/af_rxrpc.h>
20 #include "ar-internal.h"
21
22 struct rxrpc_ack_buffer {
23         struct rxrpc_wire_header whdr;
24         struct rxrpc_ackpacket ack;
25         u8 acks[255];
26         u8 pad[3];
27         struct rxrpc_ackinfo ackinfo;
28 };
29
30 struct rxrpc_abort_buffer {
31         struct rxrpc_wire_header whdr;
32         __be32 abort_code;
33 };
34
35 /*
36  * Arrange for a keepalive ping a certain time after we last transmitted.  This
37  * lets the far side know we're still interested in this call and helps keep
38  * the route through any intervening firewall open.
39  *
40  * Receiving a response to the ping will prevent the ->expect_rx_by timer from
41  * expiring.
42  */
43 static void rxrpc_set_keepalive(struct rxrpc_call *call)
44 {
45         unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6;
46
47         keepalive_at += now;
48         WRITE_ONCE(call->keepalive_at, keepalive_at);
49         rxrpc_reduce_call_timer(call, keepalive_at, now,
50                                 rxrpc_timer_set_for_keepalive);
51 }
52
53 /*
54  * Fill out an ACK packet.
55  */
56 static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
57                                  struct rxrpc_call *call,
58                                  struct rxrpc_ack_buffer *pkt,
59                                  rxrpc_seq_t *_hard_ack,
60                                  rxrpc_seq_t *_top,
61                                  u8 reason)
62 {
63         rxrpc_serial_t serial;
64         rxrpc_seq_t hard_ack, top, seq;
65         int ix;
66         u32 mtu, jmax;
67         u8 *ackp = pkt->acks;
68
69         /* Barrier against rxrpc_input_data(). */
70         serial = call->ackr_serial;
71         hard_ack = READ_ONCE(call->rx_hard_ack);
72         top = smp_load_acquire(&call->rx_top);
73         *_hard_ack = hard_ack;
74         *_top = top;
75
76         pkt->ack.bufferSpace    = htons(8);
77         pkt->ack.maxSkew        = htons(call->ackr_skew);
78         pkt->ack.firstPacket    = htonl(hard_ack + 1);
79         pkt->ack.previousPacket = htonl(call->ackr_prev_seq);
80         pkt->ack.serial         = htonl(serial);
81         pkt->ack.reason         = reason;
82         pkt->ack.nAcks          = top - hard_ack;
83
84         if (reason == RXRPC_ACK_PING)
85                 pkt->whdr.flags |= RXRPC_REQUEST_ACK;
86
87         if (after(top, hard_ack)) {
88                 seq = hard_ack + 1;
89                 do {
90                         ix = seq & RXRPC_RXTX_BUFF_MASK;
91                         if (call->rxtx_buffer[ix])
92                                 *ackp++ = RXRPC_ACK_TYPE_ACK;
93                         else
94                                 *ackp++ = RXRPC_ACK_TYPE_NACK;
95                         seq++;
96                 } while (before_eq(seq, top));
97         }
98
99         mtu = conn->params.peer->if_mtu;
100         mtu -= conn->params.peer->hdrsize;
101         jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max;
102         pkt->ackinfo.rxMTU      = htonl(rxrpc_rx_mtu);
103         pkt->ackinfo.maxMTU     = htonl(mtu);
104         pkt->ackinfo.rwind      = htonl(call->rx_winsize);
105         pkt->ackinfo.jumbo_max  = htonl(jmax);
106
107         *ackp++ = 0;
108         *ackp++ = 0;
109         *ackp++ = 0;
110         return top - hard_ack + 3;
111 }
112
113 /*
114  * Send an ACK call packet.
115  */
116 int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
117                           rxrpc_serial_t *_serial)
118 {
119         struct rxrpc_connection *conn = NULL;
120         struct rxrpc_ack_buffer *pkt;
121         struct msghdr msg;
122         struct kvec iov[2];
123         rxrpc_serial_t serial;
124         rxrpc_seq_t hard_ack, top;
125         size_t len, n;
126         int ret;
127         u8 reason;
128
129         spin_lock_bh(&call->lock);
130         if (call->conn)
131                 conn = rxrpc_get_connection_maybe(call->conn);
132         spin_unlock_bh(&call->lock);
133         if (!conn)
134                 return -ECONNRESET;
135
136         pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
137         if (!pkt) {
138                 rxrpc_put_connection(conn);
139                 return -ENOMEM;
140         }
141
142         msg.msg_name    = &call->peer->srx.transport;
143         msg.msg_namelen = call->peer->srx.transport_len;
144         msg.msg_control = NULL;
145         msg.msg_controllen = 0;
146         msg.msg_flags   = 0;
147
148         pkt->whdr.epoch         = htonl(conn->proto.epoch);
149         pkt->whdr.cid           = htonl(call->cid);
150         pkt->whdr.callNumber    = htonl(call->call_id);
151         pkt->whdr.seq           = 0;
152         pkt->whdr.type          = RXRPC_PACKET_TYPE_ACK;
153         pkt->whdr.flags         = RXRPC_SLOW_START_OK | conn->out_clientflag;
154         pkt->whdr.userStatus    = 0;
155         pkt->whdr.securityIndex = call->security_ix;
156         pkt->whdr._rsvd         = 0;
157         pkt->whdr.serviceId     = htons(call->service_id);
158
159         spin_lock_bh(&call->lock);
160         if (ping) {
161                 reason = RXRPC_ACK_PING;
162         } else {
163                 reason = call->ackr_reason;
164                 if (!call->ackr_reason) {
165                         spin_unlock_bh(&call->lock);
166                         ret = 0;
167                         goto out;
168                 }
169                 call->ackr_reason = 0;
170         }
171         n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason);
172
173         spin_unlock_bh(&call->lock);
174
175         iov[0].iov_base = pkt;
176         iov[0].iov_len  = sizeof(pkt->whdr) + sizeof(pkt->ack) + n;
177         iov[1].iov_base = &pkt->ackinfo;
178         iov[1].iov_len  = sizeof(pkt->ackinfo);
179         len = iov[0].iov_len + iov[1].iov_len;
180
181         serial = atomic_inc_return(&conn->serial);
182         pkt->whdr.serial = htonl(serial);
183         trace_rxrpc_tx_ack(call, serial,
184                            ntohl(pkt->ack.firstPacket),
185                            ntohl(pkt->ack.serial),
186                            pkt->ack.reason, pkt->ack.nAcks);
187         if (_serial)
188                 *_serial = serial;
189
190         if (ping) {
191                 call->ping_serial = serial;
192                 smp_wmb();
193                 /* We need to stick a time in before we send the packet in case
194                  * the reply gets back before kernel_sendmsg() completes - but
195                  * asking UDP to send the packet can take a relatively long
196                  * time, so we update the time after, on the assumption that
197                  * the packet transmission is more likely to happen towards the
198                  * end of the kernel_sendmsg() call.
199                  */
200                 call->ping_time = ktime_get_real();
201                 set_bit(RXRPC_CALL_PINGING, &call->flags);
202                 trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial);
203         }
204
205         ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
206         if (ping)
207                 call->ping_time = ktime_get_real();
208
209         if (call->state < RXRPC_CALL_COMPLETE) {
210                 if (ret < 0) {
211                         if (ping)
212                                 clear_bit(RXRPC_CALL_PINGING, &call->flags);
213                         rxrpc_propose_ACK(call, pkt->ack.reason,
214                                           ntohs(pkt->ack.maxSkew),
215                                           ntohl(pkt->ack.serial),
216                                           true, true,
217                                           rxrpc_propose_ack_retry_tx);
218                 } else {
219                         spin_lock_bh(&call->lock);
220                         if (after(hard_ack, call->ackr_consumed))
221                                 call->ackr_consumed = hard_ack;
222                         if (after(top, call->ackr_seen))
223                                 call->ackr_seen = top;
224                         spin_unlock_bh(&call->lock);
225                 }
226
227                 rxrpc_set_keepalive(call);
228         }
229
230 out:
231         rxrpc_put_connection(conn);
232         kfree(pkt);
233         return ret;
234 }
235
236 /*
237  * Send an ABORT call packet.
238  */
239 int rxrpc_send_abort_packet(struct rxrpc_call *call)
240 {
241         struct rxrpc_connection *conn = NULL;
242         struct rxrpc_abort_buffer pkt;
243         struct msghdr msg;
244         struct kvec iov[1];
245         rxrpc_serial_t serial;
246         int ret;
247
248         /* Don't bother sending aborts for a client call once the server has
249          * hard-ACK'd all of its request data.  After that point, we're not
250          * going to stop the operation proceeding, and whilst we might limit
251          * the reply, it's not worth it if we can send a new call on the same
252          * channel instead, thereby closing off this call.
253          */
254         if (rxrpc_is_client_call(call) &&
255             test_bit(RXRPC_CALL_TX_LAST, &call->flags))
256                 return 0;
257
258         spin_lock_bh(&call->lock);
259         if (call->conn)
260                 conn = rxrpc_get_connection_maybe(call->conn);
261         spin_unlock_bh(&call->lock);
262         if (!conn)
263                 return -ECONNRESET;
264
265         msg.msg_name    = &call->peer->srx.transport;
266         msg.msg_namelen = call->peer->srx.transport_len;
267         msg.msg_control = NULL;
268         msg.msg_controllen = 0;
269         msg.msg_flags   = 0;
270
271         pkt.whdr.epoch          = htonl(conn->proto.epoch);
272         pkt.whdr.cid            = htonl(call->cid);
273         pkt.whdr.callNumber     = htonl(call->call_id);
274         pkt.whdr.seq            = 0;
275         pkt.whdr.type           = RXRPC_PACKET_TYPE_ABORT;
276         pkt.whdr.flags          = conn->out_clientflag;
277         pkt.whdr.userStatus     = 0;
278         pkt.whdr.securityIndex  = call->security_ix;
279         pkt.whdr._rsvd          = 0;
280         pkt.whdr.serviceId      = htons(call->service_id);
281         pkt.abort_code          = htonl(call->abort_code);
282
283         iov[0].iov_base = &pkt;
284         iov[0].iov_len  = sizeof(pkt);
285
286         serial = atomic_inc_return(&conn->serial);
287         pkt.whdr.serial = htonl(serial);
288
289         ret = kernel_sendmsg(conn->params.local->socket,
290                              &msg, iov, 1, sizeof(pkt));
291
292         rxrpc_put_connection(conn);
293         return ret;
294 }
295
296 /*
297  * send a packet through the transport endpoint
298  */
299 int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
300                            bool retrans)
301 {
302         struct rxrpc_connection *conn = call->conn;
303         struct rxrpc_wire_header whdr;
304         struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
305         struct msghdr msg;
306         struct kvec iov[2];
307         rxrpc_serial_t serial;
308         size_t len;
309         bool lost = false;
310         int ret, opt;
311
312         _enter(",{%d}", skb->len);
313
314         /* Each transmission of a Tx packet needs a new serial number */
315         serial = atomic_inc_return(&conn->serial);
316
317         whdr.epoch      = htonl(conn->proto.epoch);
318         whdr.cid        = htonl(call->cid);
319         whdr.callNumber = htonl(call->call_id);
320         whdr.seq        = htonl(sp->hdr.seq);
321         whdr.serial     = htonl(serial);
322         whdr.type       = RXRPC_PACKET_TYPE_DATA;
323         whdr.flags      = sp->hdr.flags;
324         whdr.userStatus = 0;
325         whdr.securityIndex = call->security_ix;
326         whdr._rsvd      = htons(sp->hdr._rsvd);
327         whdr.serviceId  = htons(call->service_id);
328
329         if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
330             sp->hdr.seq == 1)
331                 whdr.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE;
332
333         iov[0].iov_base = &whdr;
334         iov[0].iov_len = sizeof(whdr);
335         iov[1].iov_base = skb->head;
336         iov[1].iov_len = skb->len;
337         len = iov[0].iov_len + iov[1].iov_len;
338
339         msg.msg_name = &call->peer->srx.transport;
340         msg.msg_namelen = call->peer->srx.transport_len;
341         msg.msg_control = NULL;
342         msg.msg_controllen = 0;
343         msg.msg_flags = 0;
344
345         /* If our RTT cache needs working on, request an ACK.  Also request
346          * ACKs if a DATA packet appears to have been lost.
347          */
348         if (!(sp->hdr.flags & RXRPC_LAST_PACKET) &&
349             (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) ||
350              retrans ||
351              call->cong_mode == RXRPC_CALL_SLOW_START ||
352              (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) ||
353              ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
354                           ktime_get_real())))
355                 whdr.flags |= RXRPC_REQUEST_ACK;
356
357         if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
358                 static int lose;
359                 if ((lose++ & 7) == 7) {
360                         ret = 0;
361                         lost = true;
362                         goto done;
363                 }
364         }
365
366         _proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq);
367
368         /* send the packet with the don't fragment bit set if we currently
369          * think it's small enough */
370         if (iov[1].iov_len >= call->peer->maxdata)
371                 goto send_fragmentable;
372
373         down_read(&conn->params.local->defrag_sem);
374         /* send the packet by UDP
375          * - returns -EMSGSIZE if UDP would have to fragment the packet
376          *   to go out of the interface
377          *   - in which case, we'll have processed the ICMP error
378          *     message and update the peer record
379          */
380         ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
381
382         up_read(&conn->params.local->defrag_sem);
383         if (ret == -EMSGSIZE)
384                 goto send_fragmentable;
385
386 done:
387         trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags,
388                             retrans, lost);
389         if (ret >= 0) {
390                 ktime_t now = ktime_get_real();
391                 skb->tstamp = now;
392                 smp_wmb();
393                 sp->hdr.serial = serial;
394                 if (whdr.flags & RXRPC_REQUEST_ACK) {
395                         call->peer->rtt_last_req = now;
396                         trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial);
397                         if (call->peer->rtt_usage > 1) {
398                                 unsigned long nowj = jiffies, ack_lost_at;
399
400                                 ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt);
401                                 if (ack_lost_at < 1)
402                                         ack_lost_at = 1;
403
404                                 ack_lost_at += nowj;
405                                 WRITE_ONCE(call->ack_lost_at, ack_lost_at);
406                                 rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
407                                                         rxrpc_timer_set_for_lost_ack);
408                         }
409                 }
410         }
411
412         rxrpc_set_keepalive(call);
413
414         _leave(" = %d [%u]", ret, call->peer->maxdata);
415         return ret;
416
417 send_fragmentable:
418         /* attempt to send this message with fragmentation enabled */
419         _debug("send fragment");
420
421         down_write(&conn->params.local->defrag_sem);
422
423         switch (conn->params.local->srx.transport.family) {
424         case AF_INET:
425                 opt = IP_PMTUDISC_DONT;
426                 ret = kernel_setsockopt(conn->params.local->socket,
427                                         SOL_IP, IP_MTU_DISCOVER,
428                                         (char *)&opt, sizeof(opt));
429                 if (ret == 0) {
430                         ret = kernel_sendmsg(conn->params.local->socket, &msg,
431                                              iov, 2, len);
432
433                         opt = IP_PMTUDISC_DO;
434                         kernel_setsockopt(conn->params.local->socket, SOL_IP,
435                                           IP_MTU_DISCOVER,
436                                           (char *)&opt, sizeof(opt));
437                 }
438                 break;
439
440 #ifdef CONFIG_AF_RXRPC_IPV6
441         case AF_INET6:
442                 opt = IPV6_PMTUDISC_DONT;
443                 ret = kernel_setsockopt(conn->params.local->socket,
444                                         SOL_IPV6, IPV6_MTU_DISCOVER,
445                                         (char *)&opt, sizeof(opt));
446                 if (ret == 0) {
447                         ret = kernel_sendmsg(conn->params.local->socket, &msg,
448                                              iov, 1, iov[0].iov_len);
449
450                         opt = IPV6_PMTUDISC_DO;
451                         kernel_setsockopt(conn->params.local->socket,
452                                           SOL_IPV6, IPV6_MTU_DISCOVER,
453                                           (char *)&opt, sizeof(opt));
454                 }
455                 break;
456 #endif
457         }
458
459         up_write(&conn->params.local->defrag_sem);
460         goto done;
461 }
462
463 /*
464  * reject packets through the local endpoint
465  */
466 void rxrpc_reject_packets(struct rxrpc_local *local)
467 {
468         struct sockaddr_rxrpc srx;
469         struct rxrpc_skb_priv *sp;
470         struct rxrpc_wire_header whdr;
471         struct sk_buff *skb;
472         struct msghdr msg;
473         struct kvec iov[2];
474         size_t size;
475         __be32 code;
476
477         _enter("%d", local->debug_id);
478
479         iov[0].iov_base = &whdr;
480         iov[0].iov_len = sizeof(whdr);
481         iov[1].iov_base = &code;
482         iov[1].iov_len = sizeof(code);
483         size = sizeof(whdr) + sizeof(code);
484
485         msg.msg_name = &srx.transport;
486         msg.msg_control = NULL;
487         msg.msg_controllen = 0;
488         msg.msg_flags = 0;
489
490         memset(&whdr, 0, sizeof(whdr));
491         whdr.type = RXRPC_PACKET_TYPE_ABORT;
492
493         while ((skb = skb_dequeue(&local->reject_queue))) {
494                 rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
495                 sp = rxrpc_skb(skb);
496
497                 if (rxrpc_extract_addr_from_skb(local, &srx, skb) == 0) {
498                         msg.msg_namelen = srx.transport_len;
499
500                         code = htonl(skb->priority);
501
502                         whdr.epoch      = htonl(sp->hdr.epoch);
503                         whdr.cid        = htonl(sp->hdr.cid);
504                         whdr.callNumber = htonl(sp->hdr.callNumber);
505                         whdr.serviceId  = htons(sp->hdr.serviceId);
506                         whdr.flags      = sp->hdr.flags;
507                         whdr.flags      ^= RXRPC_CLIENT_INITIATED;
508                         whdr.flags      &= RXRPC_CLIENT_INITIATED;
509
510                         kernel_sendmsg(local->socket, &msg, iov, 2, size);
511                 }
512
513                 rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
514         }
515
516         _leave("");
517 }