/*
 * Copyright (c) 2018 Chelsio Communications, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Written by: Atul Gupta (atul.gupta@chelsio.com)
 */

#include <linux/module.h>
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/skbuff.h>
#include <linux/timer.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/sched/signal.h>
#include <net/tcp.h>
#include <net/busy_poll.h>
#include <crypto/aes.h>

#include "chtls.h"
#include "chtls_cm.h"

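/* TLS offload is active in a direction once its key has been programmed (index >= 0) */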
static bool is_tls_tx(struct chtls_sock *csk)
{
        return csk->tlshws.txkey >= 0;
}

static bool is_tls_rx(struct chtls_sock *csk)
{
        return csk->tlshws.rxkey >= 0;
}

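/* Bytes of SGL needed to cover the skb's page fragments (sgl_len() counts 8-byte flits) */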
static int data_sgl_len(const struct sk_buff *skb)
{
        unsigned int cnt;

        cnt = skb_shinfo(skb)->nr_frags;
        return sgl_len(cnt) * 8;
}

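/* Number of IVs needed: one per TLS PDU of at most hws->mfs bytes */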
static int nos_ivs(struct sock *sk, unsigned int size)
{
        struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);

        return DIV_ROUND_UP(size, csk->tlshws.mfs);
}

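/*
 * Decide whether the IVs fit as immediate data in the work request and
 * record the decision in the skb's ULP control block.
 */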
static int set_ivs_imm(struct sock *sk, const struct sk_buff *skb)
{
        int ivs_size = nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE;
        int hlen = TLS_WR_CPL_LEN + data_sgl_len(skb);

        if ((hlen + KEY_ON_MEM_SZ + ivs_size) <
            MAX_IMM_OFLD_TX_DATA_WR_LEN) {
                ULP_SKB_CB(skb)->ulp.tls.iv = 1;
                return 1;
        }
        ULP_SKB_CB(skb)->ulp.tls.iv = 0;
        return 0;
}

static int max_ivs_size(struct sock *sk, int size)
{
        return nos_ivs(sk, size) * CIPHER_BLOCK_SIZE;
}

static int ivs_size(struct sock *sk, const struct sk_buff *skb)
{
        return set_ivs_imm(sk, skb) ? (nos_ivs(sk, skb->len) *
                 CIPHER_BLOCK_SIZE) : 0;
}

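/*
 * Credits (16-byte units) consumed by a FLOWC work request carrying
 * nparams mnemonics; optionally returns the padded WR length in bytes.
 */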
static int flowc_wr_credits(int nparams, int *flowclenp)
{
        int flowclen16, flowclen;

        flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
        flowclen16 = DIV_ROUND_UP(flowclen, 16);
        flowclen = flowclen16 * 16;

        if (flowclenp)
                *flowclenp = flowclen;

        return flowclen16;
}

static struct sk_buff *create_flowc_wr_skb(struct sock *sk,
                                           struct fw_flowc_wr *flowc,
                                           int flowclen)
{
        struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
        struct sk_buff *skb;

        skb = alloc_skb(flowclen, GFP_ATOMIC);
        if (!skb)
                return NULL;

        memcpy(__skb_put(skb, flowclen), flowc, flowclen);
        skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA);

        return skb;
}

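/*
 * Transmit a FLOWC work request, as immediate data when possible, otherwise
 * via the connection's TX queue.  Returns the credits (16-byte units)
 * consumed, 0 if the WR was only queued, or a negative error.
 */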
static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc,
                         int flowclen)
{
        struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb;
        int flowclen16;
        int ret;

        flowclen16 = flowclen / 16;

        if (csk_flag(sk, CSK_TX_DATA_SENT)) {
                skb = create_flowc_wr_skb(sk, flowc, flowclen);
                if (!skb)
                        return -ENOMEM;

                skb_entail(sk, skb,
                           ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND);
                return 0;
        }

        ret = cxgb4_immdata_send(csk->egress_dev,
                                 csk->txq_idx,
                                 flowc, flowclen);
        if (!ret)
                return flowclen16;
        skb = create_flowc_wr_skb(sk, flowc, flowclen);
        if (!skb)
                return -ENOMEM;
        send_or_defer(sk, tp, skb, 0);
        return flowclen16;
}

static u8 tcp_state_to_flowc_state(u8 state)
{
        switch (state) {
        case TCP_ESTABLISHED:
                return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED;
        case TCP_CLOSE_WAIT:
                return FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT;
        case TCP_FIN_WAIT1:
                return FW_FLOWC_MNEM_TCPSTATE_FINWAIT1;
        case TCP_CLOSING:
                return FW_FLOWC_MNEM_TCPSTATE_CLOSING;
        case TCP_LAST_ACK:
                return FW_FLOWC_MNEM_TCPSTATE_LASTACK;
        case TCP_FIN_WAIT2:
                return FW_FLOWC_MNEM_TCPSTATE_FINWAIT2;
        }

        return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED;
}

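/*
 * Build the FLOWC work request that describes this connection to the
 * firmware (channel, queues, sequence numbers, MSS, TLS mode, ...) and
 * hand it to send_flowc_wr().
 */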
int send_tx_flowc_wr(struct sock *sk, int compl,
                     u32 snd_nxt, u32 rcv_nxt)
{
        struct flowc_packed {
                struct fw_flowc_wr fc;
                struct fw_flowc_mnemval mnemval[FW_FLOWC_MNEM_MAX];
        } __packed sflowc;
        int nparams, paramidx, flowclen16, flowclen;
        struct fw_flowc_wr *flowc;
        struct chtls_sock *csk;
        struct tcp_sock *tp;

        csk = rcu_dereference_sk_user_data(sk);
        tp = tcp_sk(sk);
        memset(&sflowc, 0, sizeof(sflowc));
        flowc = &sflowc.fc;

#define FLOWC_PARAM(__m, __v) \
        do { \
                flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \
                flowc->mnemval[paramidx].val = cpu_to_be32(__v); \
                paramidx++; \
        } while (0)

        paramidx = 0;

        FLOWC_PARAM(PFNVFN, FW_PFVF_CMD_PFN_V(csk->cdev->lldi->pf));
        FLOWC_PARAM(CH, csk->tx_chan);
        FLOWC_PARAM(PORT, csk->tx_chan);
        FLOWC_PARAM(IQID, csk->rss_qid);
        FLOWC_PARAM(SNDNXT, tp->snd_nxt);
        FLOWC_PARAM(RCVNXT, tp->rcv_nxt);
        FLOWC_PARAM(SNDBUF, csk->sndbuf);
        FLOWC_PARAM(MSS, tp->mss_cache);
        FLOWC_PARAM(TCPSTATE, tcp_state_to_flowc_state(sk->sk_state));

        if (SND_WSCALE(tp))
                FLOWC_PARAM(RCV_SCALE, SND_WSCALE(tp));

        if (csk->ulp_mode == ULP_MODE_TLS)
                FLOWC_PARAM(ULD_MODE, ULP_MODE_TLS);

        if (csk->tlshws.fcplenmax)
                FLOWC_PARAM(TXDATAPLEN_MAX, csk->tlshws.fcplenmax);

        nparams = paramidx;
#undef FLOWC_PARAM

        flowclen16 = flowc_wr_credits(nparams, &flowclen);
        flowc->op_to_nparams =
                cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
                            FW_WR_COMPL_V(compl) |
                            FW_FLOWC_WR_NPARAMS_V(nparams));
        flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
                                          FW_WR_FLOWID_V(csk->tid));

        return send_flowc_wr(sk, flowc, flowclen);
}

/* Copy IVs to WR */
static int tls_copy_ivs(struct sock *sk, struct sk_buff *skb)
{
        struct chtls_sock *csk;
        unsigned char *iv_loc;
        struct chtls_hws *hws;
        unsigned char *ivs;
        u16 number_of_ivs;
        struct page *page;
        int err = 0;

        csk = rcu_dereference_sk_user_data(sk);
        hws = &csk->tlshws;
        number_of_ivs = nos_ivs(sk, skb->len);

        if (number_of_ivs > MAX_IVS_PAGE) {
                pr_warn("MAX IVs in PAGE exceeded %d\n", number_of_ivs);
                return -ENOMEM;
        }

        /* generate the IVs */
        ivs = kmalloc_array(CIPHER_BLOCK_SIZE, number_of_ivs, GFP_ATOMIC);
        if (!ivs)
                return -ENOMEM;
        get_random_bytes(ivs, number_of_ivs * CIPHER_BLOCK_SIZE);

        if (skb_ulp_tls_iv_imm(skb)) {
                /* send the IVs as immediate data in the WR */
                iv_loc = (unsigned char *)__skb_push(skb, number_of_ivs *
                                                CIPHER_BLOCK_SIZE);
                if (iv_loc)
                        memcpy(iv_loc, ivs, number_of_ivs * CIPHER_BLOCK_SIZE);

                hws->ivsize = number_of_ivs * CIPHER_BLOCK_SIZE;
        } else {
                /* Send the IVs as sgls */
                /* Already accounted IV DSGL for credits */
                skb_shinfo(skb)->nr_frags--;
                page = alloc_pages(sk->sk_allocation | __GFP_COMP, 0);
                if (!page) {
                        pr_info("%s : Page allocation for IVs failed\n",
                                __func__);
                        err = -ENOMEM;
                        goto out;
                }
                memcpy(page_address(page), ivs, number_of_ivs *
                       CIPHER_BLOCK_SIZE);
                skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, 0,
                                   number_of_ivs * CIPHER_BLOCK_SIZE);
                hws->ivsize = 0;
        }
out:
        kfree(ivs);
        return err;
}

/* Copy Key to WR */
static void tls_copy_tx_key(struct sock *sk, struct sk_buff *skb)
{
        struct ulptx_sc_memrd *sc_memrd;
        struct chtls_sock *csk;
        struct chtls_dev *cdev;
        struct ulptx_idata *sc;
        struct chtls_hws *hws;
        u32 immdlen;
        int kaddr;

        csk = rcu_dereference_sk_user_data(sk);
        hws = &csk->tlshws;
        cdev = csk->cdev;

        immdlen = sizeof(*sc) + sizeof(*sc_memrd);
        kaddr = keyid_to_addr(cdev->kmap.start, hws->txkey);
        sc = (struct ulptx_idata *)__skb_push(skb, immdlen);
        if (sc) {
                sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP));
                sc->len = htonl(0);
                sc_memrd = (struct ulptx_sc_memrd *)(sc + 1);
                sc_memrd->cmd_to_len =
                                htonl(ULPTX_CMD_V(ULP_TX_SC_MEMRD) |
                                ULP_TX_SC_MORE_V(1) |
                                ULPTX_LEN16_V(hws->keylen >> 4));
                sc_memrd->addr = htonl(kaddr);
        }
}

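/* Return the current TX record sequence number and advance it */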
static u64 tlstx_incr_seqnum(struct chtls_hws *hws)
{
        return hws->tx_seq_no++;
}

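/* WRs whose payload cannot be carried as immediate data need a scatter/gather list */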
static bool is_sg_request(const struct sk_buff *skb)
{
        return skb->peeked ||
                (skb->len > MAX_IMM_ULPTX_WR_LEN);
}

/*
 * Returns true if an sk_buff carries urgent data.
 */
static bool skb_urgent(struct sk_buff *skb)
{
        return ULP_SKB_CB(skb)->flags & ULPCB_FLAG_URG;
}

/* TLS content type for CPL SFO */
static unsigned char tls_content_type(unsigned char content_type)
{
        switch (content_type) {
        case TLS_HDR_TYPE_CCS:
                return CPL_TX_TLS_SFO_TYPE_CCS;
        case TLS_HDR_TYPE_ALERT:
                return CPL_TX_TLS_SFO_TYPE_ALERT;
        case TLS_HDR_TYPE_HANDSHAKE:
                return CPL_TX_TLS_SFO_TYPE_HANDSHAKE;
        case TLS_HDR_TYPE_HEARTBEAT:
                return CPL_TX_TLS_SFO_TYPE_HEARTBEAT;
        }
        return CPL_TX_TLS_SFO_TYPE_DATA;
}

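/*
 * Fill in the TLS TX work request (FW_TLSTX_DATA_WR) and the CPL_TX_TLS_SFO
 * header that precede the record payload.
 */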
static void tls_tx_data_wr(struct sock *sk, struct sk_buff *skb,
                           int dlen, int tls_immd, u32 credits,
                           int expn, int pdus)
{
        struct fw_tlstx_data_wr *req_wr;
        struct cpl_tx_tls_sfo *req_cpl;
        unsigned int wr_ulp_mode_force;
        struct tls_scmd *updated_scmd;
        unsigned char data_type;
        struct chtls_sock *csk;
        struct net_device *dev;
        struct chtls_hws *hws;
        struct tls_scmd *scmd;
        struct adapter *adap;
        unsigned char *req;
        int immd_len;
        int iv_imm;
        int len;

        csk = rcu_dereference_sk_user_data(sk);
        iv_imm = skb_ulp_tls_iv_imm(skb);
        dev = csk->egress_dev;
        adap = netdev2adap(dev);
        hws = &csk->tlshws;
        scmd = &hws->scmd;
        len = dlen + expn;

        dlen = (dlen < hws->mfs) ? dlen : hws->mfs;
        atomic_inc(&adap->chcr_stats.tls_pdu_tx);

        updated_scmd = scmd;
        updated_scmd->seqno_numivs &= 0xffffff80;
        updated_scmd->seqno_numivs |= SCMD_NUM_IVS_V(pdus);
        hws->scmd = *updated_scmd;

        req = (unsigned char *)__skb_push(skb, sizeof(struct cpl_tx_tls_sfo));
        req_cpl = (struct cpl_tx_tls_sfo *)req;
        req = (unsigned char *)__skb_push(skb, (sizeof(struct
                                fw_tlstx_data_wr)));

        req_wr = (struct fw_tlstx_data_wr *)req;
        immd_len = (tls_immd ? dlen : 0);
        req_wr->op_to_immdlen =
                htonl(FW_WR_OP_V(FW_TLSTX_DATA_WR) |
                FW_TLSTX_DATA_WR_COMPL_V(1) |
                FW_TLSTX_DATA_WR_IMMDLEN_V(immd_len));
        req_wr->flowid_len16 = htonl(FW_TLSTX_DATA_WR_FLOWID_V(csk->tid) |
                                     FW_TLSTX_DATA_WR_LEN16_V(credits));
        wr_ulp_mode_force = TX_ULP_MODE_V(ULP_MODE_TLS);

        if (is_sg_request(skb))
                wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F |
                        ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 :
                        FW_OFLD_TX_DATA_WR_SHOVE_F);

        req_wr->lsodisable_to_flags =
                        htonl(TX_ULP_MODE_V(ULP_MODE_TLS) |
                              TX_URG_V(skb_urgent(skb)) |
                              T6_TX_FORCE_F | wr_ulp_mode_force |
                              TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
                                         skb_queue_empty(&csk->txq)));

        req_wr->ctxloc_to_exp =
                        htonl(FW_TLSTX_DATA_WR_NUMIVS_V(pdus) |
                              FW_TLSTX_DATA_WR_EXP_V(expn) |
                              FW_TLSTX_DATA_WR_CTXLOC_V(CHTLS_KEY_CONTEXT_DDR) |
                              FW_TLSTX_DATA_WR_IVDSGL_V(!iv_imm) |
                              FW_TLSTX_DATA_WR_KEYSIZE_V(hws->keylen >> 4));

        /* Fill in the length */
        req_wr->plen = htonl(len);
        req_wr->mfs = htons(hws->mfs);
        req_wr->adjustedplen_pkd =
                htons(FW_TLSTX_DATA_WR_ADJUSTEDPLEN_V(hws->adjustlen));
        req_wr->expinplenmax_pkd =
                htons(FW_TLSTX_DATA_WR_EXPINPLENMAX_V(hws->expansion));
        req_wr->pdusinplenmax_pkd =
                FW_TLSTX_DATA_WR_PDUSINPLENMAX_V(hws->pdus);
        req_wr->r10 = 0;

        data_type = tls_content_type(ULP_SKB_CB(skb)->ulp.tls.type);
        req_cpl->op_to_seg_len = htonl(CPL_TX_TLS_SFO_OPCODE_V(CPL_TX_TLS_SFO) |
                                       CPL_TX_TLS_SFO_DATA_TYPE_V(data_type) |
                                       CPL_TX_TLS_SFO_CPL_LEN_V(2) |
                                       CPL_TX_TLS_SFO_SEG_LEN_V(dlen));
        req_cpl->pld_len = htonl(len - expn);

        req_cpl->type_protover = htonl(CPL_TX_TLS_SFO_TYPE_V
                ((data_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) ?
                TLS_HDR_TYPE_HEARTBEAT : 0) |
                CPL_TX_TLS_SFO_PROTOVER_V(0));

        /* create the s-command */
        req_cpl->r1_lo = 0;
        req_cpl->seqno_numivs  = cpu_to_be32(hws->scmd.seqno_numivs);
        req_cpl->ivgen_hdrlen = cpu_to_be32(hws->scmd.ivgen_hdrlen);
        req_cpl->scmd1 = cpu_to_be64(tlstx_incr_seqnum(hws));
}

/*
 * Calculate the TLS data expansion size
 */
static int chtls_expansion_size(struct sock *sk, int data_len,
                                int fullpdu,
                                unsigned short *pducnt)
{
        struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
        struct chtls_hws *hws = &csk->tlshws;
        struct tls_scmd *scmd = &hws->scmd;
        int fragsize = hws->mfs;
        int expnsize = 0;
        int fragleft;
        int fragcnt;
        int expppdu;

        if (SCMD_CIPH_MODE_G(scmd->seqno_numivs) ==
            SCMD_CIPH_MODE_AES_GCM) {
                expppdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE +
                          TLS_HEADER_LENGTH;

                if (fullpdu) {
                        *pducnt = data_len / (expppdu + fragsize);
                        if (*pducnt > 32)
                                *pducnt = 32;
                        else if (!*pducnt)
                                *pducnt = 1;
                        expnsize = (*pducnt) * expppdu;
                        return expnsize;
                }
                fragcnt = (data_len / fragsize);
                expnsize = fragcnt * expppdu;
                fragleft = data_len % fragsize;
                if (fragleft > 0)
                        expnsize += expppdu;
        }
        return expnsize;
}

/* WR with IV, KEY and CPL SFO added */
static void make_tlstx_data_wr(struct sock *sk, struct sk_buff *skb,
                               int tls_tx_imm, int tls_len, u32 credits)
{
        unsigned short pdus_per_ulp = 0;
        struct chtls_sock *csk;
        struct chtls_hws *hws;
        int expn_sz;
        int pdus;

        csk = rcu_dereference_sk_user_data(sk);
        hws = &csk->tlshws;
        pdus = DIV_ROUND_UP(tls_len, hws->mfs);
        expn_sz = chtls_expansion_size(sk, tls_len, 0, NULL);
        if (!hws->compute) {
                hws->expansion = chtls_expansion_size(sk,
                                                      hws->fcplenmax,
                                                      1, &pdus_per_ulp);
                hws->pdus = pdus_per_ulp;
                hws->adjustlen = hws->pdus *
                        ((hws->expansion / hws->pdus) + hws->mfs);
                hws->compute = 1;
        }
        if (tls_copy_ivs(sk, skb))
                return;
        tls_copy_tx_key(sk, skb);
        tls_tx_data_wr(sk, skb, tls_len, tls_tx_imm, credits, expn_sz, pdus);
        hws->tx_seq_no += (pdus - 1);
}

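/* Plain (non-TLS) offload TX data work request */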
static void make_tx_data_wr(struct sock *sk, struct sk_buff *skb,
                            unsigned int immdlen, int len,
                            u32 credits, u32 compl)
{
        struct fw_ofld_tx_data_wr *req;
        unsigned int wr_ulp_mode_force;
        struct chtls_sock *csk;
        unsigned int opcode;

        csk = rcu_dereference_sk_user_data(sk);
        opcode = FW_OFLD_TX_DATA_WR;

        req = (struct fw_ofld_tx_data_wr *)__skb_push(skb, sizeof(*req));
        req->op_to_immdlen = htonl(WR_OP_V(opcode) |
                                FW_WR_COMPL_V(compl) |
                                FW_WR_IMMDLEN_V(immdlen));
        req->flowid_len16 = htonl(FW_WR_FLOWID_V(csk->tid) |
                                FW_WR_LEN16_V(credits));

        wr_ulp_mode_force = TX_ULP_MODE_V(csk->ulp_mode);
        if (is_sg_request(skb))
                wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F |
                        ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 :
                                FW_OFLD_TX_DATA_WR_SHOVE_F);

        req->tunnel_to_proxy = htonl(wr_ulp_mode_force |
                        TX_URG_V(skb_urgent(skb)) |
                        TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
                                   skb_queue_empty(&csk->txq)));
        req->plen = htonl(len);
}

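/*
 * TLS WR overhead: CPL + key (fetched from adapter memory) + IVs.  When
 * only sizing (size == true) nothing else is touched; otherwise an extra
 * frag is reserved for the IV DSGL if the IVs are not sent inline.
 */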
static int chtls_wr_size(struct chtls_sock *csk, const struct sk_buff *skb,
                         bool size)
{
        int wr_size;

        wr_size = TLS_WR_CPL_LEN;
        wr_size += KEY_ON_MEM_SZ;
        wr_size += ivs_size(csk->sk, skb);

        if (size)
                return wr_size;

        /* frags counted for IV dsgl */
        if (!skb_ulp_tls_iv_imm(skb))
                skb_shinfo(skb)->nr_frags++;

        return wr_size;
}

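/* True if the skb, including its WR header, is small enough to go out as immediate data */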
static bool is_ofld_imm(struct chtls_sock *csk, const struct sk_buff *skb)
{
        int length = skb->len;

        if (skb->peeked || skb->len > MAX_IMM_ULPTX_WR_LEN)
                return false;

        if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
                /* Check TLS header len for Immediate */
                if (csk->ulp_mode == ULP_MODE_TLS &&
                    skb_ulp_tls_inline(skb))
                        length += chtls_wr_size(csk, skb, true);
                else
                        length += sizeof(struct fw_ofld_tx_data_wr);

                return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN;
        }
        return true;
}

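/* Number of 8-byte flits needed for the WR: immediate header bytes plus an SGL for the payload */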
static unsigned int calc_tx_flits(const struct sk_buff *skb,
                                  unsigned int immdlen)
{
        unsigned int flits, cnt;

        flits = immdlen / 8;   /* headers */
        cnt = skb_shinfo(skb)->nr_frags;
        if (skb_tail_pointer(skb) != skb_transport_header(skb))
                cnt++;
        return flits + sgl_len(cnt);
}

static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
        kfree_skb(skb);
}

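/*
 * Push queued TX skbs to the hardware while work-request credits are
 * available, building the appropriate (TLS or plain offload) WR headers
 * on the way out.  Returns the total truesize of the skbs handed off.
 */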
int chtls_push_frames(struct chtls_sock *csk, int comp)
{
        struct chtls_hws *hws = &csk->tlshws;
        struct tcp_sock *tp;
        struct sk_buff *skb;
        int total_size = 0;
        struct sock *sk;
        int wr_size;

        wr_size = sizeof(struct fw_ofld_tx_data_wr);
        sk = csk->sk;
        tp = tcp_sk(sk);

        if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE)))
                return 0;

        if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN)))
                return 0;

        while (csk->wr_credits && (skb = skb_peek(&csk->txq)) &&
               (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_HOLD) ||
                skb_queue_len(&csk->txq) > 1)) {
                unsigned int credit_len = skb->len;
                unsigned int credits_needed;
                unsigned int completion = 0;
                int tls_len = skb->len; /* TLS data len before IV/key */
                unsigned int immdlen;
                int len = skb->len;    /* length [ulp bytes] inserted by hw */
                int flowclen16 = 0;
                int tls_tx_imm = 0;

                immdlen = skb->len;
                if (!is_ofld_imm(csk, skb)) {
                        immdlen = skb_transport_offset(skb);
                        if (skb_ulp_tls_inline(skb))
                                wr_size = chtls_wr_size(csk, skb, false);
                        credit_len = 8 * calc_tx_flits(skb, immdlen);
                } else {
                        if (skb_ulp_tls_inline(skb)) {
                                wr_size = chtls_wr_size(csk, skb, false);
                                tls_tx_imm = 1;
                        }
                }
                if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR))
                        credit_len += wr_size;
                credits_needed = DIV_ROUND_UP(credit_len, 16);
                if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
                        flowclen16 = send_tx_flowc_wr(sk, 1, tp->snd_nxt,
                                                      tp->rcv_nxt);
                        if (flowclen16 <= 0)
                                break;
                        csk->wr_credits -= flowclen16;
                        csk->wr_unacked += flowclen16;
                        csk->wr_nondata += flowclen16;
                        csk_set_flag(csk, CSK_TX_DATA_SENT);
                }

                if (csk->wr_credits < credits_needed) {
                        if (skb_ulp_tls_inline(skb) &&
                            !skb_ulp_tls_iv_imm(skb))
                                skb_shinfo(skb)->nr_frags--;
                        break;
                }

                __skb_unlink(skb, &csk->txq);
                skb_set_queue_mapping(skb, (csk->txq_idx << 1) |
                                      CPL_PRIORITY_DATA);
                if (hws->ofld)
                        hws->txqid = (skb->queue_mapping >> 1);
                skb->csum = (__force __wsum)(credits_needed + csk->wr_nondata);
                csk->wr_credits -= credits_needed;
                csk->wr_unacked += credits_needed;
                csk->wr_nondata = 0;
                enqueue_wr(csk, skb);

                if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
                        if ((comp && csk->wr_unacked == credits_needed) ||
                            (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) ||
                            csk->wr_unacked >= csk->wr_max_credits / 2) {
                                completion = 1;
                                csk->wr_unacked = 0;
                        }
                        if (skb_ulp_tls_inline(skb))
                                make_tlstx_data_wr(sk, skb, tls_tx_imm,
                                                   tls_len, credits_needed);
                        else
                                make_tx_data_wr(sk, skb, immdlen, len,
                                                credits_needed, completion);
                        tp->snd_nxt += len;
                        tp->lsndtime = tcp_time_stamp(tp);
                        if (completion)
                                ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_NEED_HDR;
                } else {
                        struct cpl_close_con_req *req = cplhdr(skb);
                        unsigned int cmd = CPL_OPCODE_G(ntohl
                                             (OPCODE_TID(req)));

                        if (cmd == CPL_CLOSE_CON_REQ)
                                csk_set_flag(csk,
                                             CSK_CLOSE_CON_REQUESTED);

                        if ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) &&
                            (csk->wr_unacked >= csk->wr_max_credits / 2)) {
                                req->wr.wr_hi |= htonl(FW_WR_COMPL_F);
                                csk->wr_unacked = 0;
                        }
                }
                total_size += skb->truesize;
                if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_BARRIER)
                        csk_set_flag(csk, CSK_TX_WAIT_IDLE);
                t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
                cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry);
        }
        sk->sk_wmem_queued -= total_size;
        return total_size;
}

static void mark_urg(struct tcp_sock *tp, int flags,
                     struct sk_buff *skb)
{
        if (unlikely(flags & MSG_OOB)) {
                tp->snd_up = tp->write_seq;
                ULP_SKB_CB(skb)->flags = ULPCB_FLAG_URG |
                                         ULPCB_FLAG_BARRIER |
                                         ULPCB_FLAG_NO_APPEND |
                                         ULPCB_FLAG_NEED_HDR;
        }
}

/*
 * Returns true if a connection should send more data to the TCP engine
 */
static bool should_push(struct sock *sk)
{
        struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
        struct chtls_dev *cdev = csk->cdev;
        struct tcp_sock *tp = tcp_sk(sk);

        /*
         * If we've released our offload resources there's nothing to do ...
         */
        if (!cdev)
                return false;

        /*
         * If there aren't any work requests in flight, or there isn't enough
         * data in flight, or Nagle is off then send the current TX_DATA,
         * otherwise hold it and wait to accumulate more data.
         */
        return csk->wr_credits == csk->wr_max_credits ||
                (tp->nonagle & TCP_NAGLE_OFF);
}

/*
 * Returns true if a TCP socket is corked.
 */
static bool corked(const struct tcp_sock *tp, int flags)
{
        return (flags & MSG_MORE) || (tp->nonagle & TCP_NAGLE_CORK);
}

/*
 * Returns true if a send should try to push new data.
 */
static bool send_should_push(struct sock *sk, int flags)
{
        return should_push(sk) && !corked(tcp_sk(sk), flags);
}

void chtls_tcp_push(struct sock *sk, int flags)
{
        struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
        int qlen = skb_queue_len(&csk->txq);

        if (likely(qlen)) {
                struct sk_buff *skb = skb_peek_tail(&csk->txq);
                struct tcp_sock *tp = tcp_sk(sk);

                mark_urg(tp, flags, skb);

                if (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) &&
                    corked(tp, flags)) {
                        ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_HOLD;
                        return;
                }

                ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_HOLD;
                if (qlen == 1 &&
                    ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
                     should_push(sk)))
                        chtls_push_frames(csk, 1);
        }
}

/*
 * Calculate the size for a new send sk_buff.  It is sized at the maximum so
 * we can pack lots of data into it, unless we plan to send it immediately,
 * in which case we size it more tightly.
 *
 * Note: we don't bother compensating for MSS < PAGE_SIZE because it doesn't
 * arise in normal cases and when it does we are just wasting memory.
 */
static int select_size(struct sock *sk, int io_len, int flags, int len)
{
        const int pgbreak = SKB_MAX_HEAD(len);

        /*
         * If the data wouldn't fit in the main body anyway, put only the
         * header in the main body so it can use immediate data and place all
         * the payload in page fragments.
         */
        if (io_len > pgbreak)
                return 0;

        /*
         * If we will be accumulating payload get a large main body.
         */
        if (!send_should_push(sk, flags))
                return pgbreak;

        return io_len;
}

void skb_entail(struct sock *sk, struct sk_buff *skb, int flags)
{
        struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
        struct tcp_sock *tp = tcp_sk(sk);

        ULP_SKB_CB(skb)->seq = tp->write_seq;
        ULP_SKB_CB(skb)->flags = flags;
        __skb_queue_tail(&csk->txq, skb);
        sk->sk_wmem_queued += skb->truesize;

        if (TCP_PAGE(sk) && TCP_OFF(sk)) {
                put_page(TCP_PAGE(sk));
                TCP_PAGE(sk) = NULL;
                TCP_OFF(sk) = 0;
        }
}

static struct sk_buff *get_tx_skb(struct sock *sk, int size)
{
        struct sk_buff *skb;

        skb = alloc_skb(size + TX_HEADER_LEN, sk->sk_allocation);
        if (likely(skb)) {
                skb_reserve(skb, TX_HEADER_LEN);
                skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
                skb_reset_transport_header(skb);
        }
        return skb;
}

static struct sk_buff *get_record_skb(struct sock *sk, int size, bool zcopy)
{
        struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
        struct sk_buff *skb;

        skb = alloc_skb(((zcopy ? 0 : size) + TX_TLSHDR_LEN +
                        KEY_ON_MEM_SZ + max_ivs_size(sk, size)),
                        sk->sk_allocation);
        if (likely(skb)) {
                skb_reserve(skb, (TX_TLSHDR_LEN +
                            KEY_ON_MEM_SZ + max_ivs_size(sk, size)));
                skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
                skb_reset_transport_header(skb);
                ULP_SKB_CB(skb)->ulp.tls.ofld = 1;
                ULP_SKB_CB(skb)->ulp.tls.type = csk->tlshws.type;
        }
        return skb;
}

static void tx_skb_finalize(struct sk_buff *skb)
{
        struct ulp_skb_cb *cb = ULP_SKB_CB(skb);

        if (!(cb->flags & ULPCB_FLAG_NO_HDR))
                cb->flags = ULPCB_FLAG_NEED_HDR;
        cb->flags |= ULPCB_FLAG_NO_APPEND;
}

static void push_frames_if_head(struct sock *sk)
{
        struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);

        if (skb_queue_len(&csk->txq) == 1)
                chtls_push_frames(csk, 1);
}

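/* Copy data from the iterator into a page fragment and update the skb accounting */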
static int chtls_skb_copy_to_page_nocache(struct sock *sk,
                                          struct iov_iter *from,
                                          struct sk_buff *skb,
                                          struct page *page,
                                          int off, int copy)
{
        int err;

        err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) +
                                       off, copy, skb->len);
        if (err)
                return err;

        skb->len             += copy;
        skb->data_len        += copy;
        skb->truesize        += copy;
        sk->sk_wmem_queued   += copy;
        return 0;
}

/* Read TLS header to find content type and data length */
static int tls_header_read(struct tls_hdr *thdr, struct iov_iter *from)
{
        if (copy_from_iter(thdr, sizeof(*thdr), from) != sizeof(*thdr))
                return -EFAULT;
        return (__force int)cpu_to_be16(thdr->length);
}

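/* Remaining send-buffer budget for this offload device and socket */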
static int csk_mem_free(struct chtls_dev *cdev, struct sock *sk)
{
        return (cdev->max_host_sndbuf - sk->sk_wmem_queued);
}

static int csk_wait_memory(struct chtls_dev *cdev,
                           struct sock *sk, long *timeo_p)
{
        DEFINE_WAIT_FUNC(wait, woken_wake_function);
        int sndbuf, err = 0;
        long current_timeo;
        long vm_wait = 0;
        bool noblock;

        current_timeo = *timeo_p;
        noblock = (*timeo_p ? false : true);
        sndbuf = cdev->max_host_sndbuf;
        if (csk_mem_free(cdev, sk)) {
                current_timeo = (prandom_u32() % (HZ / 5)) + 2;
                vm_wait = (prandom_u32() % (HZ / 5)) + 2;
        }

        add_wait_queue(sk_sleep(sk), &wait);
        while (1) {
                sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

                if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
                        goto do_error;
                if (!*timeo_p) {
                        if (noblock)
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                        goto do_nonblock;
                }
                if (signal_pending(current))
                        goto do_interrupted;
                sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
                if (csk_mem_free(cdev, sk) && !vm_wait)
                        break;

                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                sk->sk_write_pending++;
                sk_wait_event(sk, &current_timeo, sk->sk_err ||
                              (sk->sk_shutdown & SEND_SHUTDOWN) ||
                              (csk_mem_free(cdev, sk) && !vm_wait), &wait);
                sk->sk_write_pending--;

                if (vm_wait) {
                        vm_wait -= current_timeo;
                        current_timeo = *timeo_p;
                        if (current_timeo != MAX_SCHEDULE_TIMEOUT) {
                                current_timeo -= vm_wait;
                                if (current_timeo < 0)
                                        current_timeo = 0;
                        }
                        vm_wait = 0;
                }
                *timeo_p = current_timeo;
        }
do_rm_wq:
        remove_wait_queue(sk_sleep(sk), &wait);
        return err;
do_error:
        err = -EPIPE;
        goto do_rm_wq;
do_nonblock:
        err = -EAGAIN;
        goto do_rm_wq;
do_interrupted:
        err = sock_intr_errno(*timeo_p);
        goto do_rm_wq;
}

int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
        struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
        struct chtls_dev *cdev = csk->cdev;
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb;
        int mss, flags, err;
        int recordsz = 0;
        int copied = 0;
        long timeo;

        lock_sock(sk);
        flags = msg->msg_flags;
        timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);

        if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
                err = sk_stream_wait_connect(sk, &timeo);
                if (err)
                        goto out_err;
        }

        sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
        err = -EPIPE;
        if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
                goto out_err;

        mss = csk->mss;
        csk_set_flag(csk, CSK_TX_MORE_DATA);

        while (msg_data_left(msg)) {
                int copy = 0;

                skb = skb_peek_tail(&csk->txq);
                if (skb) {
                        copy = mss - skb->len;
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
                }
                if (!csk_mem_free(cdev, sk))
                        goto wait_for_sndbuf;

                if (is_tls_tx(csk) && !csk->tlshws.txleft) {
                        struct tls_hdr hdr;

                        recordsz = tls_header_read(&hdr, &msg->msg_iter);
                        size -= TLS_HEADER_LENGTH;
                        copied += TLS_HEADER_LENGTH;
                        csk->tlshws.txleft = recordsz;
                        csk->tlshws.type = hdr.type;
                        if (skb)
                                ULP_SKB_CB(skb)->ulp.tls.type = hdr.type;
                }

                if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
                    copy <= 0) {
new_buf:
                        if (skb) {
                                tx_skb_finalize(skb);
                                push_frames_if_head(sk);
                        }

                        if (is_tls_tx(csk)) {
                                skb = get_record_skb(sk,
                                                     select_size(sk,
                                                                 recordsz,
                                                                 flags,
                                                                 TX_TLSHDR_LEN),
                                                                 false);
                        } else {
                                skb = get_tx_skb(sk,
                                                 select_size(sk, size, flags,
                                                             TX_HEADER_LEN));
                        }
                        if (unlikely(!skb))
                                goto wait_for_memory;

                        skb->ip_summed = CHECKSUM_UNNECESSARY;
                        copy = mss;
                }
                if (copy > size)
                        copy = size;

                if (skb_tailroom(skb) > 0) {
                        copy = min(copy, skb_tailroom(skb));
                        if (is_tls_tx(csk))
                                copy = min_t(int, copy, csk->tlshws.txleft);
                        err = skb_add_data_nocache(sk, skb,
                                                   &msg->msg_iter, copy);
                        if (err)
                                goto do_fault;
                } else {
                        int i = skb_shinfo(skb)->nr_frags;
                        struct page *page = TCP_PAGE(sk);
                        int pg_size = PAGE_SIZE;
                        int off = TCP_OFF(sk);
                        bool merge;

                        if (page)
                                pg_size <<= compound_order(page);
                        if (off < pg_size &&
                            skb_can_coalesce(skb, i, page, off)) {
                                merge = 1;
                                goto copy;
                        }
                        merge = 0;
                        if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) :
                            MAX_SKB_FRAGS))
                                goto new_buf;

                        if (page && off == pg_size) {
                                put_page(page);
                                TCP_PAGE(sk) = page = NULL;
                                pg_size = PAGE_SIZE;
                        }

                        if (!page) {
                                gfp_t gfp = sk->sk_allocation;
                                int order = cdev->send_page_order;

                                if (order) {
                                        page = alloc_pages(gfp | __GFP_COMP |
                                                           __GFP_NOWARN |
                                                           __GFP_NORETRY,
                                                           order);
                                        if (page)
                                                pg_size <<=
                                                        compound_order(page);
                                }
                                if (!page) {
                                        page = alloc_page(gfp);
                                        pg_size = PAGE_SIZE;
                                }
                                if (!page)
                                        goto wait_for_memory;
                                off = 0;
                        }
copy:
                        if (copy > pg_size - off)
                                copy = pg_size - off;
                        if (is_tls_tx(csk))
                                copy = min_t(int, copy, csk->tlshws.txleft);

                        err = chtls_skb_copy_to_page_nocache(sk, &msg->msg_iter,
                                                             skb, page,
                                                             off, copy);
                        if (unlikely(err)) {
                                if (!TCP_PAGE(sk)) {
                                        TCP_PAGE(sk) = page;
                                        TCP_OFF(sk) = 0;
                                }
                                goto do_fault;
                        }
                        /* Update the skb. */
                        if (merge) {
                                skb_shinfo(skb)->frags[i - 1].size += copy;
                        } else {
                                skb_fill_page_desc(skb, i, page, off, copy);
                                if (off + copy < pg_size) {
                                        /* space left keep page */
                                        get_page(page);
                                        TCP_PAGE(sk) = page;
                                } else {
                                        TCP_PAGE(sk) = NULL;
                                }
                        }
                        TCP_OFF(sk) = off + copy;
                }
                if (unlikely(skb->len == mss))
                        tx_skb_finalize(skb);
                tp->write_seq += copy;
                copied += copy;
                size -= copy;

                if (is_tls_tx(csk))
                        csk->tlshws.txleft -= copy;

                if (corked(tp, flags) &&
                    (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)))
                        ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND;

                if (size == 0)
                        goto out;

                if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND)
                        push_frames_if_head(sk);
                continue;
wait_for_sndbuf:
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
                err = csk_wait_memory(cdev, sk, &timeo);
                if (err)
                        goto do_error;
        }
out:
        csk_reset_flag(csk, CSK_TX_MORE_DATA);
        if (copied)
                chtls_tcp_push(sk, flags);
done:
        release_sock(sk);
        return copied;
do_fault:
        if (!skb->len) {
                __skb_unlink(skb, &csk->txq);
                sk->sk_wmem_queued -= skb->truesize;
                __kfree_skb(skb);
        }
do_error:
        if (copied)
                goto out;
out_err:
        if (csk_conn_inline(csk))
                csk_reset_flag(csk, CSK_TX_MORE_DATA);
        copied = sk_stream_error(sk, flags, err);
        goto done;
}

int chtls_sendpage(struct sock *sk, struct page *page,
                   int offset, size_t size, int flags)
{
        struct chtls_sock *csk;
        struct chtls_dev *cdev;
        int mss, err, copied;
        struct tcp_sock *tp;
        long timeo;

        tp = tcp_sk(sk);
        copied = 0;
        csk = rcu_dereference_sk_user_data(sk);
        cdev = csk->cdev;
        timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);

        err = sk_stream_wait_connect(sk, &timeo);
        if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
            err != 0)
                goto out_err;

        mss = csk->mss;
        csk_set_flag(csk, CSK_TX_MORE_DATA);

        while (size > 0) {
                struct sk_buff *skb = skb_peek_tail(&csk->txq);
                int copy, i;

                if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
                    (copy = mss - skb->len) <= 0) {
new_buf:
                        if (!csk_mem_free(cdev, sk))
                                goto wait_for_sndbuf;

                        if (is_tls_tx(csk)) {
                                skb = get_record_skb(sk,
                                                     select_size(sk, size,
                                                                 flags,
                                                                 TX_TLSHDR_LEN),
                                                     true);
                        } else {
                                skb = get_tx_skb(sk, 0);
                        }
                        if (!skb)
                                goto wait_for_memory;
                        copy = mss;
                }
                if (copy > size)
                        copy = size;

                i = skb_shinfo(skb)->nr_frags;
                if (skb_can_coalesce(skb, i, page, offset)) {
                        skb_shinfo(skb)->frags[i - 1].size += copy;
                } else if (i < MAX_SKB_FRAGS) {
                        get_page(page);
                        skb_fill_page_desc(skb, i, page, offset, copy);
                } else {
                        tx_skb_finalize(skb);
                        push_frames_if_head(sk);
                        goto new_buf;
                }

                skb->len += copy;
                if (skb->len == mss)
                        tx_skb_finalize(skb);
                skb->data_len += copy;
                skb->truesize += copy;
                sk->sk_wmem_queued += copy;
                tp->write_seq += copy;
                copied += copy;
                offset += copy;
                size -= copy;

                if (corked(tp, flags) &&
                    (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)))
                        ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND;

                if (!size)
                        break;

                if (unlikely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND))
                        push_frames_if_head(sk);
                continue;
wait_for_sndbuf:
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
                err = csk_wait_memory(cdev, sk, &timeo);
                if (err)
                        goto do_error;
        }
out:
        csk_reset_flag(csk, CSK_TX_MORE_DATA);
        if (copied)
                chtls_tcp_push(sk, flags);
done:
        release_sock(sk);
        return copied;

do_error:
        if (copied)
                goto out;

out_err:
        if (csk_conn_inline(csk))
                csk_reset_flag(csk, CSK_TX_MORE_DATA);
        copied = sk_stream_error(sk, flags, err);
        goto done;
}

static void chtls_select_window(struct sock *sk)
{
        struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        unsigned int wnd = tp->rcv_wnd;

        wnd = max_t(unsigned int, wnd, tcp_full_space(sk));
        wnd = max_t(unsigned int, MIN_RCV_WND, wnd);

        if (wnd > MAX_RCV_WND)
                wnd = MAX_RCV_WND;

        /*
         * Check if we need to grow the receive window in response to an
         * increase in the socket's receive buffer size.  Some applications
         * increase the buffer size dynamically and rely on the window to
         * grow accordingly.
         */
        if (wnd > tp->rcv_wnd) {
                tp->rcv_wup -= wnd - tp->rcv_wnd;
                tp->rcv_wnd = wnd;
                /* Mark the receive window as updated */
                csk_reset_flag(csk, CSK_UPDATE_RCV_WND);
        }
}

/*
 * Send RX credits through an RX_DATA_ACK CPL message.  We are permitted
 * to return without sending the message in case we cannot allocate
 * an sk_buff.  Returns the number of credits sent.
 */
static u32 send_rx_credits(struct chtls_sock *csk, u32 credits)
{
        struct cpl_rx_data_ack *req;
        struct sk_buff *skb;

        skb = alloc_skb(sizeof(*req), GFP_ATOMIC);
        if (!skb)
                return 0;
        __skb_put(skb, sizeof(*req));
        req = (struct cpl_rx_data_ack *)skb->head;

        set_wr_txq(skb, CPL_PRIORITY_ACK, csk->port_id);
        INIT_TP_WR(req, csk->tid);
        OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
                                                    csk->tid));
        req->credit_dack = cpu_to_be32(RX_CREDITS_V(credits) |
                                       RX_FORCE_ACK_F);
        cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb);
        return credits;
}

#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | \
                             TCPF_FIN_WAIT1 | \
                             TCPF_FIN_WAIT2)

/*
 * Called after some received data has been read.  It returns RX credits
 * to the HW for the amount of data processed.
 */
static void chtls_cleanup_rbuf(struct sock *sk, int copied)
{
        struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
        struct tcp_sock *tp;
        int must_send;
        u32 credits;
        u32 thres;

        thres = 15 * 1024;

        if (!sk_in_state(sk, CREDIT_RETURN_STATE))
                return;

        chtls_select_window(sk);
        tp = tcp_sk(sk);
        credits = tp->copied_seq - tp->rcv_wup;
        if (unlikely(!credits))
                return;

        /*
         * For coalescing to work effectively ensure the receive window has
         * at least 16KB left.
         */
        must_send = credits + 16384 >= tp->rcv_wnd;

        if (must_send || credits >= thres)
                tp->rcv_wup += send_rx_credits(csk, credits);
}

1400 static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
1401                             int nonblock, int flags, int *addr_len)
1402 {
1403         struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1404         struct net_device *dev = csk->egress_dev;
1405         struct chtls_hws *hws = &csk->tlshws;
1406         struct tcp_sock *tp = tcp_sk(sk);
1407         struct adapter *adap;
1408         unsigned long avail;
1409         int buffers_freed;
1410         int copied = 0;
1411         int request;
1412         int target;
1413         long timeo;
1414
1415         adap = netdev2adap(dev);
1416         buffers_freed = 0;
1417
1418         timeo = sock_rcvtimeo(sk, nonblock);
1419         target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
1420         request = len;
1421
1422         if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
1423                 chtls_cleanup_rbuf(sk, copied);
1424
1425         do {
1426                 struct sk_buff *skb;
1427                 u32 offset = 0;
1428
1429                 if (unlikely(tp->urg_data &&
1430                              tp->urg_seq == tp->copied_seq)) {
1431                         if (copied)
1432                                 break;
1433                         if (signal_pending(current)) {
1434                                 copied = timeo ? sock_intr_errno(timeo) :
1435                                         -EAGAIN;
1436                                 break;
1437                         }
1438                 }
1439                 skb = skb_peek(&sk->sk_receive_queue);
1440                 if (skb)
1441                         goto found_ok_skb;
1442                 if (csk->wr_credits &&
1443                     skb_queue_len(&csk->txq) &&
1444                     chtls_push_frames(csk, csk->wr_credits ==
1445                                       csk->wr_max_credits))
1446                         sk->sk_write_space(sk);
1447
1448                 if (copied >= target && !sk->sk_backlog.tail)
1449                         break;
1450
1451                 if (copied) {
1452                         if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
1453                             (sk->sk_shutdown & RCV_SHUTDOWN) ||
1454                             signal_pending(current))
1455                                 break;
1456
1457                         if (!timeo)
1458                                 break;
1459                 } else {
1460                         if (sock_flag(sk, SOCK_DONE))
1461                                 break;
1462                         if (sk->sk_err) {
1463                                 copied = sock_error(sk);
1464                                 break;
1465                         }
1466                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1467                                 break;
1468                         if (sk->sk_state == TCP_CLOSE) {
1469                                 copied = -ENOTCONN;
1470                                 break;
1471                         }
1472                         if (!timeo) {
1473                                 copied = -EAGAIN;
1474                                 break;
1475                         }
1476                         if (signal_pending(current)) {
1477                                 copied = sock_intr_errno(timeo);
1478                                 break;
1479                         }
1480                 }
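                /*
                 * Data may be sitting in the socket backlog; let
                 * release_sock() process it onto the receive queue before
                 * we decide whether to sleep.
                 */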
1481                 if (sk->sk_backlog.tail) {
1482                         release_sock(sk);
1483                         lock_sock(sk);
1484                         chtls_cleanup_rbuf(sk, copied);
1485                         continue;
1486                 }
1487
1488                 if (copied >= target)
1489                         break;
1490                 chtls_cleanup_rbuf(sk, copied);
1491                 sk_wait_data(sk, &timeo, NULL);
1492                 continue;
1493 found_ok_skb:
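                /*
                 * Zero-length skbs carry no payload for us to copy; free
                 * them and decide whether to keep waiting for data.
                 */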
1494                 if (!skb->len) {
1495                         skb_dst_set(skb, NULL);
1496                         __skb_unlink(skb, &sk->sk_receive_queue);
1497                         kfree_skb(skb);
1498
1499                         if (!copied && !timeo) {
1500                                 copied = -EAGAIN;
1501                                 break;
1502                         }
1503
1504                         if (copied < target) {
1505                                 release_sock(sk);
1506                                 lock_sock(sk);
1507                                 continue;
1508                         }
1509                         break;
1510                 }
1511                 offset = hws->copied_seq;
1512                 avail = skb->len - offset;
1513                 if (len < avail)
1514                         avail = len;
1515
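                /*
                 * Urgent data handling: stop the copy short of the urgent
                 * byte, or skip over it when it is not received inline.
                 */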
1516                 if (unlikely(tp->urg_data)) {
1517                         u32 urg_offset = tp->urg_seq - tp->copied_seq;
1518
1519                         if (urg_offset < avail) {
1520                                 if (urg_offset) {
1521                                         avail = urg_offset;
1522                                 } else if (!sock_flag(sk, SOCK_URGINLINE)) {
1523                                         /* First byte is urgent, skip */
1524                                         tp->copied_seq++;
1525                                         offset++;
1526                                         avail--;
1527                                         if (!avail)
1528                                                 goto skip_copy;
1529                                 }
1530                         }
1531                 }
1532                 if (skb_copy_datagram_msg(skb, offset, msg, avail)) {
1533                         if (!copied) {
1534                                 copied = -EFAULT;
1535                                 break;
1536                         }
1537                 }
1538
1539                 copied += avail;
1540                 len -= avail;
1541                 hws->copied_seq += avail;
1542 skip_copy:
1543                 if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
1544                         tp->urg_data = 0;
1545
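                /*
                 * The skb has been fully consumed.  For an skb carrying the
                 * TLS header, advance copied_seq by the whole skb and latch
                 * the payload length from hdr_len; otherwise advance by the
                 * payload length latched earlier.  Then free the skb and
                 * reset the intra-record offset.
                 */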
1546                 if ((avail + offset) >= skb->len) {
1547                         if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
1548                                 tp->copied_seq += skb->len;
1549                                 hws->rcvpld = skb->hdr_len;
1550                         } else {
1551                                 tp->copied_seq += hws->rcvpld;
1552                         }
1553                         chtls_free_skb(sk, skb);
1554                         buffers_freed++;
1555                         hws->copied_seq = 0;
1556                         if (copied >= target &&
1557                             !skb_peek(&sk->sk_receive_queue))
1558                                 break;
1559                 }
1560         } while (len > 0);
1561
1562         if (buffers_freed)
1563                 chtls_cleanup_rbuf(sk, copied);
1564         release_sock(sk);
1565         return copied;
1566 }
1567
1568 /*
1569  * Peek at data in a socket's receive buffer.
1570  */
1571 static int peekmsg(struct sock *sk, struct msghdr *msg,
1572                    size_t len, int nonblock, int flags)
1573 {
1574         struct tcp_sock *tp = tcp_sk(sk);
1575         u32 peek_seq, offset;
1576         struct sk_buff *skb;
1577         int copied = 0;
1578         size_t avail;          /* amount of available data in current skb */
1579         long timeo;
1580
1581         lock_sock(sk);
1582         timeo = sock_rcvtimeo(sk, nonblock);
1583         peek_seq = tp->copied_seq;
1584
1585         do {
1586                 if (unlikely(tp->urg_data && tp->urg_seq == peek_seq)) {
1587                         if (copied)
1588                                 break;
1589                         if (signal_pending(current)) {
1590                                 copied = timeo ? sock_intr_errno(timeo) :
1591                                         -EAGAIN;
1592                                 break;
1593                         }
1594                 }
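                /* Find the skb that contains the current peek position. */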
1595
1596                 skb_queue_walk(&sk->sk_receive_queue, skb) {
1597                         offset = peek_seq - ULP_SKB_CB(skb)->seq;
1598                         if (offset < skb->len)
1599                                 goto found_ok_skb;
1600                 }
1601
1602                 /* empty receive queue */
1603                 if (copied)
1604                         break;
1605                 if (sock_flag(sk, SOCK_DONE))
1606                         break;
1607                 if (sk->sk_err) {
1608                         copied = sock_error(sk);
1609                         break;
1610                 }
1611                 if (sk->sk_shutdown & RCV_SHUTDOWN)
1612                         break;
1613                 if (sk->sk_state == TCP_CLOSE) {
1614                         copied = -ENOTCONN;
1615                         break;
1616                 }
1617                 if (!timeo) {
1618                         copied = -EAGAIN;
1619                         break;
1620                 }
1621                 if (signal_pending(current)) {
1622                         copied = sock_intr_errno(timeo);
1623                         break;
1624                 }
1625
1626                 if (sk->sk_backlog.tail) {
1627                         /* Do not sleep, just process backlog. */
1628                         release_sock(sk);
1629                         lock_sock(sk);
1630                 } else {
1631                         sk_wait_data(sk, &timeo, NULL);
1632                 }
1633
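                /*
                 * copied_seq may have advanced while we waited (data was
                 * consumed under us); restart the peek from the new read
                 * position.
                 */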
1634                 if (unlikely(peek_seq != tp->copied_seq)) {
1635                         if (net_ratelimit())
1636                                 pr_info("TCP(%s:%d), race in MSG_PEEK.\n",
1637                                         current->comm, current->pid);
1638                         peek_seq = tp->copied_seq;
1639                 }
1640                 continue;
1641
1642 found_ok_skb:
1643                 avail = skb->len - offset;
1644                 if (len < avail)
1645                         avail = len;
1646                 /*
1647                  * Do we have urgent data here?  We need to skip over the
1648                  * urgent byte.
1649                  */
1650                 if (unlikely(tp->urg_data)) {
1651                         u32 urg_offset = tp->urg_seq - peek_seq;
1652
1653                         if (urg_offset < avail) {
1654                                 /*
1655                                  * The amount of data we are preparing to copy
1656                                  * contains urgent data.
1657                                  */
1658                                 if (!urg_offset) { /* First byte is urgent */
1659                                         if (!sock_flag(sk, SOCK_URGINLINE)) {
1660                                                 peek_seq++;
1661                                                 offset++;
1662                                                 avail--;
1663                                         }
1664                                         if (!avail)
1665                                                 continue;
1666                                 } else {
1667                                         /* stop short of the urgent data */
1668                                         avail = urg_offset;
1669                                 }
1670                         }
1671                 }
1672
1673                 /*
1674                  * If MSG_TRUNC is specified, the data is discarded.
1675                  */
1676                 if (likely(!(flags & MSG_TRUNC)))
1677                         if (skb_copy_datagram_msg(skb, offset, msg, avail)) {
1678                                 if (!copied) {
1679                                         copied = -EFAULT;
1680                                         break;
1681                                 }
1682                         }
1683                 peek_seq += avail;
1684                 copied += avail;
1685                 len -= avail;
1686         } while (len > 0);
1687
1688         release_sock(sk);
1689         return copied;
1690 }
1691
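/*
 * recvmsg() entry point for offloaded connections.  MSG_OOB falls back to
 * the stock TCP handler and MSG_PEEK is handled by peekmsg(); connections
 * with inline TLS receive enabled are dispatched to chtls_pt_recvmsg().
 * The remainder copies plain offloaded TCP payload from the receive queue.
 */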
1692 int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
1693                   int nonblock, int flags, int *addr_len)
1694 {
1695         struct tcp_sock *tp = tcp_sk(sk);
1696         struct chtls_sock *csk;
1697         struct chtls_hws *hws;
1698         unsigned long avail;    /* amount of available data in current skb */
1699         int buffers_freed;
1700         int copied = 0;
1701         int request;
1702         long timeo;
1703         int target;             /* Read at least this many bytes */
1704
1705         buffers_freed = 0;
1706
1707         if (unlikely(flags & MSG_OOB))
1708                 return tcp_prot.recvmsg(sk, msg, len, nonblock, flags,
1709                                         addr_len);
1710
1711         if (unlikely(flags & MSG_PEEK))
1712                 return peekmsg(sk, msg, len, nonblock, flags);
1713
1714         if (sk_can_busy_loop(sk) &&
1715             skb_queue_empty(&sk->sk_receive_queue) &&
1716             sk->sk_state == TCP_ESTABLISHED)
1717                 sk_busy_loop(sk, nonblock);
1718
1719         lock_sock(sk);
1720         csk = rcu_dereference_sk_user_data(sk);
1721         hws = &csk->tlshws;
1722
1723         if (is_tls_rx(csk))
1724                 return chtls_pt_recvmsg(sk, msg, len, nonblock,
1725                                         flags, addr_len);
1726
1727         timeo = sock_rcvtimeo(sk, nonblock);
1728         target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
1729         request = len;
1730
1731         if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
1732                 chtls_cleanup_rbuf(sk, copied);
1733
1734         do {
1735                 struct sk_buff *skb;
1736                 u32 offset;
1737
1738                 if (unlikely(tp->urg_data && tp->urg_seq == tp->copied_seq)) {
1739                         if (copied)
1740                                 break;
1741                         if (signal_pending(current)) {
1742                                 copied = timeo ? sock_intr_errno(timeo) :
1743                                         -EAGAIN;
1744                                 break;
1745                         }
1746                 }
1747
1748                 skb = skb_peek(&sk->sk_receive_queue);
1749                 if (skb)
1750                         goto found_ok_skb;
1751
1752                 if (csk->wr_credits &&
1753                     skb_queue_len(&csk->txq) &&
1754                     chtls_push_frames(csk, csk->wr_credits ==
1755                                       csk->wr_max_credits))
1756                         sk->sk_write_space(sk);
1757
1758                 if (copied >= target && !sk->sk_backlog.tail)
1759                         break;
1760
1761                 if (copied) {
1762                         if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
1763                             (sk->sk_shutdown & RCV_SHUTDOWN) ||
1764                             signal_pending(current))
1765                                 break;
1766                 } else {
1767                         if (sock_flag(sk, SOCK_DONE))
1768                                 break;
1769                         if (sk->sk_err) {
1770                                 copied = sock_error(sk);
1771                                 break;
1772                         }
1773                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1774                                 break;
1775                         if (sk->sk_state == TCP_CLOSE) {
1776                                 copied = -ENOTCONN;
1777                                 break;
1778                         }
1779                         if (!timeo) {
1780                                 copied = -EAGAIN;
1781                                 break;
1782                         }
1783                         if (signal_pending(current)) {
1784                                 copied = sock_intr_errno(timeo);
1785                                 break;
1786                         }
1787                 }
1788
1789                 if (sk->sk_backlog.tail) {
1790                         release_sock(sk);
1791                         lock_sock(sk);
1792                         chtls_cleanup_rbuf(sk, copied);
1793                         continue;
1794                 }
1795
1796                 if (copied >= target)
1797                         break;
1798                 chtls_cleanup_rbuf(sk, copied);
1799                 sk_wait_data(sk, &timeo, NULL);
1800                 continue;
1801
1802 found_ok_skb:
1803                 if (!skb->len) {
1804                         chtls_kfree_skb(sk, skb);
1805                         if (!copied && !timeo) {
1806                                 copied = -EAGAIN;
1807                                 break;
1808                         }
1809
1810                         if (copied < target)
1811                                 continue;
1812
1813                         break;
1814                 }
1815
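                /* Offset of the next unread byte within this skb. */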
1816                 offset = tp->copied_seq - ULP_SKB_CB(skb)->seq;
1817                 avail = skb->len - offset;
1818                 if (len < avail)
1819                         avail = len;
1820
1821                 if (unlikely(tp->urg_data)) {
1822                         u32 urg_offset = tp->urg_seq - tp->copied_seq;
1823
1824                         if (urg_offset < avail) {
1825                                 if (urg_offset) {
1826                                         avail = urg_offset;
1827                                 } else if (!sock_flag(sk, SOCK_URGINLINE)) {
1828                                         tp->copied_seq++;
1829                                         offset++;
1830                                         avail--;
1831                                         if (!avail)
1832                                                 goto skip_copy;
1833                                 }
1834                         }
1835                 }
1836
1837                 if (likely(!(flags & MSG_TRUNC))) {
1838                         if (skb_copy_datagram_msg(skb, offset,
1839                                                   msg, avail)) {
1840                                 if (!copied) {
1841                                         copied = -EFAULT;
1842                                         break;
1843                                 }
1844                         }
1845                 }
1846
1847                 tp->copied_seq += avail;
1848                 copied += avail;
1849                 len -= avail;
1850
1851 skip_copy:
1852                 if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
1853                         tp->urg_data = 0;
1854
1855                 if (avail + offset >= skb->len) {
1856                         if (likely(skb))
1857                                 chtls_free_skb(sk, skb);
1858                         buffers_freed++;
1859
1860                         if (copied >= target &&
1861                             !skb_peek(&sk->sk_receive_queue))
1862                                 break;
1863                 }
1864         } while (len > 0);
1865
1866         if (buffers_freed)
1867                 chtls_cleanup_rbuf(sk, copied);
1868
1869         release_sock(sk);
1870         return copied;
1871 }