// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>

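/* Number of references to the buffer's page held outside the driver:
 * the raw page refcount minus the driver-owned bias. Zero means no SKB
 * fragments still point at the page.
 */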
static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
{
        return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
}

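/* Drop the driver's remaining bias references on the page. When
 * @free_page is true the page is also unmapped and freed; QPL callers
 * pass false because QPL pages are freed in gve_main.
 */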
static void gve_free_page_dqo(struct gve_priv *priv,
                              struct gve_rx_buf_state_dqo *bs,
                              bool free_page)
{
        page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
        if (free_page)
                gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
                              DMA_FROM_DEVICE);
        bs->page_info.page = NULL;
}

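/* Buffer states are kept on singly linked lists threaded through their
 * s16 ->next indices, with -1 as the end-of-list sentinel. An allocated
 * state points ->next back at its own index.
 */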
static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
{
        struct gve_rx_buf_state_dqo *buf_state;
        s16 buffer_id;

        buffer_id = rx->dqo.free_buf_states;
        if (unlikely(buffer_id == -1))
                return NULL;

        buf_state = &rx->dqo.buf_states[buffer_id];

        /* Remove buf_state from free list */
        rx->dqo.free_buf_states = buf_state->next;

        /* Point buf_state to itself to mark it as allocated */
        buf_state->next = buffer_id;

        return buf_state;
}

static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
                                       struct gve_rx_buf_state_dqo *buf_state)
{
        s16 buffer_id = buf_state - rx->dqo.buf_states;

        return buf_state->next == buffer_id;
}

static void gve_free_buf_state(struct gve_rx_ring *rx,
                               struct gve_rx_buf_state_dqo *buf_state)
{
        s16 buffer_id = buf_state - rx->dqo.buf_states;

        buf_state->next = rx->dqo.free_buf_states;
        rx->dqo.free_buf_states = buffer_id;
}

static struct gve_rx_buf_state_dqo *
gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list)
{
        struct gve_rx_buf_state_dqo *buf_state;
        s16 buffer_id;

        buffer_id = list->head;
        if (unlikely(buffer_id == -1))
                return NULL;

        buf_state = &rx->dqo.buf_states[buffer_id];

        /* Remove buf_state from list */
        list->head = buf_state->next;
        if (buf_state->next == -1)
                list->tail = -1;

        /* Point buf_state to itself to mark it as allocated */
        buf_state->next = buffer_id;

        return buf_state;
}

static void gve_enqueue_buf_state(struct gve_rx_ring *rx,
                                  struct gve_index_list *list,
                                  struct gve_rx_buf_state_dqo *buf_state)
{
        s16 buffer_id = buf_state - rx->dqo.buf_states;

        buf_state->next = -1;

        if (list->head == -1) {
                list->head = buffer_id;
                list->tail = buffer_id;
        } else {
                int tail = list->tail;

                rx->dqo.buf_states[tail].next = buffer_id;
                list->tail = buffer_id;
        }
}

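/* Find a buffer state whose page can be reposted to the NIC: prefer the
 * recycled list, then scan a few entries of the used list for pages with
 * no outstanding references. For RDA, when no free states remain, one
 * used entry may be discarded so its state can back a fresh page.
 */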
static struct gve_rx_buf_state_dqo *
gve_get_recycled_buf_state(struct gve_rx_ring *rx)
{
        struct gve_rx_buf_state_dqo *buf_state;
        int i;

        /* Recycled buf states are immediately usable. */
        buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states);
        if (likely(buf_state))
                return buf_state;

        if (unlikely(rx->dqo.used_buf_states.head == -1))
                return NULL;

        /* Used buf states are only usable when ref count reaches 0, which means
         * no SKBs refer to them.
         *
         * Search a limited number before giving up.
         */
        for (i = 0; i < 5; i++) {
                buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
                if (gve_buf_ref_cnt(buf_state) == 0) {
                        rx->dqo.used_buf_states_cnt--;
                        return buf_state;
                }

                gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
        }

        /* For QPL, we cannot allocate any new buffers and must
         * wait for the existing ones to be available.
         */
        if (rx->dqo.qpl)
                return NULL;

        /* If there are no free buf states, discard an entry from
         * `used_buf_states` so it can be used.
         */
        if (unlikely(rx->dqo.free_buf_states == -1)) {
                buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
                if (gve_buf_ref_cnt(buf_state) == 0)
                        return buf_state;

                gve_free_page_dqo(rx->gve, buf_state, true);
                gve_free_buf_state(rx, buf_state);
        }

        return NULL;
}

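/* Attach a page to @buf_state. RDA allocates and DMA-maps a fresh page;
 * QPL takes the next page pre-registered with the device. The page is
 * then given a large refcount bias so fragments can be handed out by
 * decrementing the bias instead of the atomic page refcount.
 */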
static int gve_alloc_page_dqo(struct gve_rx_ring *rx,
                              struct gve_rx_buf_state_dqo *buf_state)
{
        struct gve_priv *priv = rx->gve;
        u32 idx;

        if (!rx->dqo.qpl) {
                int err;

                err = gve_alloc_page(priv, &priv->pdev->dev,
                                     &buf_state->page_info.page,
                                     &buf_state->addr,
                                     DMA_FROM_DEVICE, GFP_ATOMIC);
                if (err)
                        return err;
        } else {
                idx = rx->dqo.next_qpl_page_idx;
                if (idx >= priv->rx_pages_per_qpl) {
                        net_err_ratelimited("%s: Out of QPL pages\n",
                                            priv->dev->name);
                        return -ENOMEM;
                }
                buf_state->page_info.page = rx->dqo.qpl->pages[idx];
                buf_state->addr = rx->dqo.qpl->page_buses[idx];
                rx->dqo.next_qpl_page_idx++;
        }
        buf_state->page_info.page_offset = 0;
        buf_state->page_info.page_address =
                page_address(buf_state->page_info.page);
        buf_state->last_single_ref_offset = 0;

        /* The page already has 1 ref. */
        page_ref_add(buf_state->page_info.page, INT_MAX - 1);
        buf_state->page_info.pagecnt_bias = INT_MAX;

        return 0;
}

static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        struct device *hdev = &priv->pdev->dev;
        int buf_count = rx->dqo.bufq.mask + 1;

        if (rx->dqo.hdr_bufs.data) {
                dma_free_coherent(hdev, priv->header_buf_size * buf_count,
                                  rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
                rx->dqo.hdr_bufs.data = NULL;
        }
}

void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
        int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

        if (!gve_rx_was_added_to_block(priv, idx))
                return;

        gve_remove_napi(priv, ntfy_idx);
        gve_rx_remove_from_block(priv, idx);
}

static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
                                 struct gve_rx_alloc_rings_cfg *cfg)
{
        struct device *hdev = &priv->pdev->dev;
        size_t completion_queue_slots;
        size_t buffer_queue_slots;
        int idx = rx->q_num;
        size_t size;
        int i;

        completion_queue_slots = rx->dqo.complq.mask + 1;
        buffer_queue_slots = rx->dqo.bufq.mask + 1;

        if (rx->q_resources) {
                dma_free_coherent(hdev, sizeof(*rx->q_resources),
                                  rx->q_resources, rx->q_resources_bus);
                rx->q_resources = NULL;
        }

        for (i = 0; i < rx->dqo.num_buf_states; i++) {
                struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
                /* Only free page for RDA. QPL pages are freed in gve_main. */
                if (bs->page_info.page)
                        gve_free_page_dqo(priv, bs, !rx->dqo.qpl);
        }
        if (rx->dqo.qpl) {
                gve_unassign_qpl(cfg->qpl_cfg, rx->dqo.qpl->id);
                rx->dqo.qpl = NULL;
        }

        if (rx->dqo.bufq.desc_ring) {
                size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
                dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
                                  rx->dqo.bufq.bus);
                rx->dqo.bufq.desc_ring = NULL;
        }

        if (rx->dqo.complq.desc_ring) {
                size = sizeof(rx->dqo.complq.desc_ring[0]) *
                        completion_queue_slots;
                dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
                                  rx->dqo.complq.bus);
                rx->dqo.complq.desc_ring = NULL;
        }

        kvfree(rx->dqo.buf_states);
        rx->dqo.buf_states = NULL;

        gve_rx_free_hdr_bufs(priv, rx);

        netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        struct device *hdev = &priv->pdev->dev;
        int buf_count = rx->dqo.bufq.mask + 1;

        rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count,
                                                   &rx->dqo.hdr_bufs.addr, GFP_KERNEL);
        if (!rx->dqo.hdr_bufs.data)
                return -ENOMEM;

        return 0;
}

void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
{
        int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);

        gve_rx_add_to_block(priv, idx);
        gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}

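/* Allocate one DQO RX ring: the buffer-state array, optional header-split
 * buffers, the completion and buffer descriptor rings, the QPL (when not
 * using raw addressing) and the queue resources block.
 */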
static int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
                                 struct gve_rx_alloc_rings_cfg *cfg,
                                 struct gve_rx_ring *rx,
                                 int idx)
{
        struct device *hdev = &priv->pdev->dev;
        size_t size;
        int i;

        const u32 buffer_queue_slots = cfg->raw_addressing ?
                priv->options_dqo_rda.rx_buff_ring_entries : cfg->ring_size;
        const u32 completion_queue_slots = cfg->ring_size;

        netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");

        memset(rx, 0, sizeof(*rx));
        rx->gve = priv;
        rx->q_num = idx;
        rx->dqo.bufq.mask = buffer_queue_slots - 1;
        rx->dqo.complq.num_free_slots = completion_queue_slots;
        rx->dqo.complq.mask = completion_queue_slots - 1;
        rx->ctx.skb_head = NULL;
        rx->ctx.skb_tail = NULL;

        rx->dqo.num_buf_states = cfg->raw_addressing ?
                min_t(s16, S16_MAX, buffer_queue_slots * 4) :
                priv->rx_pages_per_qpl;
        rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
                                      sizeof(rx->dqo.buf_states[0]),
                                      GFP_KERNEL);
        if (!rx->dqo.buf_states)
                return -ENOMEM;

        /* Allocate header buffers for header-split */
        if (cfg->enable_header_split)
                if (gve_rx_alloc_hdr_bufs(priv, rx))
                        goto err;

        /* Set up linked list of buffer IDs */
        for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
                rx->dqo.buf_states[i].next = i + 1;

        rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
        rx->dqo.recycled_buf_states.head = -1;
        rx->dqo.recycled_buf_states.tail = -1;
        rx->dqo.used_buf_states.head = -1;
        rx->dqo.used_buf_states.tail = -1;

        /* Allocate RX completion queue */
        size = sizeof(rx->dqo.complq.desc_ring[0]) *
                completion_queue_slots;
        rx->dqo.complq.desc_ring =
                dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
        if (!rx->dqo.complq.desc_ring)
                goto err;

        /* Allocate RX buffer queue */
        size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
        rx->dqo.bufq.desc_ring =
                dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
        if (!rx->dqo.bufq.desc_ring)
                goto err;

        if (!cfg->raw_addressing) {
                rx->dqo.qpl = gve_assign_rx_qpl(cfg, rx->q_num);
                if (!rx->dqo.qpl)
                        goto err;
                rx->dqo.next_qpl_page_idx = 0;
        }

        rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
                                             &rx->q_resources_bus, GFP_KERNEL);
        if (!rx->q_resources)
                goto err;

        return 0;

err:
        gve_rx_free_ring_dqo(priv, rx, cfg);
        return -ENOMEM;
}

void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
{
        const struct gve_rx_ring *rx = &priv->rx[queue_idx];
        u64 index = be32_to_cpu(rx->q_resources->db_index);

        iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}

int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
                           struct gve_rx_alloc_rings_cfg *cfg)
{
        struct gve_rx_ring *rx;
        int err;
        int i;

        if (!cfg->raw_addressing && !cfg->qpls) {
                netif_err(priv, drv, priv->dev,
                          "Cannot alloc QPL ring before allocing QPLs\n");
                return -EINVAL;
        }

        rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring),
                      GFP_KERNEL);
        if (!rx)
                return -ENOMEM;

        for (i = 0; i < cfg->qcfg->num_queues; i++) {
                err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
                if (err) {
                        netif_err(priv, drv, priv->dev,
                                  "Failed to alloc rx ring=%d: err=%d\n",
                                  i, err);
                        goto err;
                }
        }

        cfg->rx = rx;
        return 0;

err:
        for (i--; i >= 0; i--)
                gve_rx_free_ring_dqo(priv, &rx[i], cfg);
        kvfree(rx);
        return err;
}

void gve_rx_free_rings_dqo(struct gve_priv *priv,
                           struct gve_rx_alloc_rings_cfg *cfg)
{
        struct gve_rx_ring *rx = cfg->rx;
        int i;

        if (!rx)
                return;

        for (i = 0; i < cfg->qcfg->num_queues; i++)
                gve_rx_free_ring_dqo(priv, &rx[i], cfg);

        kvfree(rx);
        cfg->rx = NULL;
}

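/* Post as many RX buffers as both the buffer queue and the completion
 * queue have room for, preferring recycled pages and allocating new ones
 * otherwise. The doorbell is rung once every GVE_RX_BUF_THRESH_DQO
 * buffers rather than once per buffer.
 */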
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
{
        struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
        struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
        struct gve_priv *priv = rx->gve;
        u32 num_avail_slots;
        u32 num_full_slots;
        u32 num_posted = 0;

        num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
        num_avail_slots = bufq->mask - num_full_slots;

        num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
        while (num_posted < num_avail_slots) {
                struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];
                struct gve_rx_buf_state_dqo *buf_state;

                buf_state = gve_get_recycled_buf_state(rx);
                if (unlikely(!buf_state)) {
                        buf_state = gve_alloc_buf_state(rx);
                        if (unlikely(!buf_state))
                                break;

                        if (unlikely(gve_alloc_page_dqo(rx, buf_state))) {
                                u64_stats_update_begin(&rx->statss);
                                rx->rx_buf_alloc_fail++;
                                u64_stats_update_end(&rx->statss);
                                gve_free_buf_state(rx, buf_state);
                                break;
                        }
                }

                desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
                desc->buf_addr = cpu_to_le64(buf_state->addr +
                                             buf_state->page_info.page_offset);
                if (rx->dqo.hdr_bufs.data)
                        desc->header_buf_addr =
                                cpu_to_le64(rx->dqo.hdr_bufs.addr +
                                            priv->header_buf_size * bufq->tail);

                bufq->tail = (bufq->tail + 1) & bufq->mask;
                complq->num_free_slots--;
                num_posted++;

                if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
                        gve_rx_write_doorbell_dqo(priv, rx->q_num);
        }

        rx->fill_cnt += num_posted;
}

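/* Advance @buf_state to the next buffer-sized chunk of its page and put
 * it back on the recycled list while its chunks are known to be safe to
 * reuse; otherwise park it on the used list until the page's references
 * drop.
 */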
static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
                                struct gve_rx_buf_state_dqo *buf_state)
{
        const u16 data_buffer_size = priv->data_buffer_size_dqo;
        int pagecount;

        /* Can't reuse if we only fit one buffer per page */
        if (data_buffer_size * 2 > PAGE_SIZE)
                goto mark_used;

        pagecount = gve_buf_ref_cnt(buf_state);

        /* Record the offset when we have a single remaining reference.
         *
         * When this happens, we know all of the other offsets of the page are
         * usable.
         */
        if (pagecount == 1) {
                buf_state->last_single_ref_offset =
                        buf_state->page_info.page_offset;
        }

        /* Use the next buffer sized chunk in the page. */
        buf_state->page_info.page_offset += data_buffer_size;
        buf_state->page_info.page_offset &= (PAGE_SIZE - 1);

        /* If we wrap around to the same offset without ever dropping to 1
         * reference, then we don't know if this offset was ever freed.
         */
        if (buf_state->page_info.page_offset ==
            buf_state->last_single_ref_offset) {
                goto mark_used;
        }

        gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
        return;

mark_used:
        gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
        rx->dqo.used_buf_states_cnt++;
}

static void gve_rx_skb_csum(struct sk_buff *skb,
                            const struct gve_rx_compl_desc_dqo *desc,
                            struct gve_ptype ptype)
{
        skb->ip_summed = CHECKSUM_NONE;

        /* HW did not identify and process L3 and L4 headers. */
        if (unlikely(!desc->l3_l4_processed))
                return;

        if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
                if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
                        return;
        } else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
                /* Checksum should be skipped if this flag is set. */
                if (unlikely(desc->ipv6_ex_add))
                        return;
        }

        if (unlikely(desc->csum_l4_err))
                return;

        switch (ptype.l4_type) {
        case GVE_L4_TYPE_TCP:
        case GVE_L4_TYPE_UDP:
        case GVE_L4_TYPE_ICMP:
        case GVE_L4_TYPE_SCTP:
                skb->ip_summed = CHECKSUM_UNNECESSARY;
                break;
        default:
                break;
        }
}

static void gve_rx_skb_hash(struct sk_buff *skb,
                            const struct gve_rx_compl_desc_dqo *compl_desc,
                            struct gve_ptype ptype)
{
        enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;

        if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
                hash_type = PKT_HASH_TYPE_L4;
        else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
                hash_type = PKT_HASH_TYPE_L3;

        skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}

static void gve_rx_free_skb(struct gve_rx_ring *rx)
{
        if (!rx->ctx.skb_head)
                return;

        dev_kfree_skb_any(rx->ctx.skb_head);
        rx->ctx.skb_head = NULL;
        rx->ctx.skb_tail = NULL;
}

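/* QPL mode only: returns true once nearly all buffer states are parked on
 * the used list, i.e. the ring is close to running out of postable buffers
 * and incoming data should be copied into freshly allocated pages instead.
 */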
static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
{
        if (!rx->dqo.qpl)
                return false;
        if (rx->dqo.used_buf_states_cnt <
                     (rx->dqo.num_buf_states -
                     GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
                return false;
        return true;
}

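/* Copy the received fragment into a newly allocated page and attach that
 * page to the tail skb, so the original QPL buffer can be recycled
 * immediately instead of being held until the stack frees the skb.
 */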
static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
                                struct gve_rx_buf_state_dqo *buf_state,
                                u16 buf_len)
{
        struct page *page = alloc_page(GFP_ATOMIC);
        int num_frags;

        if (!page)
                return -ENOMEM;

        memcpy(page_address(page),
               buf_state->page_info.page_address +
               buf_state->page_info.page_offset,
               buf_len);
        num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
        skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
                        0, buf_len, PAGE_SIZE);

        u64_stats_update_begin(&rx->statss);
        rx->rx_frag_alloc_cnt++;
        u64_stats_update_end(&rx->statss);
        /* Return unused buffer. */
        gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
        return 0;
}

/* Chains multiple skbs for a single rx packet.
 * Returns 0 if the buffer is appended, -1 otherwise.
 */
static int gve_rx_append_frags(struct napi_struct *napi,
                               struct gve_rx_buf_state_dqo *buf_state,
                               u16 buf_len, struct gve_rx_ring *rx,
                               struct gve_priv *priv)
{
        int num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;

        if (unlikely(num_frags == MAX_SKB_FRAGS)) {
                struct sk_buff *skb;

                skb = napi_alloc_skb(napi, 0);
                if (!skb)
                        return -1;

                if (rx->ctx.skb_tail == rx->ctx.skb_head)
                        skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
                else
                        rx->ctx.skb_tail->next = skb;
                rx->ctx.skb_tail = skb;
                num_frags = 0;
        }
        if (rx->ctx.skb_tail != rx->ctx.skb_head) {
                rx->ctx.skb_head->len += buf_len;
                rx->ctx.skb_head->data_len += buf_len;
                rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo;
        }

        /* Trigger on-demand page allocation if we are running low on buffers */
        if (gve_rx_should_trigger_copy_ondemand(rx))
                return gve_rx_copy_ondemand(rx, buf_state, buf_len);

        skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
                        buf_state->page_info.page,
                        buf_state->page_info.page_offset,
                        buf_len, priv->data_buffer_size_dqo);
        gve_dec_pagecnt_bias(&buf_state->page_info);

        /* Advances buffer page-offset if page is partially used.
         * Marks buffer as used if page is full.
         */
        gve_try_recycle_buf(priv, rx, buf_state);
        return 0;
}

/* Returns 0 if descriptor is completed successfully.
 * Returns -EINVAL if descriptor is invalid.
 * Returns -ENOMEM if data cannot be copied to skb.
 */
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
                      const struct gve_rx_compl_desc_dqo *compl_desc,
                      u32 desc_idx, int queue_idx)
{
        const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
        const bool hbo = compl_desc->header_buffer_overflow;
        const bool eop = compl_desc->end_of_packet != 0;
        const bool hsplit = compl_desc->split_header;
        struct gve_rx_buf_state_dqo *buf_state;
        struct gve_priv *priv = rx->gve;
        u16 buf_len;
        u16 hdr_len;

        if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
                net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
                                    priv->dev->name, buffer_id);
                return -EINVAL;
        }
        buf_state = &rx->dqo.buf_states[buffer_id];
        if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
                net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
                                    priv->dev->name, buffer_id);
                return -EINVAL;
        }

        if (unlikely(compl_desc->rx_error)) {
                gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
                                      buf_state);
                return -EINVAL;
        }

        buf_len = compl_desc->packet_len;
        hdr_len = compl_desc->header_len;

        /* The page might not have been used for a while and was likely last
         * written by a different thread.
         */
        prefetch(buf_state->page_info.page);

        /* Copy the header into the skb in the case of header split */
        if (hsplit) {
                int unsplit = 0;

                if (hdr_len && !hbo) {
                        rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
                                                            rx->dqo.hdr_bufs.data +
                                                            desc_idx * priv->header_buf_size,
                                                            hdr_len);
                        if (unlikely(!rx->ctx.skb_head))
                                goto error;
                        rx->ctx.skb_tail = rx->ctx.skb_head;
                } else {
                        unsplit = 1;
                }
                u64_stats_update_begin(&rx->statss);
                rx->rx_hsplit_pkt++;
                rx->rx_hsplit_unsplit_pkt += unsplit;
                rx->rx_hsplit_bytes += hdr_len;
                u64_stats_update_end(&rx->statss);
        }

        /* Sync the portion of the DMA buffer the CPU is about to read. */
        dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
                                      buf_state->page_info.page_offset,
                                      buf_len, DMA_FROM_DEVICE);

        /* Append to current skb if one exists. */
        if (rx->ctx.skb_head) {
                if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
                                                 priv)) != 0) {
                        goto error;
                }
                return 0;
        }

        if (eop && buf_len <= priv->rx_copybreak) {
                rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
                                               &buf_state->page_info, buf_len);
                if (unlikely(!rx->ctx.skb_head))
                        goto error;
                rx->ctx.skb_tail = rx->ctx.skb_head;

                u64_stats_update_begin(&rx->statss);
                rx->rx_copied_pkt++;
                rx->rx_copybreak_pkt++;
                u64_stats_update_end(&rx->statss);

                gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
                                      buf_state);
                return 0;
        }

        rx->ctx.skb_head = napi_get_frags(napi);
        if (unlikely(!rx->ctx.skb_head))
                goto error;
        rx->ctx.skb_tail = rx->ctx.skb_head;

        if (gve_rx_should_trigger_copy_ondemand(rx)) {
                if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
                        goto error;
                return 0;
        }

        skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
                        buf_state->page_info.page_offset, buf_len,
                        priv->data_buffer_size_dqo);
        gve_dec_pagecnt_bias(&buf_state->page_info);

        gve_try_recycle_buf(priv, rx, buf_state);
        return 0;

error:
        gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
        return -ENOMEM;
}

static int gve_rx_complete_rsc(struct sk_buff *skb,
                               const struct gve_rx_compl_desc_dqo *desc,
                               struct gve_ptype ptype)
{
        struct skb_shared_info *shinfo = skb_shinfo(skb);

        /* Only TCP is supported right now. */
        if (ptype.l4_type != GVE_L4_TYPE_TCP)
                return -EINVAL;

        switch (ptype.l3_type) {
        case GVE_L3_TYPE_IPV4:
                shinfo->gso_type = SKB_GSO_TCPV4;
                break;
        case GVE_L3_TYPE_IPV6:
                shinfo->gso_type = SKB_GSO_TCPV6;
                break;
        default:
                return -EINVAL;
        }

        shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
        return 0;
}

/* Returns 0 if the skb is completed successfully, a negative error code
 * otherwise.
 */
static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
                               const struct gve_rx_compl_desc_dqo *desc,
                               netdev_features_t feat)
{
        struct gve_ptype ptype =
                rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
        int err;

        skb_record_rx_queue(rx->ctx.skb_head, rx->q_num);

        if (feat & NETIF_F_RXHASH)
                gve_rx_skb_hash(rx->ctx.skb_head, desc, ptype);

        if (feat & NETIF_F_RXCSUM)
                gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);

        /* RSC packets must set gso_size, otherwise the TCP stack will complain
         * that packets are larger than the MTU.
         */
        if (desc->rsc) {
                err = gve_rx_complete_rsc(rx->ctx.skb_head, desc, ptype);
                if (err < 0)
                        return err;
        }

        if (skb_headlen(rx->ctx.skb_head) == 0)
                napi_gro_frags(napi);
        else
                napi_gro_receive(napi, rx->ctx.skb_head);

        return 0;
}

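/* NAPI poll for a DQO RX queue: process completion descriptors until the
 * budget is exhausted or the queue runs dry, then repost buffers and
 * update stats. Returns the number of completed packets processed.
 */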
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
        struct napi_struct *napi = &block->napi;
        netdev_features_t feat = napi->dev->features;

        struct gve_rx_ring *rx = block->rx;
        struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;

        u32 work_done = 0;
        u64 bytes = 0;
        int err;

        while (work_done < budget) {
                struct gve_rx_compl_desc_dqo *compl_desc =
                        &complq->desc_ring[complq->head];
                u32 pkt_bytes;

                /* No more new packets */
                if (compl_desc->generation == complq->cur_gen_bit)
                        break;

                /* Prefetch the next two descriptors. */
                prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
                prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);

                /* Do not read data until we own the descriptor */
                dma_rmb();

                err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num);
                if (err < 0) {
                        gve_rx_free_skb(rx);
                        u64_stats_update_begin(&rx->statss);
                        if (err == -ENOMEM)
                                rx->rx_skb_alloc_fail++;
                        else if (err == -EINVAL)
                                rx->rx_desc_err_dropped_pkt++;
                        u64_stats_update_end(&rx->statss);
                }

                complq->head = (complq->head + 1) & complq->mask;
                complq->num_free_slots++;

                /* When the ring wraps, the generation bit is flipped. */
                complq->cur_gen_bit ^= (complq->head == 0);

                /* Receiving a completion means we have space to post another
                 * buffer on the buffer queue.
                 */
                {
                        struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;

                        bufq->head = (bufq->head + 1) & bufq->mask;
                }

                /* Free running counter of completed descriptors */
                rx->cnt++;

                if (!rx->ctx.skb_head)
                        continue;

                if (!compl_desc->end_of_packet)
                        continue;

                work_done++;
                pkt_bytes = rx->ctx.skb_head->len;
                /* The ethernet header (first ETH_HLEN bytes) is snipped off
                 * by eth_type_trans.
                 */
                if (skb_headlen(rx->ctx.skb_head))
                        pkt_bytes += ETH_HLEN;

                /* gve_rx_complete_skb() will consume skb if successful */
                if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
                        gve_rx_free_skb(rx);
                        u64_stats_update_begin(&rx->statss);
                        rx->rx_desc_err_dropped_pkt++;
                        u64_stats_update_end(&rx->statss);
                        continue;
                }

                bytes += pkt_bytes;
                rx->ctx.skb_head = NULL;
                rx->ctx.skb_tail = NULL;
        }

        gve_rx_post_buffers_dqo(rx);

        u64_stats_update_begin(&rx->statss);
        rx->rpackets += work_done;
        rx->rbytes += bytes;
        u64_stats_update_end(&rx->statss);

        return work_done;
}