// SPDX-License-Identifier: GPL-2.0-only
/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2016 Intel Corporation.
 *
 * Intel Virtio Over PCIe (VOP) driver.
 */
#include <linux/sched.h>
#include <linux/poll.h>
#include <linux/dma-mapping.h>

#include <linux/mic_common.h>
#include "../common/mic_dev.h"

#include <linux/mic_ioctl.h>
#include "vop_main.h"

/* Helper API to obtain the VOP PCIe device */
static inline struct device *vop_dev(struct vop_vdev *vdev)
{
	return vdev->vpdev->dev.parent;
}

/* Helper API to check if a virtio device is initialized */
static inline int vop_vdev_inited(struct vop_vdev *vdev)
{
	if (!vdev)
		return -EINVAL;
	/* Device has not been created yet */
	if (!vdev->dd || !vdev->dd->type) {
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, -EINVAL);
		return -EINVAL;
	}
	/* Device has been removed/deleted */
	if (vdev->dd->type == -1) {
		dev_dbg(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, -ENODEV);
		return -ENODEV;
	}
	return 0;
}

static void _vop_notify(struct vringh *vrh)
{
	struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh);
	struct vop_vdev *vdev = vvrh->vdev;
	struct vop_device *vpdev = vdev->vpdev;
	s8 db = vdev->dc->h2c_vdev_db;

	if (db != -1)
		vpdev->hw_ops->send_intr(vpdev, db);
}

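/*
 * Called from the bottom half once the card has published the used ring
 * address for each virtqueue. Per the virtio spec, a used ring consists of
 * flags (u16), idx (u16), ring[num] of struct vring_used_elem, and a
 * trailing avail_event (u16) -- hence the sizeof(u16) * 3 in the size
 * computation below.
 */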
static void vop_virtio_init_post(struct vop_vdev *vdev)
{
	struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
	struct vop_device *vpdev = vdev->vpdev;
	int i, used_size;

	for (i = 0; i < vdev->dd->num_vq; i++) {
		used_size = PAGE_ALIGN(sizeof(u16) * 3 +
				sizeof(struct vring_used_elem) *
				le16_to_cpu(vqconfig[i].num));
		if (!le64_to_cpu(vqconfig[i].used_address)) {
			dev_warn(vop_dev(vdev), "used_address zero??\n");
			continue;
		}
		vdev->vvr[i].vrh.vring.used =
			(void __force *)vpdev->hw_ops->remap(
			vpdev,
			le64_to_cpu(vqconfig[i].used_address),
			used_size);
	}

	vdev->dc->used_address_updated = 0;

	dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n",
		 __func__, vdev->virtio_id);
}

static inline void vop_virtio_device_reset(struct vop_vdev *vdev)
{
	int i;

	dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n",
		__func__, vdev->dd->status, vdev->virtio_id);

	for (i = 0; i < vdev->dd->num_vq; i++)
		/*
		 * Avoid lockdep false positive. The + 1 is for the vop
		 * mutex which is held in the reset devices code path.
		 */
		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);

	/* 0 status means "reset" */
	vdev->dd->status = 0;
	vdev->dc->vdev_reset = 0;
	vdev->dc->host_ack = 1;

	for (i = 0; i < vdev->dd->num_vq; i++) {
		struct vringh *vrh = &vdev->vvr[i].vrh;

		vdev->vvr[i].vring.info->avail_idx = 0;
		vrh->completed = 0;
		vrh->last_avail_idx = 0;
		vrh->last_used_idx = 0;
	}

	for (i = 0; i < vdev->dd->num_vq; i++)
		mutex_unlock(&vdev->vvr[i].vr_mutex);
}

static void vop_virtio_reset_devices(struct vop_info *vi)
{
	struct list_head *pos, *tmp;
	struct vop_vdev *vdev;

	list_for_each_safe(pos, tmp, &vi->vdev_list) {
		vdev = list_entry(pos, struct vop_vdev, list);
		vop_virtio_device_reset(vdev);
		vdev->poll_wake = 1;
		wake_up(&vdev->waitq);
	}
}

static void vop_bh_handler(struct work_struct *work)
{
	struct vop_vdev *vdev = container_of(work, struct vop_vdev,
			virtio_bh_work);

	if (vdev->dc->used_address_updated)
		vop_virtio_init_post(vdev);

	if (vdev->dc->vdev_reset)
		vop_virtio_device_reset(vdev);

	vdev->poll_wake = 1;
	wake_up(&vdev->waitq);
}

static irqreturn_t _vop_virtio_intr_handler(int irq, void *data)
{
	struct vop_vdev *vdev = data;
	struct vop_device *vpdev = vdev->vpdev;

	vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db);
	schedule_work(&vdev->virtio_bh_work);
	return IRQ_HANDLED;
}

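/*
 * Pushes a new config space to the card and polls for the guest_ack flag,
 * which the card updates in shared memory. Note that nothing ever wakes
 * the on-stack waitqueue: wait_event_timeout() simply sleeps in 100 ms
 * slices, re-checking dc->guest_ack each time, for up to ~10 seconds.
 */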
static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp)
{
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
	int ret = 0, retry, i;
	struct vop_device *vpdev = vdev->vpdev;
	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
	s8 db = bootparam->h2c_config_db;

	mutex_lock(&vi->vop_mutex);
	for (i = 0; i < vdev->dd->num_vq; i++)
		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);

	if (db == -1 || vdev->dd->type == -1) {
		ret = -EIO;
		goto exit;
	}

	memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len);
	vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
	vpdev->hw_ops->send_intr(vpdev, db);

	for (retry = 100; retry--;) {
		ret = wait_event_timeout(wake, vdev->dc->guest_ack,
					 msecs_to_jiffies(100));
		if (ret)
			break;
	}

	dev_dbg(vop_dev(vdev),
		"%s %d retry: %d\n", __func__, __LINE__, retry);
	vdev->dc->config_change = 0;
	vdev->dc->guest_ack = 0;
exit:
	for (i = 0; i < vdev->dd->num_vq; i++)
		mutex_unlock(&vdev->vvr[i].vr_mutex);
	mutex_unlock(&vi->vop_mutex);
	return ret;
}

static int vop_copy_dp_entry(struct vop_vdev *vdev,
			     struct mic_device_desc *argp, __u8 *type,
			     struct mic_device_desc **devpage)
{
	struct vop_device *vpdev = vdev->vpdev;
	struct mic_device_desc *devp;
	struct mic_vqconfig *vqconfig;
	int ret = 0, i;
	bool slot_found = false;

	vqconfig = mic_vq_config(argp);
	for (i = 0; i < argp->num_vq; i++) {
		if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
			ret = -EINVAL;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto exit;
		}
	}

	/* Find the first free device page entry */
	for (i = sizeof(struct mic_bootparam);
		i < MIC_DP_SIZE - mic_total_desc_size(argp);
		i += mic_total_desc_size(devp)) {
		devp = vpdev->hw_ops->get_dp(vpdev) + i;
		if (devp->type == 0 || devp->type == -1) {
			slot_found = true;
			break;
		}
	}
	if (!slot_found) {
		ret = -EINVAL;
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, ret);
		goto exit;
	}
	/*
	 * Save off the type before doing the memcpy. Type will be set in the
	 * end after completing all initialization for the new device.
	 */
	*type = argp->type;
	argp->type = 0;
	memcpy(devp, argp, mic_desc_size(argp));

	*devpage = devp;
exit:
	return ret;
}

static void vop_init_device_ctrl(struct vop_vdev *vdev,
				 struct mic_device_desc *devpage)
{
	struct mic_device_ctrl *dc;

	dc = (void *)devpage + mic_aligned_desc_size(devpage);

	dc->config_change = 0;
	dc->guest_ack = 0;
	dc->vdev_reset = 0;
	dc->host_ack = 0;
	dc->used_address_updated = 0;
	dc->c2h_vdev_db = -1;
	dc->h2c_vdev_db = -1;
	vdev->dc = dc;
}

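/*
 * Creates a virtio device on the host: copies the device descriptor into a
 * free slot in the device page, allocates and DMA-maps the vrings and an
 * internal bounce buffer per virtqueue, requests a doorbell interrupt, and
 * finally publishes the device type (after an smp_wmb()) so the card can
 * discover the new device while scanning the device page.
 */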
static int vop_virtio_add_device(struct vop_vdev *vdev,
				 struct mic_device_desc *argp)
{
	struct vop_info *vi = vdev->vi;
	struct vop_device *vpdev = vi->vpdev;
	struct mic_device_desc *dd = NULL;
	struct mic_vqconfig *vqconfig;
	int vr_size, i, j, ret;
	u8 type = 0;
	s8 db = -1;
	char irqname[16];
	struct mic_bootparam *bootparam;
	u16 num;
	dma_addr_t vr_addr;

	bootparam = vpdev->hw_ops->get_dp(vpdev);
	init_waitqueue_head(&vdev->waitq);
	INIT_LIST_HEAD(&vdev->list);
	vdev->vpdev = vpdev;

	ret = vop_copy_dp_entry(vdev, argp, &type, &dd);
	if (ret) {
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, ret);
		return ret;
	}

	vop_init_device_ctrl(vdev, dd);

	vdev->dd = dd;
	vdev->virtio_id = type;
	vqconfig = mic_vq_config(dd);
	INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler);

	for (i = 0; i < dd->num_vq; i++) {
		struct vop_vringh *vvr = &vdev->vvr[i];
		struct mic_vring *vr = &vdev->vvr[i].vring;

		num = le16_to_cpu(vqconfig[i].num);
		mutex_init(&vvr->vr_mutex);
		vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) +
			sizeof(struct _mic_vring_info));
		vr->va = (void *)
			__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					 get_order(vr_size));
		if (!vr->va) {
			ret = -ENOMEM;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vr->len = vr_size;
		vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN);
		vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
		vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
					 DMA_BIDIRECTIONAL);
		if (dma_mapping_error(&vpdev->dev, vr_addr)) {
			free_pages((unsigned long)vr->va, get_order(vr_size));
			ret = -ENOMEM;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vqconfig[i].address = cpu_to_le64(vr_addr);

		vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
		ret = vringh_init_kern(&vvr->vrh,
				       *(u32 *)mic_vq_features(vdev->dd),
				       num, false, vr->vr.desc, vr->vr.avail,
				       vr->vr.used);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vringh_kiov_init(&vvr->riov, NULL, 0);
		vringh_kiov_init(&vvr->wiov, NULL, 0);
		vvr->head = USHRT_MAX;
		vvr->vdev = vdev;
		vvr->vrh.notify = _vop_notify;
		dev_dbg(&vpdev->dev,
			"%s %d index %d va %p info %p vr_size 0x%x\n",
			__func__, __LINE__, i, vr->va, vr->info, vr_size);
		vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
					get_order(VOP_INT_DMA_BUF_SIZE));
		vvr->buf_da = dma_map_single(&vpdev->dev,
					  vvr->buf, VOP_INT_DMA_BUF_SIZE,
					  DMA_BIDIRECTIONAL);
	}

	snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
		 vdev->virtio_id);
	vdev->virtio_db = vpdev->hw_ops->next_db(vpdev);
	vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
			_vop_virtio_intr_handler, irqname, vdev,
			vdev->virtio_db);
	if (IS_ERR(vdev->virtio_cookie)) {
		ret = PTR_ERR(vdev->virtio_cookie);
		dev_dbg(&vpdev->dev, "request irq failed\n");
		goto err;
	}

	vdev->dc->c2h_vdev_db = vdev->virtio_db;

	/*
	 * Order the type update with previous stores. This write barrier
	 * is paired with the corresponding read barrier before the uncached
	 * system memory read of the type, on the card while scanning the
	 * device page.
	 */
	smp_wmb();
	dd->type = type;
	argp->type = type;

	if (bootparam) {
		db = bootparam->h2c_config_db;
		if (db != -1)
			vpdev->hw_ops->send_intr(vpdev, db);
	}
	dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db);
	return 0;
err:
	vqconfig = mic_vq_config(dd);
	for (j = 0; j < i; j++) {
		struct vop_vringh *vvr = &vdev->vvr[j];

		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
				 vvr->vring.len, DMA_BIDIRECTIONAL);
		free_pages((unsigned long)vvr->vring.va,
			   get_order(vvr->vring.len));
	}
	return ret;
}

static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp,
			   struct vop_device *vpdev)
{
	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
	s8 db;
	int ret, retry;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);

	devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
	db = bootparam->h2c_config_db;
	if (db != -1)
		vpdev->hw_ops->send_intr(vpdev, db);
	else
		goto done;
	for (retry = 15; retry--;) {
		ret = wait_event_timeout(wake, devp->guest_ack,
					 msecs_to_jiffies(1000));
		if (ret)
			break;
	}
done:
	devp->config_change = 0;
	devp->guest_ack = 0;
}

static void vop_virtio_del_device(struct vop_vdev *vdev)
{
	struct vop_info *vi = vdev->vi;
	struct vop_device *vpdev = vdev->vpdev;
	int i;
	struct mic_vqconfig *vqconfig;
	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);

	if (!bootparam)
		goto skip_hot_remove;
	vop_dev_remove(vi, vdev->dc, vpdev);
skip_hot_remove:
	vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
	flush_work(&vdev->virtio_bh_work);
	vqconfig = mic_vq_config(vdev->dd);
	for (i = 0; i < vdev->dd->num_vq; i++) {
		struct vop_vringh *vvr = &vdev->vvr[i];

		dma_unmap_single(&vpdev->dev,
				 vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
				 DMA_BIDIRECTIONAL);
		free_pages((unsigned long)vvr->buf,
			   get_order(VOP_INT_DMA_BUF_SIZE));
		vringh_kiov_cleanup(&vvr->riov);
		vringh_kiov_cleanup(&vvr->wiov);
		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
				 vvr->vring.len, DMA_BIDIRECTIONAL);
		free_pages((unsigned long)vvr->vring.va,
			   get_order(vvr->vring.len));
	}
	/*
	 * Order the type update with previous stores. This write barrier
	 * is paired with the corresponding read barrier before the uncached
	 * system memory read of the type, on the card while scanning the
	 * device page.
	 */
	smp_wmb();
	vdev->dd->type = -1;
}

/*
 * vop_sync_dma - Wrapper for synchronous DMAs.
 *
 * @vdev - The VOP virtio device performing the transfer; the DMA channel
 * is obtained from its parent vop_info.
 * @dst - destination DMA address.
 * @src - source DMA address.
 * @len - size of the transfer.
 *
 * Return 0 on success and a nonzero value on failure.
 */
static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src,
			size_t len)
{
	int err = 0;
	struct dma_device *ddev;
	struct dma_async_tx_descriptor *tx;
	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
	struct dma_chan *vop_ch = vi->dma_ch;

	if (!vop_ch) {
		err = -EBUSY;
		goto error;
	}
	ddev = vop_ch->device;
	tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len,
		DMA_PREP_FENCE);
	if (!tx) {
		err = -ENOMEM;
		goto error;
	} else {
		dma_cookie_t cookie;

		cookie = tx->tx_submit(tx);
		if (dma_submit_error(cookie)) {
			err = -ENOMEM;
			goto error;
		}
		dma_async_issue_pending(vop_ch);
		err = dma_sync_wait(vop_ch, cookie);
	}
error:
	if (err)
		dev_err(&vi->vpdev->dev, "%s %d err %d\n",
			__func__, __LINE__, err);
	return err;
}

#define VOP_USE_DMA true

/*
 * Initiates the copies across the PCIe bus from card memory to a user
 * space buffer. When transfers are done using DMA, source/destination
 * addresses and transfer length must follow the alignment requirements of
 * the MIC DMA engine.
 */
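/*
 * For example, with a 64-byte DMA alignment and a card address daddr of
 * 0x1003, the code below rounds daddr down to 0x1000, DMAs from that
 * aligned address into the bounce buffer, and then skips the first
 * dma_offset (3) bytes when copying to user space. (Illustrative values
 * only; the real alignment comes from the DMA device's copy_align.)
 */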
static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
				   size_t len, u64 daddr, size_t dlen,
				   int vr_idx)
{
	struct vop_device *vpdev = vdev->vpdev;
	void __iomem *dbuf = vpdev->hw_ops->remap(vpdev, daddr, len);
	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
	size_t dma_alignment;
	bool x200;
	size_t dma_offset, partlen;
	int err;

	if (!VOP_USE_DMA || !vi->dma_ch) {
		if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
			err = -EFAULT;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		vdev->in_bytes += len;
		err = 0;
		goto err;
	}

	dma_alignment = 1 << vi->dma_ch->device->copy_align;
	x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);

	dma_offset = daddr - round_down(daddr, dma_alignment);
	daddr -= dma_offset;
	len += dma_offset;
	/*
	 * X100 uses DMA addresses as seen by the card so adding
	 * the aperture base is not required for DMA. However x200
	 * requires DMA addresses to be an offset into the bar so
	 * add the aperture base for x200.
	 */
	if (x200)
		daddr += vpdev->aper->pa;
	while (len) {
		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
		err = vop_sync_dma(vdev, vvr->buf_da, daddr,
				   ALIGN(partlen, dma_alignment));
		if (err) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		if (copy_to_user(ubuf, vvr->buf + dma_offset,
				 partlen - dma_offset)) {
			err = -EFAULT;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		daddr += partlen;
		ubuf += partlen;
		dbuf += partlen;
		vdev->in_bytes_dma += partlen;
		vdev->in_bytes += partlen;
		len -= partlen;
		dma_offset = 0;
	}
	err = 0;
err:
	vpdev->hw_ops->unmap(vpdev, dbuf);
	dev_dbg(vop_dev(vdev),
		"%s: ubuf %p dbuf %p len 0x%zx vr_idx 0x%x\n",
		__func__, ubuf, dbuf, len, vr_idx);
	return err;
}

/*
 * Initiates copies across the PCIe bus from a user space buffer to card
 * memory. When transfers are done using DMA, source/destination addresses
 * and transfer length must follow the alignment requirements of the MIC
 * DMA engine.
 */
static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
				     size_t len, u64 daddr, size_t dlen,
				     int vr_idx)
{
	struct vop_device *vpdev = vdev->vpdev;
	void __iomem *dbuf = vpdev->hw_ops->remap(vpdev, daddr, len);
	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
	size_t dma_alignment;
	bool x200;
	size_t partlen;
	bool dma = VOP_USE_DMA && vi->dma_ch;
	int err = 0;

	if (dma) {
		dma_alignment = 1 << vi->dma_ch->device->copy_align;
		x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);

		if (daddr & (dma_alignment - 1)) {
			vdev->tx_dst_unaligned += len;
			dma = false;
		} else if (ALIGN(len, dma_alignment) > dlen) {
			vdev->tx_len_unaligned += len;
			dma = false;
		}
	}

	if (!dma)
		goto memcpy;

	/*
	 * X100 uses DMA addresses as seen by the card so adding
	 * the aperture base is not required for DMA. However x200
	 * requires DMA addresses to be an offset into the bar so
	 * add the aperture base for x200.
	 */
	if (x200)
		daddr += vpdev->aper->pa;
	while (len) {
		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);

		if (copy_from_user(vvr->buf, ubuf, partlen)) {
			err = -EFAULT;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		err = vop_sync_dma(vdev, daddr, vvr->buf_da,
				   ALIGN(partlen, dma_alignment));
		if (err) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		daddr += partlen;
		ubuf += partlen;
		dbuf += partlen;
		vdev->out_bytes_dma += partlen;
		vdev->out_bytes += partlen;
		len -= partlen;
	}
memcpy:
	/*
	 * We are copying to IO below and should ideally use something
	 * like copy_from_user_toio(..) if it existed.
	 */
	if (copy_from_user((void __force *)dbuf, ubuf, len)) {
		err = -EFAULT;
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, err);
		goto err;
	}
	vdev->out_bytes += len;
	err = 0;
err:
	vpdev->hw_ops->unmap(vpdev, dbuf);
	dev_dbg(vop_dev(vdev),
		"%s: ubuf %p dbuf %p len 0x%zx vr_idx 0x%x\n",
		__func__, ubuf, dbuf, len, vr_idx);
	return err;
}

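/*
 * MIC_VRINGH_READ denotes the card-to-host direction: "read" descriptors
 * are copied to user space with vop_virtio_copy_to_user(), while write
 * descriptors (!MIC_VRINGH_READ) are filled from user space with
 * vop_virtio_copy_from_user().
 */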
#define MIC_VRINGH_READ true

/* Determine the total number of bytes consumed in a VRINGH KIOV */
static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov)
{
	int i;
	u32 total = iov->consumed;

	for (i = 0; i < iov->i; i++)
		total += iov->iov[i].iov_len;
	return total;
}

/*
 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
 * This API is heavily based on the vringh_iov_xfer(..) implementation
 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
 * and vringh_iov_push_kern(..) directly is because there is no
 * way to override the VRINGH xfer(..) routines as of v3.10.
 */
static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov,
			   void __user *ubuf, size_t len, bool read, int vr_idx,
			   size_t *out_len)
{
	int ret = 0;
	size_t partlen, tot_len = 0;

	while (len && iov->i < iov->used) {
		struct kvec *kiov = &iov->iov[iov->i];
		unsigned long daddr = (unsigned long)kiov->iov_base;

		partlen = min(kiov->iov_len, len);
		if (read)
			ret = vop_virtio_copy_to_user(vdev, ubuf, partlen,
						      daddr,
						      kiov->iov_len,
						      vr_idx);
		else
			ret = vop_virtio_copy_from_user(vdev, ubuf, partlen,
							daddr,
							kiov->iov_len,
							vr_idx);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= partlen;
		ubuf += partlen;
		tot_len += partlen;
		iov->consumed += partlen;
		kiov->iov_len -= partlen;
		kiov->iov_base += partlen;
		if (!kiov->iov_len) {
			/* Fix up old iov element then increment. */
			kiov->iov_len = iov->consumed;
			kiov->iov_base -= iov->consumed;

			iov->consumed = 0;
			iov->i++;
		}
	}
	*out_len = tot_len;
	return ret;
}

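/*
 * Note that the KIOVs (riov/wiov) and the fetched descriptor head live in
 * struct vop_vringh and persist across MIC_VIRTIO_COPY_DESC calls: a head
 * of USHRT_MAX means no descriptor chain is currently in flight, so a
 * partially processed chain is resumed on the next call rather than a new
 * one being fetched.
 */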
/*
 * Use the standard VRINGH infrastructure in the kernel to fetch new
 * descriptors, initiate the copies and update the used ring.
 */
static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy)
{
	int ret = 0;
	u32 iovcnt = copy->iovcnt;
	struct iovec iov;
	struct iovec __user *u_iov = copy->iov;
	void __user *ubuf = NULL;
	struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx];
	struct vringh_kiov *riov = &vvr->riov;
	struct vringh_kiov *wiov = &vvr->wiov;
	struct vringh *vrh = &vvr->vrh;
	u16 *head = &vvr->head;
	struct mic_vring *vr = &vvr->vring;
	size_t len = 0, out_len;

	copy->out_len = 0;
	/* Fetch a new IOVEC if all previous elements have been processed */
	if (riov->i == riov->used && wiov->i == wiov->used) {
		ret = vringh_getdesc_kern(vrh, riov, wiov,
					  head, GFP_KERNEL);
		/* Check if there are available descriptors */
		if (ret <= 0)
			return ret;
	}
	while (iovcnt) {
		if (!len) {
			/* Copy over a new iovec from user space. */
			ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
			if (ret) {
				ret = -EFAULT;
				dev_err(vop_dev(vdev), "%s %d err %d\n",
					__func__, __LINE__, ret);
				break;
			}
			len = iov.iov_len;
			ubuf = iov.iov_base;
		}
		/* Issue all the read descriptors first */
		ret = vop_vringh_copy(vdev, riov, ubuf, len,
				      MIC_VRINGH_READ, copy->vr_idx, &out_len);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= out_len;
		ubuf += out_len;
		copy->out_len += out_len;
		/* Issue the write descriptors next */
		ret = vop_vringh_copy(vdev, wiov, ubuf, len,
				      !MIC_VRINGH_READ, copy->vr_idx, &out_len);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= out_len;
		ubuf += out_len;
		copy->out_len += out_len;
		if (!len) {
			/* One user space iovec is now completed */
			iovcnt--;
			u_iov++;
		}
		/* Exit loop if all elements in KIOVs have been processed. */
		if (riov->i == riov->used && wiov->i == wiov->used)
			break;
	}
	/*
	 * Update the used ring if a descriptor was available and some data was
	 * copied in/out and the user asked for a used ring update.
	 */
	if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
		u32 total = 0;

		/* Determine the total data consumed */
		total += vop_vringh_iov_consumed(riov);
		total += vop_vringh_iov_consumed(wiov);
		vringh_complete_kern(vrh, *head, total);
		*head = USHRT_MAX;
		if (vringh_need_notify_kern(vrh) > 0)
			vringh_notify(vrh);
		vringh_kiov_cleanup(riov);
		vringh_kiov_cleanup(wiov);
		/* Update avail idx for user space */
		vr->info->avail_idx = vrh->last_avail_idx;
	}
	return ret;
}

static inline int vop_verify_copy_args(struct vop_vdev *vdev,
				       struct mic_copy_desc *copy)
{
	if (!vdev || copy->vr_idx >= vdev->dd->num_vq)
		return -EINVAL;
	return 0;
}

/* Copy a specified number of virtio descriptors in a chain */
static int vop_virtio_copy_desc(struct vop_vdev *vdev,
				struct mic_copy_desc *copy)
{
	int err;
	struct vop_vringh *vvr;

	err = vop_verify_copy_args(vdev, copy);
	if (err)
		return err;

	vvr = &vdev->vvr[copy->vr_idx];
	mutex_lock(&vvr->vr_mutex);
	if (!vop_vdevup(vdev)) {
		err = -ENODEV;
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, err);
		goto err;
	}
	err = _vop_virtio_copy(vdev, copy);
	if (err) {
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, err);
	}
err:
	mutex_unlock(&vvr->vr_mutex);
	return err;
}

static int vop_open(struct inode *inode, struct file *f)
{
	struct vop_vdev *vdev;
	struct vop_info *vi = container_of(f->private_data,
		struct vop_info, miscdev);

	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
	if (!vdev)
		return -ENOMEM;
	vdev->vi = vi;
	mutex_init(&vdev->vdev_mutex);
	f->private_data = vdev;
	init_completion(&vdev->destroy);
	complete(&vdev->destroy);
	return 0;
}

static int vop_release(struct inode *inode, struct file *f)
{
	struct vop_vdev *vdev = f->private_data, *vdev_tmp;
	struct vop_info *vi = vdev->vi;
	struct list_head *pos, *tmp;
	bool found = false;

	mutex_lock(&vdev->vdev_mutex);
	if (vdev->deleted)
		goto unlock;
	mutex_lock(&vi->vop_mutex);
	list_for_each_safe(pos, tmp, &vi->vdev_list) {
		vdev_tmp = list_entry(pos, struct vop_vdev, list);
		if (vdev == vdev_tmp) {
			vop_virtio_del_device(vdev);
			list_del(pos);
			found = true;
			break;
		}
	}
	mutex_unlock(&vi->vop_mutex);
unlock:
	mutex_unlock(&vdev->vdev_mutex);
	if (!found)
		wait_for_completion(&vdev->destroy);
	f->private_data = NULL;
	kfree(vdev);
	return 0;
}

static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	struct vop_vdev *vdev = f->private_data;
	struct vop_info *vi = vdev->vi;
	void __user *argp = (void __user *)arg;
	int ret;

	switch (cmd) {
	case MIC_VIRTIO_ADD_DEVICE:
	{
		struct mic_device_desc dd, *dd_config;

		if (copy_from_user(&dd, argp, sizeof(dd)))
			return -EFAULT;

		if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
		    dd.num_vq > MIC_MAX_VRINGS)
			return -EINVAL;

		dd_config = memdup_user(argp, mic_desc_size(&dd));
		if (IS_ERR(dd_config))
			return PTR_ERR(dd_config);

		/* Ensure desc has not changed between the two reads */
		if (memcmp(&dd, dd_config, sizeof(dd))) {
			ret = -EINVAL;
			goto free_ret;
		}
		mutex_lock(&vdev->vdev_mutex);
		mutex_lock(&vi->vop_mutex);
		ret = vop_virtio_add_device(vdev, dd_config);
		if (ret)
			goto unlock_ret;
		list_add_tail(&vdev->list, &vi->vdev_list);
unlock_ret:
		mutex_unlock(&vi->vop_mutex);
		mutex_unlock(&vdev->vdev_mutex);
free_ret:
		kfree(dd_config);
		return ret;
	}
	case MIC_VIRTIO_COPY_DESC:
	{
		struct mic_copy_desc copy;

		mutex_lock(&vdev->vdev_mutex);
		ret = vop_vdev_inited(vdev);
		if (ret)
			goto _unlock_ret;

		if (copy_from_user(&copy, argp, sizeof(copy))) {
			ret = -EFAULT;
			goto _unlock_ret;
		}

		ret = vop_virtio_copy_desc(vdev, &copy);
		if (ret < 0)
			goto _unlock_ret;
		if (copy_to_user(
			&((struct mic_copy_desc __user *)argp)->out_len,
			&copy.out_len, sizeof(copy.out_len)))
			ret = -EFAULT;
_unlock_ret:
		mutex_unlock(&vdev->vdev_mutex);
		return ret;
	}
	case MIC_VIRTIO_CONFIG_CHANGE:
	{
		void *buf;

		mutex_lock(&vdev->vdev_mutex);
		ret = vop_vdev_inited(vdev);
		if (ret)
			goto __unlock_ret;
		buf = memdup_user(argp, vdev->dd->config_len);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			goto __unlock_ret;
		}
		ret = vop_virtio_config_change(vdev, buf);
		kfree(buf);
__unlock_ret:
		mutex_unlock(&vdev->vdev_mutex);
		return ret;
	}
	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}

/*
 * We return EPOLLIN | EPOLLOUT from poll when new buffers are enqueued, and
 * not when previously enqueued buffers may be available. This means that
 * in the card->host (TX) path, when userspace is unblocked by poll it
 * must drain all available descriptors or it can stall.
 */
static __poll_t vop_poll(struct file *f, poll_table *wait)
{
	struct vop_vdev *vdev = f->private_data;
	__poll_t mask = 0;

	mutex_lock(&vdev->vdev_mutex);
	if (vop_vdev_inited(vdev)) {
		mask = EPOLLERR;
		goto done;
	}
	poll_wait(f, &vdev->waitq, wait);
	if (vop_vdev_inited(vdev)) {
		mask = EPOLLERR;
	} else if (vdev->poll_wake) {
		vdev->poll_wake = 0;
		mask = EPOLLIN | EPOLLOUT;
	}
done:
	mutex_unlock(&vdev->vdev_mutex);
	return mask;
}

static inline int
vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
		 unsigned long *size, unsigned long *pa)
{
	struct vop_device *vpdev = vdev->vpdev;
	unsigned long start = MIC_DP_SIZE;
	int i;

	/*
	 * MMAP interface is as follows:
	 * offset                               region
	 * 0x0                                  virtio device_page
	 * 0x1000                               first vring
	 * 0x1000 + size of 1st vring           second vring
	 * ....
	 */
	if (!offset) {
		*pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
		*size = MIC_DP_SIZE;
		return 0;
	}

	for (i = 0; i < vdev->dd->num_vq; i++) {
		struct vop_vringh *vvr = &vdev->vvr[i];

		if (offset == start) {
			*pa = virt_to_phys(vvr->vring.va);
			*size = vvr->vring.len;
			return 0;
		}
		start += vvr->vring.len;
	}
	return -1;
}

/*
 * Maps the device page and virtio rings to user space for readonly access.
 */
static int vop_mmap(struct file *f, struct vm_area_struct *vma)
{
	struct vop_vdev *vdev = f->private_data;
	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
	unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
	int i, err;

	err = vop_vdev_inited(vdev);
	if (err)
		goto ret;
	if (vma->vm_flags & VM_WRITE) {
		err = -EACCES;
		goto ret;
	}
	while (size_rem) {
		i = vop_query_offset(vdev, offset, &size, &pa);
		if (i < 0) {
			err = -EINVAL;
			goto ret;
		}
		err = remap_pfn_range(vma, vma->vm_start + offset,
				      pa >> PAGE_SHIFT, size,
				      vma->vm_page_prot);
		if (err)
			goto ret;
		size_rem -= size;
		offset += size;
	}
ret:
	return err;
}

static const struct file_operations vop_fops = {
	.open = vop_open,
	.release = vop_release,
	.unlocked_ioctl = vop_ioctl,
	.poll = vop_poll,
	.mmap = vop_mmap,
	.owner = THIS_MODULE,
};

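/*
 * Illustrative user-space usage sketch (error handling omitted; "desc",
 * "buf" and "buf_len" are hypothetical, and only the UAPI exercised by the
 * fops above is used):
 *
 *	int fd = open("/dev/vop_virtio0", O_RDWR);
 *
 *	// Register a new virtio device with a populated mic_device_desc.
 *	ioctl(fd, MIC_VIRTIO_ADD_DEVICE, desc);
 *
 *	// Device page at offset 0; vrings follow (read-only mappings).
 *	void *dp = mmap(NULL, MIC_DP_SIZE, PROT_READ, MAP_SHARED, fd, 0);
 *
 *	// Wait for the card to enqueue new buffers.
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *	poll(&pfd, 1, -1);
 *
 *	// Drain/fill descriptors on virtqueue 0.
 *	struct iovec iov = { .iov_base = buf, .iov_len = buf_len };
 *	struct mic_copy_desc copy = {
 *		.iov = &iov, .iovcnt = 1, .vr_idx = 0, .update_used = 1,
 *	};
 *	ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
 *	// copy.out_len now holds the number of bytes transferred.
 *
 *	close(fd);	// triggers vop_release() and hot-remove on the card
 */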
int vop_host_init(struct vop_info *vi)
{
	int rc;
	struct miscdevice *mdev;
	struct vop_device *vpdev = vi->vpdev;

	INIT_LIST_HEAD(&vi->vdev_list);
	vi->dma_ch = vpdev->dma_ch;
	mdev = &vi->miscdev;
	mdev->minor = MISC_DYNAMIC_MINOR;
	snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index);
	mdev->name = vi->name;
	mdev->fops = &vop_fops;
	mdev->parent = &vpdev->dev;

	rc = misc_register(mdev);
	if (rc)
		dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc);
	return rc;
}

void vop_host_uninit(struct vop_info *vi)
{
	struct list_head *pos, *tmp;
	struct vop_vdev *vdev;

	mutex_lock(&vi->vop_mutex);
	vop_virtio_reset_devices(vi);
	list_for_each_safe(pos, tmp, &vi->vdev_list) {
		vdev = list_entry(pos, struct vop_vdev, list);
		list_del(pos);
		reinit_completion(&vdev->destroy);
		mutex_unlock(&vi->vop_mutex);
		mutex_lock(&vdev->vdev_mutex);
		vop_virtio_del_device(vdev);
		vdev->deleted = true;
		mutex_unlock(&vdev->vdev_mutex);
		complete(&vdev->destroy);
		mutex_lock(&vi->vop_mutex);
	}
	mutex_unlock(&vi->vop_mutex);
	misc_deregister(&vi->miscdev);
}