Merge tag 'for-linus-5.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
[sfrench/cifs-2.6.git] / samples / mic / mpssd / mpssd.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Intel MIC Platform Software Stack (MPSS)
4  *
5  * Copyright(c) 2013 Intel Corporation.
6  *
7  * Intel MIC User Space Tools.
8  */
9
10 #define _GNU_SOURCE
11
12 #include <stdlib.h>
13 #include <fcntl.h>
14 #include <getopt.h>
15 #include <assert.h>
16 #include <unistd.h>
17 #include <stdbool.h>
18 #include <signal.h>
19 #include <poll.h>
20 #include <features.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include <sys/mman.h>
24 #include <sys/socket.h>
25 #include <linux/virtio_ring.h>
26 #include <linux/virtio_net.h>
27 #include <linux/virtio_console.h>
28 #include <linux/virtio_blk.h>
29 #include <linux/version.h>
30 #include "mpssd.h"
31 #include <linux/mic_ioctl.h>
32 #include <linux/mic_common.h>
33 #include <tools/endian.h>
34
35 static void *init_mic(void *arg);
36
37 static FILE *logfp;
38 static struct mic_info mic_list;
39
/* Number of elements in a true array (not valid for pointers). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))

/*
 * Minimum of x and y after converting both to `type`.
 * Each argument is evaluated exactly once.
 */
#define min_t(type, x, y) ({				\
		type min_t_lhs_ = (x);			\
		type min_t_rhs_ = (y);			\
		(min_t_rhs_ > min_t_lhs_) ? min_t_lhs_ : min_t_rhs_; })
46
/*
 * Alignment helpers.  The mask arithmetic below only rounds correctly
 * when `size` is a power of two.
 *
 * Every macro argument is parenthesised so that expressions built from
 * low-precedence operators (?:, <<, |, ...) can be passed safely.
 */

/* align addr on a size boundary - adjust address down if needed */
#define _ALIGN_DOWN(addr, size)  ((addr) & ~((size) - 1))

/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN_UP(addr, size)    _ALIGN_DOWN((addr) + (size) - 1, (size))
#define _ALIGN(addr, size)       _ALIGN_UP(addr, size)

/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr)         _ALIGN(addr, PAGE_SIZE)
56
57 #define READ_ONCE(x) (*(volatile typeof(x) *)&(x))
58
59 #define GSO_ENABLED             1
60 #define MAX_GSO_SIZE            (64 * 1024)
61 #define ETH_H_LEN               14
62 #define MAX_NET_PKT_SIZE        (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
63 #define MIC_DEVICE_PAGE_END     0x1000
64
65 #ifndef VIRTIO_NET_HDR_F_DATA_VALID
66 #define VIRTIO_NET_HDR_F_DATA_VALID     2       /* Csum is valid */
67 #endif
68
69 static struct {
70         struct mic_device_desc dd;
71         struct mic_vqconfig vqconfig[2];
72         __u32 host_features, guest_acknowledgements;
73         struct virtio_console_config cons_config;
74 } virtcons_dev_page = {
75         .dd = {
76                 .type = VIRTIO_ID_CONSOLE,
77                 .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
78                 .feature_len = sizeof(virtcons_dev_page.host_features),
79                 .config_len = sizeof(virtcons_dev_page.cons_config),
80         },
81         .vqconfig[0] = {
82                 .num = htole16(MIC_VRING_ENTRIES),
83         },
84         .vqconfig[1] = {
85                 .num = htole16(MIC_VRING_ENTRIES),
86         },
87 };
88
89 static struct {
90         struct mic_device_desc dd;
91         struct mic_vqconfig vqconfig[2];
92         __u32 host_features, guest_acknowledgements;
93         struct virtio_net_config net_config;
94 } virtnet_dev_page = {
95         .dd = {
96                 .type = VIRTIO_ID_NET,
97                 .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
98                 .feature_len = sizeof(virtnet_dev_page.host_features),
99                 .config_len = sizeof(virtnet_dev_page.net_config),
100         },
101         .vqconfig[0] = {
102                 .num = htole16(MIC_VRING_ENTRIES),
103         },
104         .vqconfig[1] = {
105                 .num = htole16(MIC_VRING_ENTRIES),
106         },
107 #if GSO_ENABLED
108         .host_features = htole32(
109                 1 << VIRTIO_NET_F_CSUM |
110                 1 << VIRTIO_NET_F_GSO |
111                 1 << VIRTIO_NET_F_GUEST_TSO4 |
112                 1 << VIRTIO_NET_F_GUEST_TSO6 |
113                 1 << VIRTIO_NET_F_GUEST_ECN),
114 #else
115                 .host_features = 0,
116 #endif
117 };
118
119 static const char *mic_config_dir = "/etc/mpss";
120 static const char *virtblk_backend = "VIRTBLK_BACKEND";
121 static struct {
122         struct mic_device_desc dd;
123         struct mic_vqconfig vqconfig[1];
124         __u32 host_features, guest_acknowledgements;
125         struct virtio_blk_config blk_config;
126 } virtblk_dev_page = {
127         .dd = {
128                 .type = VIRTIO_ID_BLOCK,
129                 .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
130                 .feature_len = sizeof(virtblk_dev_page.host_features),
131                 .config_len = sizeof(virtblk_dev_page.blk_config),
132         },
133         .vqconfig[0] = {
134                 .num = htole16(MIC_VRING_ENTRIES),
135         },
136         .host_features =
137                 htole32(1<<VIRTIO_BLK_F_SEG_MAX),
138         .blk_config = {
139                 .seg_max = htole32(MIC_VRING_ENTRIES - 2),
140                 .capacity = htole64(0),
141          }
142 };
143
144 static char *myname;
145
146 static int
147 tap_configure(struct mic_info *mic, char *dev)
148 {
149         pid_t pid;
150         char *ifargv[7];
151         char ipaddr[IFNAMSIZ];
152         int ret = 0;
153
154         pid = fork();
155         if (pid == 0) {
156                 ifargv[0] = "ip";
157                 ifargv[1] = "link";
158                 ifargv[2] = "set";
159                 ifargv[3] = dev;
160                 ifargv[4] = "up";
161                 ifargv[5] = NULL;
162                 mpsslog("Configuring %s\n", dev);
163                 ret = execvp("ip", ifargv);
164                 if (ret < 0) {
165                         mpsslog("%s execvp failed errno %s\n",
166                                 mic->name, strerror(errno));
167                         return ret;
168                 }
169         }
170         if (pid < 0) {
171                 mpsslog("%s fork failed errno %s\n",
172                         mic->name, strerror(errno));
173                 return ret;
174         }
175
176         ret = waitpid(pid, NULL, 0);
177         if (ret < 0) {
178                 mpsslog("%s waitpid failed errno %s\n",
179                         mic->name, strerror(errno));
180                 return ret;
181         }
182
183         snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id + 1);
184
185         pid = fork();
186         if (pid == 0) {
187                 ifargv[0] = "ip";
188                 ifargv[1] = "addr";
189                 ifargv[2] = "add";
190                 ifargv[3] = ipaddr;
191                 ifargv[4] = "dev";
192                 ifargv[5] = dev;
193                 ifargv[6] = NULL;
194                 mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
195                 ret = execvp("ip", ifargv);
196                 if (ret < 0) {
197                         mpsslog("%s execvp failed errno %s\n",
198                                 mic->name, strerror(errno));
199                         return ret;
200                 }
201         }
202         if (pid < 0) {
203                 mpsslog("%s fork failed errno %s\n",
204                         mic->name, strerror(errno));
205                 return ret;
206         }
207
208         ret = waitpid(pid, NULL, 0);
209         if (ret < 0) {
210                 mpsslog("%s waitpid failed errno %s\n",
211                         mic->name, strerror(errno));
212                 return ret;
213         }
214         mpsslog("MIC name %s %s %d DONE!\n",
215                 mic->name, __func__, __LINE__);
216         return 0;
217 }
218
219 static int tun_alloc(struct mic_info *mic, char *dev)
220 {
221         struct ifreq ifr;
222         int fd, err;
223 #if GSO_ENABLED
224         unsigned offload;
225 #endif
226         fd = open("/dev/net/tun", O_RDWR);
227         if (fd < 0) {
228                 mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
229                 goto done;
230         }
231
232         memset(&ifr, 0, sizeof(ifr));
233
234         ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
235         if (*dev)
236                 strncpy(ifr.ifr_name, dev, IFNAMSIZ);
237
238         err = ioctl(fd, TUNSETIFF, (void *)&ifr);
239         if (err < 0) {
240                 mpsslog("%s %s %d TUNSETIFF failed %s\n",
241                         mic->name, __func__, __LINE__, strerror(errno));
242                 close(fd);
243                 return err;
244         }
245 #if GSO_ENABLED
246         offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN;
247
248         err = ioctl(fd, TUNSETOFFLOAD, offload);
249         if (err < 0) {
250                 mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
251                         mic->name, __func__, __LINE__, strerror(errno));
252                 close(fd);
253                 return err;
254         }
255 #endif
256         strcpy(dev, ifr.ifr_name);
257         mpsslog("Created TAP %s\n", dev);
258 done:
259         return fd;
260 }
261
262 #define NET_FD_VIRTIO_NET 0
263 #define NET_FD_TUN 1
264 #define MAX_NET_FD 2
265
266 static void set_dp(struct mic_info *mic, int type, void *dp)
267 {
268         switch (type) {
269         case VIRTIO_ID_CONSOLE:
270                 mic->mic_console.console_dp = dp;
271                 return;
272         case VIRTIO_ID_NET:
273                 mic->mic_net.net_dp = dp;
274                 return;
275         case VIRTIO_ID_BLOCK:
276                 mic->mic_virtblk.block_dp = dp;
277                 return;
278         }
279         mpsslog("%s %s %d not found\n", mic->name, __func__, type);
280         assert(0);
281 }
282
283 static void *get_dp(struct mic_info *mic, int type)
284 {
285         switch (type) {
286         case VIRTIO_ID_CONSOLE:
287                 return mic->mic_console.console_dp;
288         case VIRTIO_ID_NET:
289                 return mic->mic_net.net_dp;
290         case VIRTIO_ID_BLOCK:
291                 return mic->mic_virtblk.block_dp;
292         }
293         mpsslog("%s %s %d not found\n", mic->name, __func__, type);
294         assert(0);
295         return NULL;
296 }
297
298 static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
299 {
300         struct mic_device_desc *d;
301         int i;
302         void *dp = get_dp(mic, type);
303
304         for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
305                 i += mic_total_desc_size(d)) {
306                 d = dp + i;
307
308                 /* End of list */
309                 if (d->type == 0)
310                         break;
311
312                 if (d->type == -1)
313                         continue;
314
315                 mpsslog("%s %s d-> type %d d %p\n",
316                         mic->name, __func__, d->type, d);
317
318                 if (d->type == (__u8)type)
319                         return d;
320         }
321         mpsslog("%s %s %d not found\n", mic->name, __func__, type);
322         return NULL;
323 }
324
325 /* See comments in vhost.c for explanation of next_desc() */
326 static unsigned next_desc(struct vring_desc *desc)
327 {
328         unsigned int next;
329
330         if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
331                 return -1U;
332         next = le16toh(desc->next);
333         return next;
334 }
335
336 /* Sum up all the IOVEC length */
337 static ssize_t
338 sum_iovec_len(struct mic_copy_desc *copy)
339 {
340         ssize_t sum = 0;
341         unsigned int i;
342
343         for (i = 0; i < copy->iovcnt; i++)
344                 sum += copy->iov[i].iov_len;
345         return sum;
346 }
347
348 static inline void verify_out_len(struct mic_info *mic,
349         struct mic_copy_desc *copy)
350 {
351         if (copy->out_len != sum_iovec_len(copy)) {
352                 mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
353                         mic->name, __func__, __LINE__,
354                         copy->out_len, sum_iovec_len(copy));
355                 assert(copy->out_len == sum_iovec_len(copy));
356         }
357 }
358
359 /* Display an iovec */
360 static void
361 disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
362            const char *s, int line)
363 {
364         unsigned int i;
365
366         for (i = 0; i < copy->iovcnt; i++)
367                 mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
368                         mic->name, s, line, i,
369                         copy->iov[i].iov_base, copy->iov[i].iov_len);
370 }
371
372 static inline __u16 read_avail_idx(struct mic_vring *vr)
373 {
374         return READ_ONCE(vr->info->avail_idx);
375 }
376
377 static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
378                                 struct mic_copy_desc *copy, ssize_t len)
379 {
380         copy->vr_idx = tx ? 0 : 1;
381         copy->update_used = true;
382         if (type == VIRTIO_ID_NET)
383                 copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
384         else
385                 copy->iov[0].iov_len = len;
386 }
387
388 /* Central API which triggers the copies */
389 static int
390 mic_virtio_copy(struct mic_info *mic, int fd,
391                 struct mic_vring *vr, struct mic_copy_desc *copy)
392 {
393         int ret;
394
395         ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
396         if (ret) {
397                 mpsslog("%s %s %d errno %s ret %d\n",
398                         mic->name, __func__, __LINE__,
399                         strerror(errno), ret);
400         }
401         return ret;
402 }
403
404 static inline unsigned _vring_size(unsigned int num, unsigned long align)
405 {
406         return _ALIGN_UP(((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num)
407                                 + align - 1) & ~(align - 1))
408                 + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num, 4);
409 }
410
411 /*
412  * This initialization routine requires at least one
413  * vring i.e. vr0. vr1 is optional.
414  */
415 static void *
416 init_vr(struct mic_info *mic, int fd, int type,
417         struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
418 {
419         int vr_size;
420         char *va;
421
422         vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
423                                          MIC_VIRTIO_RING_ALIGN) +
424                              sizeof(struct _mic_vring_info));
425         va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
426                 PROT_READ, MAP_SHARED, fd, 0);
427         if (MAP_FAILED == va) {
428                 mpsslog("%s %s %d mmap failed errno %s\n",
429                         mic->name, __func__, __LINE__,
430                         strerror(errno));
431                 goto done;
432         }
433         set_dp(mic, type, va);
434         vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
435         vr0->info = vr0->va +
436                 _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
437         vring_init(&vr0->vr,
438                    MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
439         mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
440                 __func__, mic->name, vr0->va, vr0->info, vr_size,
441                 _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
442         mpsslog("magic 0x%x expected 0x%x\n",
443                 le32toh(vr0->info->magic), MIC_MAGIC + type);
444         assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
445         if (vr1) {
446                 vr1->va = (struct mic_vring *)
447                         &va[MIC_DEVICE_PAGE_END + vr_size];
448                 vr1->info = vr1->va + _vring_size(MIC_VRING_ENTRIES,
449                         MIC_VIRTIO_RING_ALIGN);
450                 vring_init(&vr1->vr,
451                            MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
452                 mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
453                         __func__, mic->name, vr1->va, vr1->info, vr_size,
454                         _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
455                 mpsslog("magic 0x%x expected 0x%x\n",
456                         le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
457                 assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
458         }
459 done:
460         return va;
461 }
462
463 static int
464 wait_for_card_driver(struct mic_info *mic, int fd, int type)
465 {
466         struct pollfd pollfd;
467         int err;
468         struct mic_device_desc *desc = get_device_desc(mic, type);
469         __u8 prev_status;
470
471         if (!desc)
472                 return -ENODEV;
473         prev_status = desc->status;
474         pollfd.fd = fd;
475         mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
476                 mic->name, __func__, type, desc->status);
477
478         while (1) {
479                 pollfd.events = POLLIN;
480                 pollfd.revents = 0;
481                 err = poll(&pollfd, 1, -1);
482                 if (err < 0) {
483                         mpsslog("%s %s poll failed %s\n",
484                                 mic->name, __func__, strerror(errno));
485                         continue;
486                 }
487
488                 if (pollfd.revents) {
489                         if (desc->status != prev_status) {
490                                 mpsslog("%s %s Waiting... desc-> type %d "
491                                         "status 0x%x\n",
492                                         mic->name, __func__, type,
493                                         desc->status);
494                                 prev_status = desc->status;
495                         }
496                         if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
497                                 mpsslog("%s %s poll.revents %d\n",
498                                         mic->name, __func__, pollfd.revents);
499                                 mpsslog("%s %s desc-> type %d status 0x%x\n",
500                                         mic->name, __func__, type,
501                                         desc->status);
502                                 break;
503                         }
504                 }
505         }
506         return 0;
507 }
508
509 /* Spin till we have some descriptors */
510 static void
511 spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
512 {
513         __u16 avail_idx = read_avail_idx(vr);
514
515         while (avail_idx == le16toh(READ_ONCE(vr->vr.avail->idx))) {
516 #ifdef DEBUG
517                 mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
518                         mic->name, __func__,
519                         le16toh(vr->vr.avail->idx), vr->info->avail_idx);
520 #endif
521                 sched_yield();
522         }
523 }
524
525 static void *
526 virtio_net(void *arg)
527 {
528         static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
529         static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
530         struct iovec vnet_iov[2][2] = {
531                 { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
532                   { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
533                 { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
534                   { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
535         };
536         struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
537         struct mic_info *mic = (struct mic_info *)arg;
538         char if_name[IFNAMSIZ];
539         struct pollfd net_poll[MAX_NET_FD];
540         struct mic_vring tx_vr, rx_vr;
541         struct mic_copy_desc copy;
542         struct mic_device_desc *desc;
543         int err;
544
545         snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
546         mic->mic_net.tap_fd = tun_alloc(mic, if_name);
547         if (mic->mic_net.tap_fd < 0)
548                 goto done;
549
550         if (tap_configure(mic, if_name))
551                 goto done;
552         mpsslog("MIC name %s id %d\n", mic->name, mic->id);
553
554         net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
555         net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
556         net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
557         net_poll[NET_FD_TUN].events = POLLIN;
558
559         if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
560                                   VIRTIO_ID_NET, &tx_vr, &rx_vr,
561                 virtnet_dev_page.dd.num_vq)) {
562                 mpsslog("%s init_vr failed %s\n",
563                         mic->name, strerror(errno));
564                 goto done;
565         }
566
567         copy.iovcnt = 2;
568         desc = get_device_desc(mic, VIRTIO_ID_NET);
569
570         while (1) {
571                 ssize_t len;
572
573                 net_poll[NET_FD_VIRTIO_NET].revents = 0;
574                 net_poll[NET_FD_TUN].revents = 0;
575
576                 /* Start polling for data from tap and virtio net */
577                 err = poll(net_poll, 2, -1);
578                 if (err < 0) {
579                         mpsslog("%s poll failed %s\n",
580                                 __func__, strerror(errno));
581                         continue;
582                 }
583                 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
584                         err = wait_for_card_driver(mic,
585                                                    mic->mic_net.virtio_net_fd,
586                                                    VIRTIO_ID_NET);
587                         if (err) {
588                                 mpsslog("%s %s %d Exiting...\n",
589                                         mic->name, __func__, __LINE__);
590                                 break;
591                         }
592                 }
593                 /*
594                  * Check if there is data to be read from TUN and write to
595                  * virtio net fd if there is.
596                  */
597                 if (net_poll[NET_FD_TUN].revents & POLLIN) {
598                         copy.iov = iov0;
599                         len = readv(net_poll[NET_FD_TUN].fd,
600                                 copy.iov, copy.iovcnt);
601                         if (len > 0) {
602                                 struct virtio_net_hdr *hdr
603                                         = (struct virtio_net_hdr *)vnet_hdr[0];
604
605                                 /* Disable checksums on the card since we are on
606                                    a reliable PCIe link */
607                                 hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
608 #ifdef DEBUG
609                                 mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
610                                         __func__, __LINE__, hdr->flags);
611                                 mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
612                                         copy.out_len, hdr->gso_type);
613 #endif
614 #ifdef DEBUG
615                                 disp_iovec(mic, &copy, __func__, __LINE__);
616                                 mpsslog("%s %s %d read from tap 0x%lx\n",
617                                         mic->name, __func__, __LINE__,
618                                         len);
619 #endif
620                                 spin_for_descriptors(mic, &tx_vr);
621                                 txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
622                                              len);
623
624                                 err = mic_virtio_copy(mic,
625                                         mic->mic_net.virtio_net_fd, &tx_vr,
626                                         &copy);
627                                 if (err < 0) {
628                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
629                                                 mic->name, __func__, __LINE__,
630                                                 strerror(errno));
631                                 }
632                                 if (!err)
633                                         verify_out_len(mic, &copy);
634 #ifdef DEBUG
635                                 disp_iovec(mic, &copy, __func__, __LINE__);
636                                 mpsslog("%s %s %d wrote to net 0x%lx\n",
637                                         mic->name, __func__, __LINE__,
638                                         sum_iovec_len(&copy));
639 #endif
640                                 /* Reinitialize IOV for next run */
641                                 iov0[1].iov_len = MAX_NET_PKT_SIZE;
642                         } else if (len < 0) {
643                                 disp_iovec(mic, &copy, __func__, __LINE__);
644                                 mpsslog("%s %s %d read failed %s ", mic->name,
645                                         __func__, __LINE__, strerror(errno));
646                                 mpsslog("cnt %d sum %zd\n",
647                                         copy.iovcnt, sum_iovec_len(&copy));
648                         }
649                 }
650
651                 /*
652                  * Check if there is data to be read from virtio net and
653                  * write to TUN if there is.
654                  */
655                 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
656                         while (rx_vr.info->avail_idx !=
657                                 le16toh(rx_vr.vr.avail->idx)) {
658                                 copy.iov = iov1;
659                                 txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
660                                              MAX_NET_PKT_SIZE
661                                         + sizeof(struct virtio_net_hdr));
662
663                                 err = mic_virtio_copy(mic,
664                                         mic->mic_net.virtio_net_fd, &rx_vr,
665                                         &copy);
666                                 if (!err) {
667 #ifdef DEBUG
668                                         struct virtio_net_hdr *hdr
669                                                 = (struct virtio_net_hdr *)
670                                                         vnet_hdr[1];
671
672                                         mpsslog("%s %s %d hdr->flags 0x%x, ",
673                                                 mic->name, __func__, __LINE__,
674                                                 hdr->flags);
675                                         mpsslog("out_len %d gso_type 0x%x\n",
676                                                 copy.out_len,
677                                                 hdr->gso_type);
678 #endif
679                                         /* Set the correct output iov_len */
680                                         iov1[1].iov_len = copy.out_len -
681                                                 sizeof(struct virtio_net_hdr);
682                                         verify_out_len(mic, &copy);
683 #ifdef DEBUG
684                                         disp_iovec(mic, &copy, __func__,
685                                                    __LINE__);
686                                         mpsslog("%s %s %d ",
687                                                 mic->name, __func__, __LINE__);
688                                         mpsslog("read from net 0x%lx\n",
689                                                 sum_iovec_len(&copy));
690 #endif
691                                         len = writev(net_poll[NET_FD_TUN].fd,
692                                                 copy.iov, copy.iovcnt);
693                                         if (len != sum_iovec_len(&copy)) {
694                                                 mpsslog("Tun write failed %s ",
695                                                         strerror(errno));
696                                                 mpsslog("len 0x%zx ", len);
697                                                 mpsslog("read_len 0x%zx\n",
698                                                         sum_iovec_len(&copy));
699                                         } else {
700 #ifdef DEBUG
701                                                 disp_iovec(mic, &copy, __func__,
702                                                            __LINE__);
703                                                 mpsslog("%s %s %d ",
704                                                         mic->name, __func__,
705                                                         __LINE__);
706                                                 mpsslog("wrote to tap 0x%lx\n",
707                                                         len);
708 #endif
709                                         }
710                                 } else {
711                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
712                                                 mic->name, __func__, __LINE__,
713                                                 strerror(errno));
714                                         break;
715                                 }
716                         }
717                 }
718                 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
719                         mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
720         }
721 done:
722         pthread_exit(NULL);
723 }
724
725 /* virtio_console */
726 #define VIRTIO_CONSOLE_FD 0
727 #define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
728 #define MAX_CONSOLE_FD (MONITOR_FD + 1)  /* must be the last one + 1 */
729 #define MAX_BUFFER_SIZE PAGE_SIZE
730
731 static void *
732 virtio_console(void *arg)
733 {
734         static __u8 vcons_buf[2][PAGE_SIZE];
735         struct iovec vcons_iov[2] = {
736                 { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
737                 { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
738         };
739         struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
740         struct mic_info *mic = (struct mic_info *)arg;
741         int err;
742         struct pollfd console_poll[MAX_CONSOLE_FD];
743         int pty_fd;
744         char *pts_name;
745         ssize_t len;
746         struct mic_vring tx_vr, rx_vr;
747         struct mic_copy_desc copy;
748         struct mic_device_desc *desc;
749
750         pty_fd = posix_openpt(O_RDWR);
751         if (pty_fd < 0) {
752                 mpsslog("can't open a pseudoterminal master device: %s\n",
753                         strerror(errno));
754                 goto _return;
755         }
756         pts_name = ptsname(pty_fd);
757         if (pts_name == NULL) {
758                 mpsslog("can't get pts name\n");
759                 goto _close_pty;
760         }
761         printf("%s console message goes to %s\n", mic->name, pts_name);
762         mpsslog("%s console message goes to %s\n", mic->name, pts_name);
763         err = grantpt(pty_fd);
764         if (err < 0) {
765                 mpsslog("can't grant access: %s %s\n",
766                         pts_name, strerror(errno));
767                 goto _close_pty;
768         }
769         err = unlockpt(pty_fd);
770         if (err < 0) {
771                 mpsslog("can't unlock a pseudoterminal: %s %s\n",
772                         pts_name, strerror(errno));
773                 goto _close_pty;
774         }
775         console_poll[MONITOR_FD].fd = pty_fd;
776         console_poll[MONITOR_FD].events = POLLIN;
777
778         console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
779         console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
780
781         if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
782                                   VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
783                 virtcons_dev_page.dd.num_vq)) {
784                 mpsslog("%s init_vr failed %s\n",
785                         mic->name, strerror(errno));
786                 goto _close_pty;
787         }
788
789         copy.iovcnt = 1;
790         desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
791
792         for (;;) {
793                 console_poll[MONITOR_FD].revents = 0;
794                 console_poll[VIRTIO_CONSOLE_FD].revents = 0;
795                 err = poll(console_poll, MAX_CONSOLE_FD, -1);
796                 if (err < 0) {
797                         mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
798                                 strerror(errno));
799                         continue;
800                 }
801                 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
802                         err = wait_for_card_driver(mic,
803                                         mic->mic_console.virtio_console_fd,
804                                         VIRTIO_ID_CONSOLE);
805                         if (err) {
806                                 mpsslog("%s %s %d Exiting...\n",
807                                         mic->name, __func__, __LINE__);
808                                 break;
809                         }
810                 }
811
812                 if (console_poll[MONITOR_FD].revents & POLLIN) {
813                         copy.iov = iov0;
814                         len = readv(pty_fd, copy.iov, copy.iovcnt);
815                         if (len > 0) {
816 #ifdef DEBUG
817                                 disp_iovec(mic, &copy, __func__, __LINE__);
818                                 mpsslog("%s %s %d read from tap 0x%lx\n",
819                                         mic->name, __func__, __LINE__,
820                                         len);
821 #endif
822                                 spin_for_descriptors(mic, &tx_vr);
823                                 txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
824                                              &copy, len);
825
826                                 err = mic_virtio_copy(mic,
827                                         mic->mic_console.virtio_console_fd,
828                                         &tx_vr, &copy);
829                                 if (err < 0) {
830                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
831                                                 mic->name, __func__, __LINE__,
832                                                 strerror(errno));
833                                 }
834                                 if (!err)
835                                         verify_out_len(mic, &copy);
836 #ifdef DEBUG
837                                 disp_iovec(mic, &copy, __func__, __LINE__);
838                                 mpsslog("%s %s %d wrote to net 0x%lx\n",
839                                         mic->name, __func__, __LINE__,
840                                         sum_iovec_len(&copy));
841 #endif
842                                 /* Reinitialize IOV for next run */
843                                 iov0->iov_len = PAGE_SIZE;
844                         } else if (len < 0) {
845                                 disp_iovec(mic, &copy, __func__, __LINE__);
846                                 mpsslog("%s %s %d read failed %s ",
847                                         mic->name, __func__, __LINE__,
848                                         strerror(errno));
849                                 mpsslog("cnt %d sum %zd\n",
850                                         copy.iovcnt, sum_iovec_len(&copy));
851                         }
852                 }
853
854                 if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
855                         while (rx_vr.info->avail_idx !=
856                                 le16toh(rx_vr.vr.avail->idx)) {
857                                 copy.iov = iov1;
858                                 txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
859                                              &copy, PAGE_SIZE);
860
861                                 err = mic_virtio_copy(mic,
862                                         mic->mic_console.virtio_console_fd,
863                                         &rx_vr, &copy);
864                                 if (!err) {
865                                         /* Set the correct output iov_len */
866                                         iov1->iov_len = copy.out_len;
867                                         verify_out_len(mic, &copy);
868 #ifdef DEBUG
869                                         disp_iovec(mic, &copy, __func__,
870                                                    __LINE__);
871                                         mpsslog("%s %s %d ",
872                                                 mic->name, __func__, __LINE__);
873                                         mpsslog("read from net 0x%lx\n",
874                                                 sum_iovec_len(&copy));
875 #endif
876                                         len = writev(pty_fd,
877                                                 copy.iov, copy.iovcnt);
878                                         if (len != sum_iovec_len(&copy)) {
879                                                 mpsslog("Tun write failed %s ",
880                                                         strerror(errno));
881                                                 mpsslog("len 0x%zx ", len);
882                                                 mpsslog("read_len 0x%zx\n",
883                                                         sum_iovec_len(&copy));
884                                         } else {
885 #ifdef DEBUG
886                                                 disp_iovec(mic, &copy, __func__,
887                                                            __LINE__);
888                                                 mpsslog("%s %s %d ",
889                                                         mic->name, __func__,
890                                                         __LINE__);
891                                                 mpsslog("wrote to tap 0x%lx\n",
892                                                         len);
893 #endif
894                                         }
895                                 } else {
896                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
897                                                 mic->name, __func__, __LINE__,
898                                                 strerror(errno));
899                                         break;
900                                 }
901                         }
902                 }
903                 if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
904                         mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
905         }
906 _close_pty:
907         close(pty_fd);
908 _return:
909         pthread_exit(NULL);
910 }
911
912 static void
913 add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
914 {
915         char path[PATH_MAX];
916         int fd, err;
917
918         snprintf(path, PATH_MAX, "/dev/vop_virtio%d", mic->id);
919         fd = open(path, O_RDWR);
920         if (fd < 0) {
921                 mpsslog("Could not open %s %s\n", path, strerror(errno));
922                 return;
923         }
924
925         err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
926         if (err < 0) {
927                 mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
928                 close(fd);
929                 return;
930         }
931         switch (dd->type) {
932         case VIRTIO_ID_NET:
933                 mic->mic_net.virtio_net_fd = fd;
934                 mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
935                 break;
936         case VIRTIO_ID_CONSOLE:
937                 mic->mic_console.virtio_console_fd = fd;
938                 mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
939                 break;
940         case VIRTIO_ID_BLOCK:
941                 mic->mic_virtblk.virtio_block_fd = fd;
942                 mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
943                 break;
944         }
945 }
946
947 static bool
948 set_backend_file(struct mic_info *mic)
949 {
950         FILE *config;
951         char buff[PATH_MAX], *line, *evv, *p;
952
953         snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
954         config = fopen(buff, "r");
955         if (config == NULL)
956                 return false;
957         do {  /* look for "virtblk_backend=XXXX" */
958                 line = fgets(buff, PATH_MAX, config);
959                 if (line == NULL)
960                         break;
961                 if (*line == '#')
962                         continue;
963                 p = strchr(line, '\n');
964                 if (p)
965                         *p = '\0';
966         } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
967         fclose(config);
968         if (line == NULL)
969                 return false;
970         evv = strchr(line, '=');
971         if (evv == NULL)
972                 return false;
973         mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
974         if (mic->mic_virtblk.backend_file == NULL) {
975                 mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
976                 return false;
977         }
978         strcpy(mic->mic_virtblk.backend_file, evv + 1);
979         return true;
980 }
981
982 #define SECTOR_SIZE 512
983 static bool
984 set_backend_size(struct mic_info *mic)
985 {
986         mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
987                 SEEK_END);
988         if (mic->mic_virtblk.backend_size < 0) {
989                 mpsslog("%s: can't seek: %s\n",
990                         mic->name, mic->mic_virtblk.backend_file);
991                 return false;
992         }
993         virtblk_dev_page.blk_config.capacity =
994                 mic->mic_virtblk.backend_size / SECTOR_SIZE;
995         if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
996                 virtblk_dev_page.blk_config.capacity++;
997
998         virtblk_dev_page.blk_config.capacity =
999                 htole64(virtblk_dev_page.blk_config.capacity);
1000
1001         return true;
1002 }
1003
1004 static bool
1005 open_backend(struct mic_info *mic)
1006 {
1007         if (!set_backend_file(mic))
1008                 goto _error_exit;
1009         mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
1010         if (mic->mic_virtblk.backend < 0) {
1011                 mpsslog("%s: can't open: %s\n", mic->name,
1012                         mic->mic_virtblk.backend_file);
1013                 goto _error_free;
1014         }
1015         if (!set_backend_size(mic))
1016                 goto _error_close;
1017         mic->mic_virtblk.backend_addr = mmap(NULL,
1018                 mic->mic_virtblk.backend_size,
1019                 PROT_READ|PROT_WRITE, MAP_SHARED,
1020                 mic->mic_virtblk.backend, 0L);
1021         if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1022                 mpsslog("%s: can't map: %s %s\n",
1023                         mic->name, mic->mic_virtblk.backend_file,
1024                         strerror(errno));
1025                 goto _error_close;
1026         }
1027         return true;
1028
1029  _error_close:
1030         close(mic->mic_virtblk.backend);
1031  _error_free:
1032         free(mic->mic_virtblk.backend_file);
1033  _error_exit:
1034         return false;
1035 }
1036
1037 static void
1038 close_backend(struct mic_info *mic)
1039 {
1040         munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1041         close(mic->mic_virtblk.backend);
1042         free(mic->mic_virtblk.backend_file);
1043 }
1044
1045 static bool
1046 start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1047 {
1048         if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
1049                 mpsslog("%s: blk_config is not 8 byte aligned.\n",
1050                         mic->name);
1051                 return false;
1052         }
1053         add_virtio_device(mic, &virtblk_dev_page.dd);
1054         if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1055                                   VIRTIO_ID_BLOCK, vring, NULL,
1056                                   virtblk_dev_page.dd.num_vq)) {
1057                 mpsslog("%s init_vr failed %s\n",
1058                         mic->name, strerror(errno));
1059                 return false;
1060         }
1061         return true;
1062 }
1063
1064 static void
1065 stop_virtblk(struct mic_info *mic)
1066 {
1067         int vr_size, ret;
1068
1069         vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
1070                                          MIC_VIRTIO_RING_ALIGN) +
1071                              sizeof(struct _mic_vring_info));
1072         ret = munmap(mic->mic_virtblk.block_dp,
1073                 MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1074         if (ret < 0)
1075                 mpsslog("%s munmap errno %d\n", mic->name, errno);
1076         close(mic->mic_virtblk.virtio_block_fd);
1077 }
1078
1079 static __u8
1080 header_error_check(struct vring_desc *desc)
1081 {
1082         if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1083                 mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1084                         __func__, __LINE__);
1085                 return -EIO;
1086         }
1087         if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1088                 mpsslog("%s() %d: alone\n",
1089                         __func__, __LINE__);
1090                 return -EIO;
1091         }
1092         if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1093                 mpsslog("%s() %d: not read\n",
1094                         __func__, __LINE__);
1095                 return -EIO;
1096         }
1097         return 0;
1098 }
1099
1100 static int
1101 read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1102 {
1103         struct iovec iovec;
1104         struct mic_copy_desc copy;
1105
1106         iovec.iov_len = sizeof(*hdr);
1107         iovec.iov_base = hdr;
1108         copy.iov = &iovec;
1109         copy.iovcnt = 1;
1110         copy.vr_idx = 0;  /* only one vring on virtio_block */
1111         copy.update_used = false;  /* do not update used index */
1112         return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1113 }
1114
1115 static int
1116 transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1117 {
1118         struct mic_copy_desc copy;
1119
1120         copy.iov = iovec;
1121         copy.iovcnt = iovcnt;
1122         copy.vr_idx = 0;  /* only one vring on virtio_block */
1123         copy.update_used = false;  /* do not update used index */
1124         return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1125 }
1126
1127 static __u8
1128 status_error_check(struct vring_desc *desc)
1129 {
1130         if (le32toh(desc->len) != sizeof(__u8)) {
1131                 mpsslog("%s() %d: length is not sizeof(status)\n",
1132                         __func__, __LINE__);
1133                 return -EIO;
1134         }
1135         return 0;
1136 }
1137
1138 static int
1139 write_status(int fd, __u8 *status)
1140 {
1141         struct iovec iovec;
1142         struct mic_copy_desc copy;
1143
1144         iovec.iov_base = status;
1145         iovec.iov_len = sizeof(*status);
1146         copy.iov = &iovec;
1147         copy.iovcnt = 1;
1148         copy.vr_idx = 0;  /* only one vring on virtio_block */
1149         copy.update_used = true; /* Update used index */
1150         return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1151 }
1152
1153 #ifndef VIRTIO_BLK_T_GET_ID
1154 #define VIRTIO_BLK_T_GET_ID    8
1155 #endif
1156
1157 static void *
1158 virtio_block(void *arg)
1159 {
1160         struct mic_info *mic = (struct mic_info *)arg;
1161         int ret;
1162         struct pollfd block_poll;
1163         struct mic_vring vring;
1164         __u16 avail_idx;
1165         __u32 desc_idx;
1166         struct vring_desc *desc;
1167         struct iovec *iovec, *piov;
1168         __u8 status;
1169         __u32 buffer_desc_idx;
1170         struct virtio_blk_outhdr hdr;
1171         void *fos;
1172
1173         for (;;) {  /* forever */
1174                 if (!open_backend(mic)) { /* No virtblk */
1175                         for (mic->mic_virtblk.signaled = 0;
1176                                 !mic->mic_virtblk.signaled;)
1177                                 sleep(1);
1178                         continue;
1179                 }
1180
1181                 /* backend file is specified. */
1182                 if (!start_virtblk(mic, &vring))
1183                         goto _close_backend;
1184                 iovec = malloc(sizeof(*iovec) *
1185                         le32toh(virtblk_dev_page.blk_config.seg_max));
1186                 if (!iovec) {
1187                         mpsslog("%s: can't alloc iovec: %s\n",
1188                                 mic->name, strerror(ENOMEM));
1189                         goto _stop_virtblk;
1190                 }
1191
1192                 block_poll.fd = mic->mic_virtblk.virtio_block_fd;
1193                 block_poll.events = POLLIN;
1194                 for (mic->mic_virtblk.signaled = 0;
1195                      !mic->mic_virtblk.signaled;) {
1196                         block_poll.revents = 0;
1197                                         /* timeout in 1 sec to see signaled */
1198                         ret = poll(&block_poll, 1, 1000);
1199                         if (ret < 0) {
1200                                 mpsslog("%s %d: poll failed: %s\n",
1201                                         __func__, __LINE__,
1202                                         strerror(errno));
1203                                 continue;
1204                         }
1205
1206                         if (!(block_poll.revents & POLLIN)) {
1207 #ifdef DEBUG
1208                                 mpsslog("%s %d: block_poll.revents=0x%x\n",
1209                                         __func__, __LINE__, block_poll.revents);
1210 #endif
1211                                 continue;
1212                         }
1213
1214                         /* POLLIN */
1215                         while (vring.info->avail_idx !=
1216                                 le16toh(vring.vr.avail->idx)) {
1217                                 /* read header element */
1218                                 avail_idx =
1219                                         vring.info->avail_idx &
1220                                         (vring.vr.num - 1);
1221                                 desc_idx = le16toh(
1222                                         vring.vr.avail->ring[avail_idx]);
1223                                 desc = &vring.vr.desc[desc_idx];
1224 #ifdef DEBUG
1225                                 mpsslog("%s() %d: avail_idx=%d ",
1226                                         __func__, __LINE__,
1227                                         vring.info->avail_idx);
1228                                 mpsslog("vring.vr.num=%d desc=%p\n",
1229                                         vring.vr.num, desc);
1230 #endif
1231                                 status = header_error_check(desc);
1232                                 ret = read_header(
1233                                         mic->mic_virtblk.virtio_block_fd,
1234                                         &hdr, desc_idx);
1235                                 if (ret < 0) {
1236                                         mpsslog("%s() %d %s: ret=%d %s\n",
1237                                                 __func__, __LINE__,
1238                                                 mic->name, ret,
1239                                                 strerror(errno));
1240                                         break;
1241                                 }
1242                                 /* buffer element */
1243                                 piov = iovec;
1244                                 status = 0;
1245                                 fos = mic->mic_virtblk.backend_addr +
1246                                         (hdr.sector * SECTOR_SIZE);
1247                                 buffer_desc_idx = next_desc(desc);
1248                                 desc_idx = buffer_desc_idx;
1249                                 for (desc = &vring.vr.desc[buffer_desc_idx];
1250                                      desc->flags & VRING_DESC_F_NEXT;
1251                                      desc_idx = next_desc(desc),
1252                                              desc = &vring.vr.desc[desc_idx]) {
1253                                         piov->iov_len = desc->len;
1254                                         piov->iov_base = fos;
1255                                         piov++;
1256                                         fos += desc->len;
1257                                 }
1258                                 /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
1259                                 if (hdr.type & ~(VIRTIO_BLK_T_OUT |
1260                                         VIRTIO_BLK_T_GET_ID)) {
1261                                         /*
1262                                           VIRTIO_BLK_T_IN - does not do
1263                                           anything. Probably for documenting.
1264                                           VIRTIO_BLK_T_SCSI_CMD - for
1265                                           virtio_scsi.
1266                                           VIRTIO_BLK_T_FLUSH - turned off in
1267                                           config space.
1268                                           VIRTIO_BLK_T_BARRIER - defined but not
1269                                           used in anywhere.
1270                                         */
1271                                         mpsslog("%s() %d: type %x ",
1272                                                 __func__, __LINE__,
1273                                                 hdr.type);
1274                                         mpsslog("is not supported\n");
1275                                         status = -ENOTSUP;
1276
1277                                 } else {
1278                                         ret = transfer_blocks(
1279                                         mic->mic_virtblk.virtio_block_fd,
1280                                                 iovec,
1281                                                 piov - iovec);
1282                                         if (ret < 0 &&
1283                                             status != 0)
1284                                                 status = ret;
1285                                 }
1286                                 /* write status and update used pointer */
1287                                 if (status != 0)
1288                                         status = status_error_check(desc);
1289                                 ret = write_status(
1290                                         mic->mic_virtblk.virtio_block_fd,
1291                                         &status);
1292 #ifdef DEBUG
1293                                 mpsslog("%s() %d: write status=%d on desc=%p\n",
1294                                         __func__, __LINE__,
1295                                         status, desc);
1296 #endif
1297                         }
1298                 }
1299                 free(iovec);
1300 _stop_virtblk:
1301                 stop_virtblk(mic);
1302 _close_backend:
1303                 close_backend(mic);
1304         }  /* forever */
1305
1306         pthread_exit(NULL);
1307 }
1308
1309 static void
1310 reset(struct mic_info *mic)
1311 {
1312 #define RESET_TIMEOUT 120
1313         int i = RESET_TIMEOUT;
1314         setsysfs(mic->name, "state", "reset");
1315         while (i) {
1316                 char *state;
1317                 state = readsysfs(mic->name, "state");
1318                 if (!state)
1319                         goto retry;
1320                 mpsslog("%s: %s %d state %s\n",
1321                         mic->name, __func__, __LINE__, state);
1322
1323                 if (!strcmp(state, "ready")) {
1324                         free(state);
1325                         break;
1326                 }
1327                 free(state);
1328 retry:
1329                 sleep(1);
1330                 i--;
1331         }
1332 }
1333
1334 static int
1335 get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1336 {
1337         if (!strcmp(shutdown_status, "nop"))
1338                 return MIC_NOP;
1339         if (!strcmp(shutdown_status, "crashed"))
1340                 return MIC_CRASHED;
1341         if (!strcmp(shutdown_status, "halted"))
1342                 return MIC_HALTED;
1343         if (!strcmp(shutdown_status, "poweroff"))
1344                 return MIC_POWER_OFF;
1345         if (!strcmp(shutdown_status, "restart"))
1346                 return MIC_RESTART;
1347         mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1348         /* Invalid state */
1349         assert(0);
1350 };
1351
1352 static int get_mic_state(struct mic_info *mic)
1353 {
1354         char *state = NULL;
1355         enum mic_states mic_state;
1356
1357         while (!state) {
1358                 state = readsysfs(mic->name, "state");
1359                 sleep(1);
1360         }
1361         mpsslog("%s: %s %d state %s\n",
1362                 mic->name, __func__, __LINE__, state);
1363
1364         if (!strcmp(state, "ready")) {
1365                 mic_state = MIC_READY;
1366         } else if (!strcmp(state, "booting")) {
1367                 mic_state = MIC_BOOTING;
1368         } else if (!strcmp(state, "online")) {
1369                 mic_state = MIC_ONLINE;
1370         } else if (!strcmp(state, "shutting_down")) {
1371                 mic_state = MIC_SHUTTING_DOWN;
1372         } else if (!strcmp(state, "reset_failed")) {
1373                 mic_state = MIC_RESET_FAILED;
1374         } else if (!strcmp(state, "resetting")) {
1375                 mic_state = MIC_RESETTING;
1376         } else {
1377                 mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1378                 assert(0);
1379         }
1380
1381         free(state);
1382         return mic_state;
1383 };
1384
1385 static void mic_handle_shutdown(struct mic_info *mic)
1386 {
1387 #define SHUTDOWN_TIMEOUT 60
1388         int i = SHUTDOWN_TIMEOUT;
1389         char *shutdown_status;
1390         while (i) {
1391                 shutdown_status = readsysfs(mic->name, "shutdown_status");
1392                 if (!shutdown_status) {
1393                         sleep(1);
1394                         continue;
1395                 }
1396                 mpsslog("%s: %s %d shutdown_status %s\n",
1397                         mic->name, __func__, __LINE__, shutdown_status);
1398                 switch (get_mic_shutdown_status(mic, shutdown_status)) {
1399                 case MIC_RESTART:
1400                         mic->restart = 1;
1401                 case MIC_HALTED:
1402                 case MIC_POWER_OFF:
1403                 case MIC_CRASHED:
1404                         free(shutdown_status);
1405                         goto reset;
1406                 default:
1407                         break;
1408                 }
1409                 free(shutdown_status);
1410                 sleep(1);
1411                 i--;
1412         }
1413 reset:
1414         if (!i)
1415                 mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n",
1416                         mic->name, __func__, __LINE__, shutdown_status);
1417         reset(mic);
1418 }
1419
1420 static int open_state_fd(struct mic_info *mic)
1421 {
1422         char pathname[PATH_MAX];
1423         int fd;
1424
1425         snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1426                  MICSYSFSDIR, mic->name, "state");
1427
1428         fd = open(pathname, O_RDONLY);
1429         if (fd < 0)
1430                 mpsslog("%s: opening file %s failed %s\n",
1431                         mic->name, pathname, strerror(errno));
1432         return fd;
1433 }
1434
1435 static int block_till_state_change(int fd, struct mic_info *mic)
1436 {
1437         struct pollfd ufds[1];
1438         char value[PAGE_SIZE];
1439         int ret;
1440
1441         ufds[0].fd = fd;
1442         ufds[0].events = POLLERR | POLLPRI;
1443         ret = poll(ufds, 1, -1);
1444         if (ret < 0) {
1445                 mpsslog("%s: %s %d poll failed %s\n",
1446                         mic->name, __func__, __LINE__, strerror(errno));
1447                 return ret;
1448         }
1449
1450         ret = lseek(fd, 0, SEEK_SET);
1451         if (ret < 0) {
1452                 mpsslog("%s: %s %d Failed to seek to 0: %s\n",
1453                         mic->name, __func__, __LINE__, strerror(errno));
1454                 return ret;
1455         }
1456
1457         ret = read(fd, value, sizeof(value));
1458         if (ret < 0) {
1459                 mpsslog("%s: %s %d Failed to read sysfs entry: %s\n",
1460                         mic->name, __func__, __LINE__, strerror(errno));
1461                 return ret;
1462         }
1463
1464         return 0;
1465 }
1466
1467 static void *
1468 mic_config(void *arg)
1469 {
1470         struct mic_info *mic = (struct mic_info *)arg;
1471         int fd, ret, stat = 0;
1472
1473         fd = open_state_fd(mic);
1474         if (fd < 0) {
1475                 mpsslog("%s: %s %d open state fd failed %s\n",
1476                         mic->name, __func__, __LINE__, strerror(errno));
1477                 goto exit;
1478         }
1479
1480         do {
1481                 ret = block_till_state_change(fd, mic);
1482                 if (ret < 0) {
1483                         mpsslog("%s: %s %d block_till_state_change error %s\n",
1484                                 mic->name, __func__, __LINE__, strerror(errno));
1485                         goto close_exit;
1486                 }
1487
1488                 switch (get_mic_state(mic)) {
1489                 case MIC_SHUTTING_DOWN:
1490                         mic_handle_shutdown(mic);
1491                         break;
1492                 case MIC_READY:
1493                 case MIC_RESET_FAILED:
1494                         ret = kill(mic->pid, SIGTERM);
1495                         mpsslog("%s: %s %d kill pid %d ret %d\n",
1496                                 mic->name, __func__, __LINE__,
1497                                 mic->pid, ret);
1498                         if (!ret) {
1499                                 ret = waitpid(mic->pid, &stat,
1500                                               WIFSIGNALED(stat));
1501                                 mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1502                                         mic->name, __func__, __LINE__,
1503                                         ret, mic->pid);
1504                         }
1505                         if (mic->boot_on_resume) {
1506                                 setsysfs(mic->name, "state", "boot");
1507                                 mic->boot_on_resume = 0;
1508                         }
1509                         goto close_exit;
1510                 default:
1511                         break;
1512                 }
1513         } while (1);
1514
1515 close_exit:
1516         close(fd);
1517 exit:
1518         init_mic(mic);
1519         pthread_exit(NULL);
1520 }
1521
1522 static void
1523 set_cmdline(struct mic_info *mic)
1524 {
1525         char buffer[PATH_MAX];
1526         int len;
1527
1528         len = snprintf(buffer, PATH_MAX,
1529                 "clocksource=tsc highres=off nohz=off ");
1530         len += snprintf(buffer + len, PATH_MAX - len,
1531                 "cpufreq_on;corec6_off;pc3_off;pc6_off ");
1532         len += snprintf(buffer + len, PATH_MAX - len,
1533                 "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1534                 mic->id + 1);
1535
1536         setsysfs(mic->name, "cmdline", buffer);
1537         mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1538         snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id + 1);
1539         mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1540 }
1541
1542 static void
1543 set_log_buf_info(struct mic_info *mic)
1544 {
1545         int fd;
1546         off_t len;
1547         char system_map[] = "/lib/firmware/mic/System.map";
1548         char *map, *temp, log_buf[17] = {'\0'};
1549
1550         fd = open(system_map, O_RDONLY);
1551         if (fd < 0) {
1552                 mpsslog("%s: Opening System.map failed: %d\n",
1553                         mic->name, errno);
1554                 return;
1555         }
1556         len = lseek(fd, 0, SEEK_END);
1557         if (len < 0) {
1558                 mpsslog("%s: Reading System.map size failed: %d\n",
1559                         mic->name, errno);
1560                 close(fd);
1561                 return;
1562         }
1563         map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1564         if (map == MAP_FAILED) {
1565                 mpsslog("%s: mmap of System.map failed: %d\n",
1566                         mic->name, errno);
1567                 close(fd);
1568                 return;
1569         }
1570         temp = strstr(map, "__log_buf");
1571         if (!temp) {
1572                 mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1573                 munmap(map, len);
1574                 close(fd);
1575                 return;
1576         }
1577         strncpy(log_buf, temp - 19, 16);
1578         setsysfs(mic->name, "log_buf_addr", log_buf);
1579         mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1580         temp = strstr(map, "log_buf_len");
1581         if (!temp) {
1582                 mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1583                 munmap(map, len);
1584                 close(fd);
1585                 return;
1586         }
1587         strncpy(log_buf, temp - 19, 16);
1588         setsysfs(mic->name, "log_buf_len", log_buf);
1589         mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1590         munmap(map, len);
1591         close(fd);
1592 }
1593
1594 static void
1595 change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1596 {
1597         struct mic_info *mic;
1598
1599         for (mic = mic_list.next; mic != NULL; mic = mic->next)
1600                 mic->mic_virtblk.signaled = 1/* true */;
1601 }
1602
/*
 * Program a card's boot-time sysfs parameters: the log buffer
 * information first, then the kernel command line.
 */
static void
set_mic_boot_params(struct mic_info *mic)
{
        set_log_buf_info(mic);  /* log_buf_addr / log_buf_len */
        set_cmdline(mic);       /* cmdline */
}
1609
1610 static void *
1611 init_mic(void *arg)
1612 {
1613         struct mic_info *mic = (struct mic_info *)arg;
1614         struct sigaction ignore = {
1615                 .sa_flags = 0,
1616                 .sa_handler = SIG_IGN
1617         };
1618         struct sigaction act = {
1619                 .sa_flags = SA_SIGINFO,
1620                 .sa_sigaction = change_virtblk_backend,
1621         };
1622         char buffer[PATH_MAX];
1623         int err, fd;
1624
1625         /*
1626          * Currently, one virtio block device is supported for each MIC card
1627          * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1628          * The signal informs the virtio block backend about a change in the
1629          * configuration file which specifies the virtio backend file name on
1630          * the host. Virtio block backend then re-reads the configuration file
1631          * and switches to the new block device. This signalling mechanism may
1632          * not be required once multiple virtio block devices are supported by
1633          * the MIC daemon.
1634          */
1635         sigaction(SIGUSR1, &ignore, NULL);
1636 retry:
1637         fd = open_state_fd(mic);
1638         if (fd < 0) {
1639                 mpsslog("%s: %s %d open state fd failed %s\n",
1640                         mic->name, __func__, __LINE__, strerror(errno));
1641                 sleep(2);
1642                 goto retry;
1643         }
1644
1645         if (mic->restart) {
1646                 snprintf(buffer, PATH_MAX, "boot");
1647                 setsysfs(mic->name, "state", buffer);
1648                 mpsslog("%s restarting mic %d\n",
1649                         mic->name, mic->restart);
1650                 mic->restart = 0;
1651         }
1652
1653         while (1) {
1654                 while (block_till_state_change(fd, mic)) {
1655                         mpsslog("%s: %s %d block_till_state_change error %s\n",
1656                                 mic->name, __func__, __LINE__, strerror(errno));
1657                         sleep(2);
1658                         continue;
1659                 }
1660
1661                 if (get_mic_state(mic) == MIC_BOOTING)
1662                         break;
1663         }
1664
1665         mic->pid = fork();
1666         switch (mic->pid) {
1667         case 0:
1668                 add_virtio_device(mic, &virtcons_dev_page.dd);
1669                 add_virtio_device(mic, &virtnet_dev_page.dd);
1670                 err = pthread_create(&mic->mic_console.console_thread, NULL,
1671                         virtio_console, mic);
1672                 if (err)
1673                         mpsslog("%s virtcons pthread_create failed %s\n",
1674                                 mic->name, strerror(err));
1675                 err = pthread_create(&mic->mic_net.net_thread, NULL,
1676                         virtio_net, mic);
1677                 if (err)
1678                         mpsslog("%s virtnet pthread_create failed %s\n",
1679                                 mic->name, strerror(err));
1680                 err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1681                         virtio_block, mic);
1682                 if (err)
1683                         mpsslog("%s virtblk pthread_create failed %s\n",
1684                                 mic->name, strerror(err));
1685                 sigemptyset(&act.sa_mask);
1686                 err = sigaction(SIGUSR1, &act, NULL);
1687                 if (err)
1688                         mpsslog("%s sigaction SIGUSR1 failed %s\n",
1689                                 mic->name, strerror(errno));
1690                 while (1)
1691                         sleep(60);
1692         case -1:
1693                 mpsslog("fork failed MIC name %s id %d errno %d\n",
1694                         mic->name, mic->id, errno);
1695                 break;
1696         default:
1697                 err = pthread_create(&mic->config_thread, NULL,
1698                                      mic_config, mic);
1699                 if (err)
1700                         mpsslog("%s mic_config pthread_create failed %s\n",
1701                                 mic->name, strerror(err));
1702         }
1703
1704         return NULL;
1705 }
1706
1707 static void
1708 start_daemon(void)
1709 {
1710         struct mic_info *mic;
1711         int err;
1712
1713         for (mic = mic_list.next; mic; mic = mic->next) {
1714                 set_mic_boot_params(mic);
1715                 err = pthread_create(&mic->init_thread, NULL, init_mic, mic);
1716                 if (err)
1717                         mpsslog("%s init_mic pthread_create failed %s\n",
1718                                 mic->name, strerror(err));
1719         }
1720
1721         while (1)
1722                 sleep(60);
1723 }
1724
1725 static int
1726 init_mic_list(void)
1727 {
1728         struct mic_info *mic = &mic_list;
1729         struct dirent *file;
1730         DIR *dp;
1731         int cnt = 0;
1732
1733         dp = opendir(MICSYSFSDIR);
1734         if (!dp)
1735                 return 0;
1736
1737         while ((file = readdir(dp)) != NULL) {
1738                 if (!strncmp(file->d_name, "mic", 3)) {
1739                         mic->next = calloc(1, sizeof(struct mic_info));
1740                         if (mic->next) {
1741                                 mic = mic->next;
1742                                 mic->id = atoi(&file->d_name[3]);
1743                                 mic->name = malloc(strlen(file->d_name) + 16);
1744                                 if (mic->name)
1745                                         strcpy(mic->name, file->d_name);
1746                                 mpsslog("MIC name %s id %d\n", mic->name,
1747                                         mic->id);
1748                                 cnt++;
1749                         }
1750                 }
1751         }
1752
1753         closedir(dp);
1754         return cnt;
1755 }
1756
1757 void
1758 mpsslog(char *format, ...)
1759 {
1760         va_list args;
1761         char buffer[4096];
1762         char ts[52], *ts1;
1763         time_t t;
1764
1765         if (logfp == NULL)
1766                 return;
1767
1768         va_start(args, format);
1769         vsprintf(buffer, format, args);
1770         va_end(args);
1771
1772         time(&t);
1773         ts1 = ctime_r(&t, ts);
1774         ts1[strlen(ts1) - 1] = '\0';
1775         fprintf(logfp, "%s: %s", ts1, buffer);
1776
1777         fflush(logfp);
1778 }
1779
1780 int
1781 main(int argc, char *argv[])
1782 {
1783         int cnt;
1784         pid_t pid;
1785
1786         myname = argv[0];
1787
1788         logfp = fopen(LOGFILE_NAME, "a+");
1789         if (!logfp) {
1790                 fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1791                 exit(1);
1792         }
1793         pid = fork();
1794         switch (pid) {
1795         case 0:
1796                 break;
1797         case -1:
1798                 exit(2);
1799         default:
1800                 exit(0);
1801         }
1802
1803         mpsslog("MIC Daemon start\n");
1804
1805         cnt = init_mic_list();
1806         if (cnt == 0) {
1807                 mpsslog("MIC module not loaded\n");
1808                 exit(3);
1809         }
1810         mpsslog("MIC found %d devices\n", cnt);
1811
1812         start_daemon();
1813
1814         exit(0);
1815 }