Merge tag 'virtio-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[sfrench/cifs-2.6.git] / Documentation / mic / mpssd / mpssd.c
1 /*
2  * Intel MIC Platform Software Stack (MPSS)
3  *
4  * Copyright(c) 2013 Intel Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License, version 2, as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * General Public License for more details.
14  *
15  * The full GNU General Public License is included in this distribution in
16  * the file called "COPYING".
17  *
18  * Intel MIC User Space Tools.
19  */
20
21 #define _GNU_SOURCE
22
23 #include <stdlib.h>
24 #include <fcntl.h>
25 #include <getopt.h>
26 #include <assert.h>
27 #include <unistd.h>
28 #include <stdbool.h>
29 #include <signal.h>
30 #include <poll.h>
31 #include <features.h>
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <sys/mman.h>
35 #include <sys/socket.h>
36 #include <linux/virtio_ring.h>
37 #include <linux/virtio_net.h>
38 #include <linux/virtio_console.h>
39 #include <linux/virtio_blk.h>
40 #include <linux/version.h>
41 #include "mpssd.h"
42 #include <linux/mic_ioctl.h>
43 #include <linux/mic_common.h>
44 #include <tools/endian.h>
45
/* Forward declaration; the definition is not in this part of the file. */
static void init_mic(struct mic_info *mic);

/* NOTE(review): presumably the stream mpsslog() writes to - confirm. */
static FILE *logfp;
/* NOTE(review): presumably the head of the list of MIC devices - confirm. */
static struct mic_info mic_list;
50
/* Number of elements of a true array (meaningless when given a pointer) */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/* Minimum of x and y after converting both to 'type'; each is evaluated once */
#define min_t(type, x, y) ({                            \
		type __min1 = (x);                      \
		type __min2 = (y);                      \
		__min1 < __min2 ? __min1 : __min2; })

/*
 * align addr on a size boundary - adjust address up/down if needed
 * (size must be a power of two). Every argument is parenthesized so that
 * arbitrary expressions, e.g. a ternary, expand correctly.
 */
#define _ALIGN_DOWN(addr, size)  ((addr) & (~((size) - 1)))
#define _ALIGN_UP(addr, size)    _ALIGN_DOWN((addr) + (size) - 1, size)

/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN(addr, size)     _ALIGN_UP(addr, size)

/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr)        _ALIGN(addr, PAGE_SIZE)

/* Force exactly one access to x through a volatile-qualified lvalue */
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

#define GSO_ENABLED             1
#define MAX_GSO_SIZE            (64 * 1024)
#define ETH_H_LEN               14
/* Largest packet buffer: GSO payload plus Ethernet header, 64-byte aligned */
#define MAX_NET_PKT_SIZE        (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
/* Offset just past the device page in the mmap()ed region (see init_vr) */
#define MIC_DEVICE_PAGE_END     0x1000

#ifndef VIRTIO_NET_HDR_F_DATA_VALID
#define VIRTIO_NET_HDR_F_DATA_VALID     2       /* Csum is valid */
#endif
79
80 static struct {
81         struct mic_device_desc dd;
82         struct mic_vqconfig vqconfig[2];
83         __u32 host_features, guest_acknowledgements;
84         struct virtio_console_config cons_config;
85 } virtcons_dev_page = {
86         .dd = {
87                 .type = VIRTIO_ID_CONSOLE,
88                 .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
89                 .feature_len = sizeof(virtcons_dev_page.host_features),
90                 .config_len = sizeof(virtcons_dev_page.cons_config),
91         },
92         .vqconfig[0] = {
93                 .num = htole16(MIC_VRING_ENTRIES),
94         },
95         .vqconfig[1] = {
96                 .num = htole16(MIC_VRING_ENTRIES),
97         },
98 };
99
100 static struct {
101         struct mic_device_desc dd;
102         struct mic_vqconfig vqconfig[2];
103         __u32 host_features, guest_acknowledgements;
104         struct virtio_net_config net_config;
105 } virtnet_dev_page = {
106         .dd = {
107                 .type = VIRTIO_ID_NET,
108                 .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
109                 .feature_len = sizeof(virtnet_dev_page.host_features),
110                 .config_len = sizeof(virtnet_dev_page.net_config),
111         },
112         .vqconfig[0] = {
113                 .num = htole16(MIC_VRING_ENTRIES),
114         },
115         .vqconfig[1] = {
116                 .num = htole16(MIC_VRING_ENTRIES),
117         },
118 #if GSO_ENABLED
119                 .host_features = htole32(
120                 1 << VIRTIO_NET_F_CSUM |
121                 1 << VIRTIO_NET_F_GSO |
122                 1 << VIRTIO_NET_F_GUEST_TSO4 |
123                 1 << VIRTIO_NET_F_GUEST_TSO6 |
124                 1 << VIRTIO_NET_F_GUEST_ECN |
125                 1 << VIRTIO_NET_F_GUEST_UFO),
126 #else
127                 .host_features = 0,
128 #endif
129 };
130
131 static const char *mic_config_dir = "/etc/sysconfig/mic";
132 static const char *virtblk_backend = "VIRTBLK_BACKEND";
133 static struct {
134         struct mic_device_desc dd;
135         struct mic_vqconfig vqconfig[1];
136         __u32 host_features, guest_acknowledgements;
137         struct virtio_blk_config blk_config;
138 } virtblk_dev_page = {
139         .dd = {
140                 .type = VIRTIO_ID_BLOCK,
141                 .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
142                 .feature_len = sizeof(virtblk_dev_page.host_features),
143                 .config_len = sizeof(virtblk_dev_page.blk_config),
144         },
145         .vqconfig[0] = {
146                 .num = htole16(MIC_VRING_ENTRIES),
147         },
148         .host_features =
149                 htole32(1<<VIRTIO_BLK_F_SEG_MAX),
150         .blk_config = {
151                 .seg_max = htole32(MIC_VRING_ENTRIES - 2),
152                 .capacity = htole64(0),
153          }
154 };
155
/* NOTE(review): presumably the program name taken from argv[0] - confirm. */
static char *myname;
157
158 static int
159 tap_configure(struct mic_info *mic, char *dev)
160 {
161         pid_t pid;
162         char *ifargv[7];
163         char ipaddr[IFNAMSIZ];
164         int ret = 0;
165
166         pid = fork();
167         if (pid == 0) {
168                 ifargv[0] = "ip";
169                 ifargv[1] = "link";
170                 ifargv[2] = "set";
171                 ifargv[3] = dev;
172                 ifargv[4] = "up";
173                 ifargv[5] = NULL;
174                 mpsslog("Configuring %s\n", dev);
175                 ret = execvp("ip", ifargv);
176                 if (ret < 0) {
177                         mpsslog("%s execvp failed errno %s\n",
178                                 mic->name, strerror(errno));
179                         return ret;
180                 }
181         }
182         if (pid < 0) {
183                 mpsslog("%s fork failed errno %s\n",
184                         mic->name, strerror(errno));
185                 return ret;
186         }
187
188         ret = waitpid(pid, NULL, 0);
189         if (ret < 0) {
190                 mpsslog("%s waitpid failed errno %s\n",
191                         mic->name, strerror(errno));
192                 return ret;
193         }
194
195         snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id);
196
197         pid = fork();
198         if (pid == 0) {
199                 ifargv[0] = "ip";
200                 ifargv[1] = "addr";
201                 ifargv[2] = "add";
202                 ifargv[3] = ipaddr;
203                 ifargv[4] = "dev";
204                 ifargv[5] = dev;
205                 ifargv[6] = NULL;
206                 mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
207                 ret = execvp("ip", ifargv);
208                 if (ret < 0) {
209                         mpsslog("%s execvp failed errno %s\n",
210                                 mic->name, strerror(errno));
211                         return ret;
212                 }
213         }
214         if (pid < 0) {
215                 mpsslog("%s fork failed errno %s\n",
216                         mic->name, strerror(errno));
217                 return ret;
218         }
219
220         ret = waitpid(pid, NULL, 0);
221         if (ret < 0) {
222                 mpsslog("%s waitpid failed errno %s\n",
223                         mic->name, strerror(errno));
224                 return ret;
225         }
226         mpsslog("MIC name %s %s %d DONE!\n",
227                 mic->name, __func__, __LINE__);
228         return 0;
229 }
230
231 static int tun_alloc(struct mic_info *mic, char *dev)
232 {
233         struct ifreq ifr;
234         int fd, err;
235 #if GSO_ENABLED
236         unsigned offload;
237 #endif
238         fd = open("/dev/net/tun", O_RDWR);
239         if (fd < 0) {
240                 mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
241                 goto done;
242         }
243
244         memset(&ifr, 0, sizeof(ifr));
245
246         ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
247         if (*dev)
248                 strncpy(ifr.ifr_name, dev, IFNAMSIZ);
249
250         err = ioctl(fd, TUNSETIFF, (void *)&ifr);
251         if (err < 0) {
252                 mpsslog("%s %s %d TUNSETIFF failed %s\n",
253                         mic->name, __func__, __LINE__, strerror(errno));
254                 close(fd);
255                 return err;
256         }
257 #if GSO_ENABLED
258         offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
259                 TUN_F_TSO_ECN | TUN_F_UFO;
260
261         err = ioctl(fd, TUNSETOFFLOAD, offload);
262         if (err < 0) {
263                 mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
264                         mic->name, __func__, __LINE__, strerror(errno));
265                 close(fd);
266                 return err;
267         }
268 #endif
269         strcpy(dev, ifr.ifr_name);
270         mpsslog("Created TAP %s\n", dev);
271 done:
272         return fd;
273 }
274
/* Indices into the pollfd array used by virtio_net() */
#define NET_FD_VIRTIO_NET 0
#define NET_FD_TUN 1
/* Number of entries in that pollfd array */
#define MAX_NET_FD 2
278
279 static void set_dp(struct mic_info *mic, int type, void *dp)
280 {
281         switch (type) {
282         case VIRTIO_ID_CONSOLE:
283                 mic->mic_console.console_dp = dp;
284                 return;
285         case VIRTIO_ID_NET:
286                 mic->mic_net.net_dp = dp;
287                 return;
288         case VIRTIO_ID_BLOCK:
289                 mic->mic_virtblk.block_dp = dp;
290                 return;
291         }
292         mpsslog("%s %s %d not found\n", mic->name, __func__, type);
293         assert(0);
294 }
295
296 static void *get_dp(struct mic_info *mic, int type)
297 {
298         switch (type) {
299         case VIRTIO_ID_CONSOLE:
300                 return mic->mic_console.console_dp;
301         case VIRTIO_ID_NET:
302                 return mic->mic_net.net_dp;
303         case VIRTIO_ID_BLOCK:
304                 return mic->mic_virtblk.block_dp;
305         }
306         mpsslog("%s %s %d not found\n", mic->name, __func__, type);
307         assert(0);
308         return NULL;
309 }
310
311 static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
312 {
313         struct mic_device_desc *d;
314         int i;
315         void *dp = get_dp(mic, type);
316
317         for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
318                 i += mic_total_desc_size(d)) {
319                 d = dp + i;
320
321                 /* End of list */
322                 if (d->type == 0)
323                         break;
324
325                 if (d->type == -1)
326                         continue;
327
328                 mpsslog("%s %s d-> type %d d %p\n",
329                         mic->name, __func__, d->type, d);
330
331                 if (d->type == (__u8)type)
332                         return d;
333         }
334         mpsslog("%s %s %d not found\n", mic->name, __func__, type);
335         assert(0);
336         return NULL;
337 }
338
339 /* See comments in vhost.c for explanation of next_desc() */
340 static unsigned next_desc(struct vring_desc *desc)
341 {
342         unsigned int next;
343
344         if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
345                 return -1U;
346         next = le16toh(desc->next);
347         return next;
348 }
349
350 /* Sum up all the IOVEC length */
351 static ssize_t
352 sum_iovec_len(struct mic_copy_desc *copy)
353 {
354         ssize_t sum = 0;
355         int i;
356
357         for (i = 0; i < copy->iovcnt; i++)
358                 sum += copy->iov[i].iov_len;
359         return sum;
360 }
361
362 static inline void verify_out_len(struct mic_info *mic,
363         struct mic_copy_desc *copy)
364 {
365         if (copy->out_len != sum_iovec_len(copy)) {
366                 mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
367                         mic->name, __func__, __LINE__,
368                         copy->out_len, sum_iovec_len(copy));
369                 assert(copy->out_len == sum_iovec_len(copy));
370         }
371 }
372
373 /* Display an iovec */
374 static void
375 disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
376            const char *s, int line)
377 {
378         int i;
379
380         for (i = 0; i < copy->iovcnt; i++)
381                 mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
382                         mic->name, s, line, i,
383                         copy->iov[i].iov_base, copy->iov[i].iov_len);
384 }
385
386 static inline __u16 read_avail_idx(struct mic_vring *vr)
387 {
388         return ACCESS_ONCE(vr->info->avail_idx);
389 }
390
391 static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
392                                 struct mic_copy_desc *copy, ssize_t len)
393 {
394         copy->vr_idx = tx ? 0 : 1;
395         copy->update_used = true;
396         if (type == VIRTIO_ID_NET)
397                 copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
398         else
399                 copy->iov[0].iov_len = len;
400 }
401
402 /* Central API which triggers the copies */
403 static int
404 mic_virtio_copy(struct mic_info *mic, int fd,
405                 struct mic_vring *vr, struct mic_copy_desc *copy)
406 {
407         int ret;
408
409         ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
410         if (ret) {
411                 mpsslog("%s %s %d errno %s ret %d\n",
412                         mic->name, __func__, __LINE__,
413                         strerror(errno), ret);
414         }
415         return ret;
416 }
417
418 /*
419  * This initialization routine requires at least one
420  * vring i.e. vr0. vr1 is optional.
421  */
422 static void *
423 init_vr(struct mic_info *mic, int fd, int type,
424         struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
425 {
426         int vr_size;
427         char *va;
428
429         vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
430                 MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
431         va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
432                 PROT_READ, MAP_SHARED, fd, 0);
433         if (MAP_FAILED == va) {
434                 mpsslog("%s %s %d mmap failed errno %s\n",
435                         mic->name, __func__, __LINE__,
436                         strerror(errno));
437                 goto done;
438         }
439         set_dp(mic, type, va);
440         vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
441         vr0->info = vr0->va +
442                 vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
443         vring_init(&vr0->vr,
444                    MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
445         mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
446                 __func__, mic->name, vr0->va, vr0->info, vr_size,
447                 vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
448         mpsslog("magic 0x%x expected 0x%x\n",
449                 le32toh(vr0->info->magic), MIC_MAGIC + type);
450         assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
451         if (vr1) {
452                 vr1->va = (struct mic_vring *)
453                         &va[MIC_DEVICE_PAGE_END + vr_size];
454                 vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES,
455                         MIC_VIRTIO_RING_ALIGN);
456                 vring_init(&vr1->vr,
457                            MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
458                 mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
459                         __func__, mic->name, vr1->va, vr1->info, vr_size,
460                         vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
461                 mpsslog("magic 0x%x expected 0x%x\n",
462                         le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
463                 assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
464         }
465 done:
466         return va;
467 }
468
469 static void
470 wait_for_card_driver(struct mic_info *mic, int fd, int type)
471 {
472         struct pollfd pollfd;
473         int err;
474         struct mic_device_desc *desc = get_device_desc(mic, type);
475
476         pollfd.fd = fd;
477         mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
478                 mic->name, __func__, type, desc->status);
479         while (1) {
480                 pollfd.events = POLLIN;
481                 pollfd.revents = 0;
482                 err = poll(&pollfd, 1, -1);
483                 if (err < 0) {
484                         mpsslog("%s %s poll failed %s\n",
485                                 mic->name, __func__, strerror(errno));
486                         continue;
487                 }
488
489                 if (pollfd.revents) {
490                         mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n",
491                                 mic->name, __func__, type, desc->status);
492                         if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
493                                 mpsslog("%s %s poll.revents %d\n",
494                                         mic->name, __func__, pollfd.revents);
495                                 mpsslog("%s %s desc-> type %d status 0x%x\n",
496                                         mic->name, __func__, type,
497                                         desc->status);
498                                 break;
499                         }
500                 }
501         }
502 }
503
504 /* Spin till we have some descriptors */
505 static void
506 spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
507 {
508         __u16 avail_idx = read_avail_idx(vr);
509
510         while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
511 #ifdef DEBUG
512                 mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
513                         mic->name, __func__,
514                         le16toh(vr->vr.avail->idx), vr->info->avail_idx);
515 #endif
516                 sched_yield();
517         }
518 }
519
520 static void *
521 virtio_net(void *arg)
522 {
523         static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
524         static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
525         struct iovec vnet_iov[2][2] = {
526                 { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
527                   { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
528                 { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
529                   { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
530         };
531         struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
532         struct mic_info *mic = (struct mic_info *)arg;
533         char if_name[IFNAMSIZ];
534         struct pollfd net_poll[MAX_NET_FD];
535         struct mic_vring tx_vr, rx_vr;
536         struct mic_copy_desc copy;
537         struct mic_device_desc *desc;
538         int err;
539
540         snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
541         mic->mic_net.tap_fd = tun_alloc(mic, if_name);
542         if (mic->mic_net.tap_fd < 0)
543                 goto done;
544
545         if (tap_configure(mic, if_name))
546                 goto done;
547         mpsslog("MIC name %s id %d\n", mic->name, mic->id);
548
549         net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
550         net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
551         net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
552         net_poll[NET_FD_TUN].events = POLLIN;
553
554         if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
555                                   VIRTIO_ID_NET, &tx_vr, &rx_vr,
556                 virtnet_dev_page.dd.num_vq)) {
557                 mpsslog("%s init_vr failed %s\n",
558                         mic->name, strerror(errno));
559                 goto done;
560         }
561
562         copy.iovcnt = 2;
563         desc = get_device_desc(mic, VIRTIO_ID_NET);
564
565         while (1) {
566                 ssize_t len;
567
568                 net_poll[NET_FD_VIRTIO_NET].revents = 0;
569                 net_poll[NET_FD_TUN].revents = 0;
570
571                 /* Start polling for data from tap and virtio net */
572                 err = poll(net_poll, 2, -1);
573                 if (err < 0) {
574                         mpsslog("%s poll failed %s\n",
575                                 __func__, strerror(errno));
576                         continue;
577                 }
578                 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
579                         wait_for_card_driver(mic, mic->mic_net.virtio_net_fd,
580                                              VIRTIO_ID_NET);
581                 /*
582                  * Check if there is data to be read from TUN and write to
583                  * virtio net fd if there is.
584                  */
585                 if (net_poll[NET_FD_TUN].revents & POLLIN) {
586                         copy.iov = iov0;
587                         len = readv(net_poll[NET_FD_TUN].fd,
588                                 copy.iov, copy.iovcnt);
589                         if (len > 0) {
590                                 struct virtio_net_hdr *hdr
591                                         = (struct virtio_net_hdr *)vnet_hdr[0];
592
593                                 /* Disable checksums on the card since we are on
594                                    a reliable PCIe link */
595                                 hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
596 #ifdef DEBUG
597                                 mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
598                                         __func__, __LINE__, hdr->flags);
599                                 mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
600                                         copy.out_len, hdr->gso_type);
601 #endif
602 #ifdef DEBUG
603                                 disp_iovec(mic, copy, __func__, __LINE__);
604                                 mpsslog("%s %s %d read from tap 0x%lx\n",
605                                         mic->name, __func__, __LINE__,
606                                         len);
607 #endif
608                                 spin_for_descriptors(mic, &tx_vr);
609                                 txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
610                                              len);
611
612                                 err = mic_virtio_copy(mic,
613                                         mic->mic_net.virtio_net_fd, &tx_vr,
614                                         &copy);
615                                 if (err < 0) {
616                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
617                                                 mic->name, __func__, __LINE__,
618                                                 strerror(errno));
619                                 }
620                                 if (!err)
621                                         verify_out_len(mic, &copy);
622 #ifdef DEBUG
623                                 disp_iovec(mic, copy, __func__, __LINE__);
624                                 mpsslog("%s %s %d wrote to net 0x%lx\n",
625                                         mic->name, __func__, __LINE__,
626                                         sum_iovec_len(&copy));
627 #endif
628                                 /* Reinitialize IOV for next run */
629                                 iov0[1].iov_len = MAX_NET_PKT_SIZE;
630                         } else if (len < 0) {
631                                 disp_iovec(mic, &copy, __func__, __LINE__);
632                                 mpsslog("%s %s %d read failed %s ", mic->name,
633                                         __func__, __LINE__, strerror(errno));
634                                 mpsslog("cnt %d sum %zd\n",
635                                         copy.iovcnt, sum_iovec_len(&copy));
636                         }
637                 }
638
639                 /*
640                  * Check if there is data to be read from virtio net and
641                  * write to TUN if there is.
642                  */
643                 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
644                         while (rx_vr.info->avail_idx !=
645                                 le16toh(rx_vr.vr.avail->idx)) {
646                                 copy.iov = iov1;
647                                 txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
648                                              MAX_NET_PKT_SIZE
649                                         + sizeof(struct virtio_net_hdr));
650
651                                 err = mic_virtio_copy(mic,
652                                         mic->mic_net.virtio_net_fd, &rx_vr,
653                                         &copy);
654                                 if (!err) {
655 #ifdef DEBUG
656                                         struct virtio_net_hdr *hdr
657                                                 = (struct virtio_net_hdr *)
658                                                         vnet_hdr[1];
659
660                                         mpsslog("%s %s %d hdr->flags 0x%x, ",
661                                                 mic->name, __func__, __LINE__,
662                                                 hdr->flags);
663                                         mpsslog("out_len %d gso_type 0x%x\n",
664                                                 copy.out_len,
665                                                 hdr->gso_type);
666 #endif
667                                         /* Set the correct output iov_len */
668                                         iov1[1].iov_len = copy.out_len -
669                                                 sizeof(struct virtio_net_hdr);
670                                         verify_out_len(mic, &copy);
671 #ifdef DEBUG
672                                         disp_iovec(mic, copy, __func__,
673                                                    __LINE__);
674                                         mpsslog("%s %s %d ",
675                                                 mic->name, __func__, __LINE__);
676                                         mpsslog("read from net 0x%lx\n",
677                                                 sum_iovec_len(copy));
678 #endif
679                                         len = writev(net_poll[NET_FD_TUN].fd,
680                                                 copy.iov, copy.iovcnt);
681                                         if (len != sum_iovec_len(&copy)) {
682                                                 mpsslog("Tun write failed %s ",
683                                                         strerror(errno));
684                                                 mpsslog("len 0x%zx ", len);
685                                                 mpsslog("read_len 0x%zx\n",
686                                                         sum_iovec_len(&copy));
687                                         } else {
688 #ifdef DEBUG
689                                                 disp_iovec(mic, &copy, __func__,
690                                                            __LINE__);
691                                                 mpsslog("%s %s %d ",
692                                                         mic->name, __func__,
693                                                         __LINE__);
694                                                 mpsslog("wrote to tap 0x%lx\n",
695                                                         len);
696 #endif
697                                         }
698                                 } else {
699                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
700                                                 mic->name, __func__, __LINE__,
701                                                 strerror(errno));
702                                         break;
703                                 }
704                         }
705                 }
706                 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
707                         mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
708         }
709 done:
710         pthread_exit(NULL);
711 }
712
/* virtio_console */
/* Indices into the pollfd array used by virtio_console() */
#define VIRTIO_CONSOLE_FD 0
#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
/* Number of entries in that pollfd array */
#define MAX_CONSOLE_FD (MONITOR_FD + 1)  /* must be the last one + 1 */
#define MAX_BUFFER_SIZE PAGE_SIZE
718
719 static void *
720 virtio_console(void *arg)
721 {
722         static __u8 vcons_buf[2][PAGE_SIZE];
723         struct iovec vcons_iov[2] = {
724                 { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
725                 { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
726         };
727         struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
728         struct mic_info *mic = (struct mic_info *)arg;
729         int err;
730         struct pollfd console_poll[MAX_CONSOLE_FD];
731         int pty_fd;
732         char *pts_name;
733         ssize_t len;
734         struct mic_vring tx_vr, rx_vr;
735         struct mic_copy_desc copy;
736         struct mic_device_desc *desc;
737
738         pty_fd = posix_openpt(O_RDWR);
739         if (pty_fd < 0) {
740                 mpsslog("can't open a pseudoterminal master device: %s\n",
741                         strerror(errno));
742                 goto _return;
743         }
744         pts_name = ptsname(pty_fd);
745         if (pts_name == NULL) {
746                 mpsslog("can't get pts name\n");
747                 goto _close_pty;
748         }
749         printf("%s console message goes to %s\n", mic->name, pts_name);
750         mpsslog("%s console message goes to %s\n", mic->name, pts_name);
751         err = grantpt(pty_fd);
752         if (err < 0) {
753                 mpsslog("can't grant access: %s %s\n",
754                         pts_name, strerror(errno));
755                 goto _close_pty;
756         }
757         err = unlockpt(pty_fd);
758         if (err < 0) {
759                 mpsslog("can't unlock a pseudoterminal: %s %s\n",
760                         pts_name, strerror(errno));
761                 goto _close_pty;
762         }
763         console_poll[MONITOR_FD].fd = pty_fd;
764         console_poll[MONITOR_FD].events = POLLIN;
765
766         console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
767         console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
768
769         if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
770                                   VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
771                 virtcons_dev_page.dd.num_vq)) {
772                 mpsslog("%s init_vr failed %s\n",
773                         mic->name, strerror(errno));
774                 goto _close_pty;
775         }
776
777         copy.iovcnt = 1;
778         desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
779
780         for (;;) {
781                 console_poll[MONITOR_FD].revents = 0;
782                 console_poll[VIRTIO_CONSOLE_FD].revents = 0;
783                 err = poll(console_poll, MAX_CONSOLE_FD, -1);
784                 if (err < 0) {
785                         mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
786                                 strerror(errno));
787                         continue;
788                 }
789                 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
790                         wait_for_card_driver(mic,
791                                              mic->mic_console.virtio_console_fd,
792                                 VIRTIO_ID_CONSOLE);
793
794                 if (console_poll[MONITOR_FD].revents & POLLIN) {
795                         copy.iov = iov0;
796                         len = readv(pty_fd, copy.iov, copy.iovcnt);
797                         if (len > 0) {
798 #ifdef DEBUG
799                                 disp_iovec(mic, copy, __func__, __LINE__);
800                                 mpsslog("%s %s %d read from tap 0x%lx\n",
801                                         mic->name, __func__, __LINE__,
802                                         len);
803 #endif
804                                 spin_for_descriptors(mic, &tx_vr);
805                                 txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
806                                              &copy, len);
807
808                                 err = mic_virtio_copy(mic,
809                                         mic->mic_console.virtio_console_fd,
810                                         &tx_vr, &copy);
811                                 if (err < 0) {
812                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
813                                                 mic->name, __func__, __LINE__,
814                                                 strerror(errno));
815                                 }
816                                 if (!err)
817                                         verify_out_len(mic, &copy);
818 #ifdef DEBUG
819                                 disp_iovec(mic, copy, __func__, __LINE__);
820                                 mpsslog("%s %s %d wrote to net 0x%lx\n",
821                                         mic->name, __func__, __LINE__,
822                                         sum_iovec_len(copy));
823 #endif
824                                 /* Reinitialize IOV for next run */
825                                 iov0->iov_len = PAGE_SIZE;
826                         } else if (len < 0) {
827                                 disp_iovec(mic, &copy, __func__, __LINE__);
828                                 mpsslog("%s %s %d read failed %s ",
829                                         mic->name, __func__, __LINE__,
830                                         strerror(errno));
831                                 mpsslog("cnt %d sum %zd\n",
832                                         copy.iovcnt, sum_iovec_len(&copy));
833                         }
834                 }
835
836                 if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
837                         while (rx_vr.info->avail_idx !=
838                                 le16toh(rx_vr.vr.avail->idx)) {
839                                 copy.iov = iov1;
840                                 txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
841                                              &copy, PAGE_SIZE);
842
843                                 err = mic_virtio_copy(mic,
844                                         mic->mic_console.virtio_console_fd,
845                                         &rx_vr, &copy);
846                                 if (!err) {
847                                         /* Set the correct output iov_len */
848                                         iov1->iov_len = copy.out_len;
849                                         verify_out_len(mic, &copy);
850 #ifdef DEBUG
851                                         disp_iovec(mic, copy, __func__,
852                                                    __LINE__);
853                                         mpsslog("%s %s %d ",
854                                                 mic->name, __func__, __LINE__);
855                                         mpsslog("read from net 0x%lx\n",
856                                                 sum_iovec_len(copy));
857 #endif
858                                         len = writev(pty_fd,
859                                                 copy.iov, copy.iovcnt);
860                                         if (len != sum_iovec_len(&copy)) {
861                                                 mpsslog("Tun write failed %s ",
862                                                         strerror(errno));
863                                                 mpsslog("len 0x%zx ", len);
864                                                 mpsslog("read_len 0x%zx\n",
865                                                         sum_iovec_len(&copy));
866                                         } else {
867 #ifdef DEBUG
868                                                 disp_iovec(mic, copy, __func__,
869                                                            __LINE__);
870                                                 mpsslog("%s %s %d ",
871                                                         mic->name, __func__,
872                                                         __LINE__);
873                                                 mpsslog("wrote to tap 0x%lx\n",
874                                                         len);
875 #endif
876                                         }
877                                 } else {
878                                         mpsslog("%s %s %d mic_virtio_copy %s\n",
879                                                 mic->name, __func__, __LINE__,
880                                                 strerror(errno));
881                                         break;
882                                 }
883                         }
884                 }
885                 if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
886                         mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
887         }
888 _close_pty:
889         close(pty_fd);
890 _return:
891         pthread_exit(NULL);
892 }
893
894 static void
895 add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
896 {
897         char path[PATH_MAX];
898         int fd, err;
899
900         snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
901         fd = open(path, O_RDWR);
902         if (fd < 0) {
903                 mpsslog("Could not open %s %s\n", path, strerror(errno));
904                 return;
905         }
906
907         err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
908         if (err < 0) {
909                 mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
910                 close(fd);
911                 return;
912         }
913         switch (dd->type) {
914         case VIRTIO_ID_NET:
915                 mic->mic_net.virtio_net_fd = fd;
916                 mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
917                 break;
918         case VIRTIO_ID_CONSOLE:
919                 mic->mic_console.virtio_console_fd = fd;
920                 mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
921                 break;
922         case VIRTIO_ID_BLOCK:
923                 mic->mic_virtblk.virtio_block_fd = fd;
924                 mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
925                 break;
926         }
927 }
928
929 static bool
930 set_backend_file(struct mic_info *mic)
931 {
932         FILE *config;
933         char buff[PATH_MAX], *line, *evv, *p;
934
935         snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
936         config = fopen(buff, "r");
937         if (config == NULL)
938                 return false;
939         do {  /* look for "virtblk_backend=XXXX" */
940                 line = fgets(buff, PATH_MAX, config);
941                 if (line == NULL)
942                         break;
943                 if (*line == '#')
944                         continue;
945                 p = strchr(line, '\n');
946                 if (p)
947                         *p = '\0';
948         } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
949         fclose(config);
950         if (line == NULL)
951                 return false;
952         evv = strchr(line, '=');
953         if (evv == NULL)
954                 return false;
955         mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
956         if (mic->mic_virtblk.backend_file == NULL) {
957                 mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
958                 return false;
959         }
960         strcpy(mic->mic_virtblk.backend_file, evv + 1);
961         return true;
962 }
963
964 #define SECTOR_SIZE 512
965 static bool
966 set_backend_size(struct mic_info *mic)
967 {
968         mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
969                 SEEK_END);
970         if (mic->mic_virtblk.backend_size < 0) {
971                 mpsslog("%s: can't seek: %s\n",
972                         mic->name, mic->mic_virtblk.backend_file);
973                 return false;
974         }
975         virtblk_dev_page.blk_config.capacity =
976                 mic->mic_virtblk.backend_size / SECTOR_SIZE;
977         if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
978                 virtblk_dev_page.blk_config.capacity++;
979
980         virtblk_dev_page.blk_config.capacity =
981                 htole64(virtblk_dev_page.blk_config.capacity);
982
983         return true;
984 }
985
986 static bool
987 open_backend(struct mic_info *mic)
988 {
989         if (!set_backend_file(mic))
990                 goto _error_exit;
991         mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
992         if (mic->mic_virtblk.backend < 0) {
993                 mpsslog("%s: can't open: %s\n", mic->name,
994                         mic->mic_virtblk.backend_file);
995                 goto _error_free;
996         }
997         if (!set_backend_size(mic))
998                 goto _error_close;
999         mic->mic_virtblk.backend_addr = mmap(NULL,
1000                 mic->mic_virtblk.backend_size,
1001                 PROT_READ|PROT_WRITE, MAP_SHARED,
1002                 mic->mic_virtblk.backend, 0L);
1003         if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1004                 mpsslog("%s: can't map: %s %s\n",
1005                         mic->name, mic->mic_virtblk.backend_file,
1006                         strerror(errno));
1007                 goto _error_close;
1008         }
1009         return true;
1010
1011  _error_close:
1012         close(mic->mic_virtblk.backend);
1013  _error_free:
1014         free(mic->mic_virtblk.backend_file);
1015  _error_exit:
1016         return false;
1017 }
1018
/*
 * Release everything open_backend() set up for @mic: the mapping of the
 * backend file, its file descriptor and the malloc'ed file name.
 * Return values of munmap() and close() are not checked.
 */
static void
close_backend(struct mic_info *mic)
{
        munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
        close(mic->mic_virtblk.backend);
        free(mic->mic_virtblk.backend_file);
}
1026
1027 static bool
1028 start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1029 {
1030         if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
1031                 mpsslog("%s: blk_config is not 8 byte aligned.\n",
1032                         mic->name);
1033                 return false;
1034         }
1035         add_virtio_device(mic, &virtblk_dev_page.dd);
1036         if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1037                                   VIRTIO_ID_BLOCK, vring, NULL,
1038                                   virtblk_dev_page.dd.num_vq)) {
1039                 mpsslog("%s init_vr failed %s\n",
1040                         mic->name, strerror(errno));
1041                 return false;
1042         }
1043         return true;
1044 }
1045
1046 static void
1047 stop_virtblk(struct mic_info *mic)
1048 {
1049         int vr_size, ret;
1050
1051         vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
1052                 MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
1053         ret = munmap(mic->mic_virtblk.block_dp,
1054                 MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1055         if (ret < 0)
1056                 mpsslog("%s munmap errno %d\n", mic->name, errno);
1057         close(mic->mic_virtblk.virtio_block_fd);
1058 }
1059
1060 static __u8
1061 header_error_check(struct vring_desc *desc)
1062 {
1063         if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1064                 mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1065                         __func__, __LINE__);
1066                 return -EIO;
1067         }
1068         if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1069                 mpsslog("%s() %d: alone\n",
1070                         __func__, __LINE__);
1071                 return -EIO;
1072         }
1073         if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1074                 mpsslog("%s() %d: not read\n",
1075                         __func__, __LINE__);
1076                 return -EIO;
1077         }
1078         return 0;
1079 }
1080
1081 static int
1082 read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1083 {
1084         struct iovec iovec;
1085         struct mic_copy_desc copy;
1086
1087         iovec.iov_len = sizeof(*hdr);
1088         iovec.iov_base = hdr;
1089         copy.iov = &iovec;
1090         copy.iovcnt = 1;
1091         copy.vr_idx = 0;  /* only one vring on virtio_block */
1092         copy.update_used = false;  /* do not update used index */
1093         return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1094 }
1095
1096 static int
1097 transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1098 {
1099         struct mic_copy_desc copy;
1100
1101         copy.iov = iovec;
1102         copy.iovcnt = iovcnt;
1103         copy.vr_idx = 0;  /* only one vring on virtio_block */
1104         copy.update_used = false;  /* do not update used index */
1105         return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1106 }
1107
1108 static __u8
1109 status_error_check(struct vring_desc *desc)
1110 {
1111         if (le32toh(desc->len) != sizeof(__u8)) {
1112                 mpsslog("%s() %d: length is not sizeof(status)\n",
1113                         __func__, __LINE__);
1114                 return -EIO;
1115         }
1116         return 0;
1117 }
1118
1119 static int
1120 write_status(int fd, __u8 *status)
1121 {
1122         struct iovec iovec;
1123         struct mic_copy_desc copy;
1124
1125         iovec.iov_base = status;
1126         iovec.iov_len = sizeof(*status);
1127         copy.iov = &iovec;
1128         copy.iovcnt = 1;
1129         copy.vr_idx = 0;  /* only one vring on virtio_block */
1130         copy.update_used = true; /* Update used index */
1131         return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1132 }
1133
/*
 * Thread body serving the virtio block device of one MIC card.
 *
 * @arg is the card's struct mic_info. The outer loop never terminates:
 * each pass opens the backend file, starts the device, serves requests
 * until mic->mic_virtblk.signaled is set (see change_virtblk_backend()),
 * then stops the device and closes the backend so the next pass can pick
 * up a possibly different backend file. When no backend can be opened the
 * thread just sleeps until it is signaled.
 *
 * Each request is handled as: read the header, build an iovec over the
 * mmap'ed backend for every data descriptor in the chain, let the driver
 * copy the data, then write the status byte.
 */
static void *
virtio_block(void *arg)
{
        struct mic_info *mic = (struct mic_info *)arg;
        int ret;
        struct pollfd block_poll;
        struct mic_vring vring;
        __u16 avail_idx;
        __u32 desc_idx;
        struct vring_desc *desc;
        struct iovec *iovec, *piov;
        __u8 status;
        __u32 buffer_desc_idx;
        struct virtio_blk_outhdr hdr;
        void *fos;

        for (;;) {  /* forever */
                if (!open_backend(mic)) { /* No virtblk */
                        /* Idle, checking the signaled flag once a second. */
                        for (mic->mic_virtblk.signaled = 0;
                                !mic->mic_virtblk.signaled;)
                                sleep(1);
                        continue;
                }

                /* backend file is specified. */
                if (!start_virtblk(mic, &vring))
                        goto _close_backend;
                /* One iovec slot per segment advertised in seg_max. */
                iovec = malloc(sizeof(*iovec) *
                        le32toh(virtblk_dev_page.blk_config.seg_max));
                if (!iovec) {
                        mpsslog("%s: can't alloc iovec: %s\n",
                                mic->name, strerror(ENOMEM));
                        goto _stop_virtblk;
                }

                block_poll.fd = mic->mic_virtblk.virtio_block_fd;
                block_poll.events = POLLIN;
                for (mic->mic_virtblk.signaled = 0;
                     !mic->mic_virtblk.signaled;) {
                        block_poll.revents = 0;
                                        /* timeout in 1 sec to see signaled */
                        ret = poll(&block_poll, 1, 1000);
                        if (ret < 0) {
                                mpsslog("%s %d: poll failed: %s\n",
                                        __func__, __LINE__,
                                        strerror(errno));
                                continue;
                        }

                        if (!(block_poll.revents & POLLIN)) {
#ifdef DEBUG
                                mpsslog("%s %d: block_poll.revents=0x%x\n",
                                        __func__, __LINE__, block_poll.revents);
#endif
                                continue;
                        }

                        /* POLLIN */
                        /*
                         * vring.info->avail_idx is never written in this
                         * function; presumably the driver advances it as
                         * requests complete - TODO confirm.
                         */
                        while (vring.info->avail_idx !=
                                le16toh(vring.vr.avail->idx)) {
                                /* read header element */
                                avail_idx =
                                        vring.info->avail_idx &
                                        (vring.vr.num - 1);
                                desc_idx = le16toh(
                                        vring.vr.avail->ring[avail_idx]);
                                desc = &vring.vr.desc[desc_idx];
#ifdef DEBUG
                                mpsslog("%s() %d: avail_idx=%d ",
                                        __func__, __LINE__,
                                        vring.info->avail_idx);
                                mpsslog("vring.vr.num=%d desc=%p\n",
                                        vring.vr.num, desc);
#endif
                                /*
                                 * NOTE(review): this result is overwritten
                                 * by "status = 0" below before it is ever
                                 * read, so a bad header is only logged.
                                 */
                                status = header_error_check(desc);
                                ret = read_header(
                                        mic->mic_virtblk.virtio_block_fd,
                                        &hdr, desc_idx);
                                if (ret < 0) {
                                        mpsslog("%s() %d %s: ret=%d %s\n",
                                                __func__, __LINE__,
                                                mic->name, ret,
                                                strerror(errno));
                                        break;
                                }
                                /* buffer element */
                                piov = iovec;
                                status = 0;
                                /*
                                 * fos walks the mmap'ed backend, starting at
                                 * the requested sector.
                                 * NOTE(review): hdr.sector is not checked
                                 * against backend_size here.
                                 */
                                fos = mic->mic_virtblk.backend_addr +
                                        (hdr.sector * SECTOR_SIZE);
                                buffer_desc_idx = next_desc(desc);
                                desc_idx = buffer_desc_idx;
                                /*
                                 * Collect every descriptor that still has a
                                 * successor; the loop exits with desc on the
                                 * last one of the chain, which is treated as
                                 * the status descriptor below.
                                 * NOTE(review): flags and len are read here
                                 * without le16toh()/le32toh(), unlike the
                                 * check helpers, and nothing limits the
                                 * number of entries written through piov to
                                 * the seg_max slots allocated above.
                                 */
                                for (desc = &vring.vr.desc[buffer_desc_idx];
                                     desc->flags & VRING_DESC_F_NEXT;
                                     desc_idx = next_desc(desc),
                                             desc = &vring.vr.desc[desc_idx]) {
                                        piov->iov_len = desc->len;
                                        piov->iov_base = fos;
                                        piov++;
                                        fos += desc->len;
                                }
                                /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
                                if (hdr.type & ~(VIRTIO_BLK_T_OUT |
                                        VIRTIO_BLK_T_GET_ID)) {
                                        /*
                                          VIRTIO_BLK_T_IN - does not do
                                          anything. Probably for documenting.
                                          VIRTIO_BLK_T_SCSI_CMD - for
                                          virtio_scsi.
                                          VIRTIO_BLK_T_FLUSH - turned off in
                                          config space.
                                          VIRTIO_BLK_T_BARRIER - defined but not
                                          used in anywhere.
                                        */
                                        mpsslog("%s() %d: type %x ",
                                                __func__, __LINE__,
                                                hdr.type);
                                        mpsslog("is not supported\n");
                                        status = -ENOTSUP;

                                } else {
                                        ret = transfer_blocks(
                                        mic->mic_virtblk.virtio_block_fd,
                                                iovec,
                                                piov - iovec);
                                        /*
                                         * NOTE(review): status is always 0
                                         * on this path, so this condition is
                                         * never true and a failed transfer
                                         * is not reflected in status.
                                         */
                                        if (ret < 0 &&
                                            status != 0)
                                                status = ret;
                                }
                                /* write status and update used pointer */
                                /*
                                 * NOTE(review): only reached with a nonzero
                                 * status after the "not supported" branch,
                                 * where it replaces -ENOTSUP with the result
                                 * of the length check - confirm the intent.
                                 */
                                if (status != 0)
                                        status = status_error_check(desc);
                                /* The result of write_status() is not checked. */
                                ret = write_status(
                                        mic->mic_virtblk.virtio_block_fd,
                                        &status);
#ifdef DEBUG
                                mpsslog("%s() %d: write status=%d on desc=%p\n",
                                        __func__, __LINE__,
                                        status, desc);
#endif
                        }
                }
                free(iovec);
_stop_virtblk:
                stop_virtblk(mic);
_close_backend:
                close_backend(mic);
        }  /* forever */

        /* Not reached: the loop above has no exit. */
        pthread_exit(NULL);
}
1285
1286 static void
1287 reset(struct mic_info *mic)
1288 {
1289 #define RESET_TIMEOUT 120
1290         int i = RESET_TIMEOUT;
1291         setsysfs(mic->name, "state", "reset");
1292         while (i) {
1293                 char *state;
1294                 state = readsysfs(mic->name, "state");
1295                 if (!state)
1296                         goto retry;
1297                 mpsslog("%s: %s %d state %s\n",
1298                         mic->name, __func__, __LINE__, state);
1299
1300                 /*
1301                  * If the shutdown was initiated by OSPM, the state stays
1302                  * in "suspended" which is also a valid condition for reset.
1303                  */
1304                 if ((!strcmp(state, "offline")) ||
1305                     (!strcmp(state, "suspended"))) {
1306                         free(state);
1307                         break;
1308                 }
1309                 free(state);
1310 retry:
1311                 sleep(1);
1312                 i--;
1313         }
1314 }
1315
1316 static int
1317 get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1318 {
1319         if (!strcmp(shutdown_status, "nop"))
1320                 return MIC_NOP;
1321         if (!strcmp(shutdown_status, "crashed"))
1322                 return MIC_CRASHED;
1323         if (!strcmp(shutdown_status, "halted"))
1324                 return MIC_HALTED;
1325         if (!strcmp(shutdown_status, "poweroff"))
1326                 return MIC_POWER_OFF;
1327         if (!strcmp(shutdown_status, "restart"))
1328                 return MIC_RESTART;
1329         mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1330         /* Invalid state */
1331         assert(0);
1332 };
1333
1334 static int get_mic_state(struct mic_info *mic, char *state)
1335 {
1336         if (!strcmp(state, "offline"))
1337                 return MIC_OFFLINE;
1338         if (!strcmp(state, "online"))
1339                 return MIC_ONLINE;
1340         if (!strcmp(state, "shutting_down"))
1341                 return MIC_SHUTTING_DOWN;
1342         if (!strcmp(state, "reset_failed"))
1343                 return MIC_RESET_FAILED;
1344         if (!strcmp(state, "suspending"))
1345                 return MIC_SUSPENDING;
1346         if (!strcmp(state, "suspended"))
1347                 return MIC_SUSPENDED;
1348         mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1349         /* Invalid state */
1350         assert(0);
1351 };
1352
1353 static void mic_handle_shutdown(struct mic_info *mic)
1354 {
1355 #define SHUTDOWN_TIMEOUT 60
1356         int i = SHUTDOWN_TIMEOUT, ret, stat = 0;
1357         char *shutdown_status;
1358         while (i) {
1359                 shutdown_status = readsysfs(mic->name, "shutdown_status");
1360                 if (!shutdown_status)
1361                         continue;
1362                 mpsslog("%s: %s %d shutdown_status %s\n",
1363                         mic->name, __func__, __LINE__, shutdown_status);
1364                 switch (get_mic_shutdown_status(mic, shutdown_status)) {
1365                 case MIC_RESTART:
1366                         mic->restart = 1;
1367                 case MIC_HALTED:
1368                 case MIC_POWER_OFF:
1369                 case MIC_CRASHED:
1370                         free(shutdown_status);
1371                         goto reset;
1372                 default:
1373                         break;
1374                 }
1375                 free(shutdown_status);
1376                 sleep(1);
1377                 i--;
1378         }
1379 reset:
1380         ret = kill(mic->pid, SIGTERM);
1381         mpsslog("%s: %s %d kill pid %d ret %d\n",
1382                 mic->name, __func__, __LINE__,
1383                 mic->pid, ret);
1384         if (!ret) {
1385                 ret = waitpid(mic->pid, &stat,
1386                         WIFSIGNALED(stat));
1387                 mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1388                         mic->name, __func__, __LINE__,
1389                         ret, mic->pid);
1390         }
1391         if (ret == mic->pid)
1392                 reset(mic);
1393 }
1394
1395 static void *
1396 mic_config(void *arg)
1397 {
1398         struct mic_info *mic = (struct mic_info *)arg;
1399         char *state = NULL;
1400         char pathname[PATH_MAX];
1401         int fd, ret;
1402         struct pollfd ufds[1];
1403         char value[4096];
1404
1405         snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1406                  MICSYSFSDIR, mic->name, "state");
1407
1408         fd = open(pathname, O_RDONLY);
1409         if (fd < 0) {
1410                 mpsslog("%s: opening file %s failed %s\n",
1411                         mic->name, pathname, strerror(errno));
1412                 goto error;
1413         }
1414
1415         do {
1416                 ret = lseek(fd, 0, SEEK_SET);
1417                 if (ret < 0) {
1418                         mpsslog("%s: Failed to seek to file start '%s': %s\n",
1419                                 mic->name, pathname, strerror(errno));
1420                         goto close_error1;
1421                 }
1422                 ret = read(fd, value, sizeof(value));
1423                 if (ret < 0) {
1424                         mpsslog("%s: Failed to read sysfs entry '%s': %s\n",
1425                                 mic->name, pathname, strerror(errno));
1426                         goto close_error1;
1427                 }
1428 retry:
1429                 state = readsysfs(mic->name, "state");
1430                 if (!state)
1431                         goto retry;
1432                 mpsslog("%s: %s %d state %s\n",
1433                         mic->name, __func__, __LINE__, state);
1434                 switch (get_mic_state(mic, state)) {
1435                 case MIC_SHUTTING_DOWN:
1436                         mic_handle_shutdown(mic);
1437                         goto close_error;
1438                 case MIC_SUSPENDING:
1439                         mic->boot_on_resume = 1;
1440                         setsysfs(mic->name, "state", "suspend");
1441                         mic_handle_shutdown(mic);
1442                         goto close_error;
1443                 case MIC_OFFLINE:
1444                         if (mic->boot_on_resume) {
1445                                 setsysfs(mic->name, "state", "boot");
1446                                 mic->boot_on_resume = 0;
1447                         }
1448                         break;
1449                 default:
1450                         break;
1451                 }
1452                 free(state);
1453
1454                 ufds[0].fd = fd;
1455                 ufds[0].events = POLLERR | POLLPRI;
1456                 ret = poll(ufds, 1, -1);
1457                 if (ret < 0) {
1458                         mpsslog("%s: poll failed %s\n",
1459                                 mic->name, strerror(errno));
1460                         goto close_error1;
1461                 }
1462         } while (1);
1463 close_error:
1464         free(state);
1465 close_error1:
1466         close(fd);
1467 error:
1468         init_mic(mic);
1469         pthread_exit(NULL);
1470 }
1471
1472 static void
1473 set_cmdline(struct mic_info *mic)
1474 {
1475         char buffer[PATH_MAX];
1476         int len;
1477
1478         len = snprintf(buffer, PATH_MAX,
1479                 "clocksource=tsc highres=off nohz=off ");
1480         len += snprintf(buffer + len, PATH_MAX - len,
1481                 "cpufreq_on;corec6_off;pc3_off;pc6_off ");
1482         len += snprintf(buffer + len, PATH_MAX - len,
1483                 "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1484                 mic->id);
1485
1486         setsysfs(mic->name, "cmdline", buffer);
1487         mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1488         snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id);
1489         mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1490 }
1491
1492 static void
1493 set_log_buf_info(struct mic_info *mic)
1494 {
1495         int fd;
1496         off_t len;
1497         char system_map[] = "/lib/firmware/mic/System.map";
1498         char *map, *temp, log_buf[17] = {'\0'};
1499
1500         fd = open(system_map, O_RDONLY);
1501         if (fd < 0) {
1502                 mpsslog("%s: Opening System.map failed: %d\n",
1503                         mic->name, errno);
1504                 return;
1505         }
1506         len = lseek(fd, 0, SEEK_END);
1507         if (len < 0) {
1508                 mpsslog("%s: Reading System.map size failed: %d\n",
1509                         mic->name, errno);
1510                 close(fd);
1511                 return;
1512         }
1513         map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1514         if (map == MAP_FAILED) {
1515                 mpsslog("%s: mmap of System.map failed: %d\n",
1516                         mic->name, errno);
1517                 close(fd);
1518                 return;
1519         }
1520         temp = strstr(map, "__log_buf");
1521         if (!temp) {
1522                 mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1523                 munmap(map, len);
1524                 close(fd);
1525                 return;
1526         }
1527         strncpy(log_buf, temp - 19, 16);
1528         setsysfs(mic->name, "log_buf_addr", log_buf);
1529         mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1530         temp = strstr(map, "log_buf_len");
1531         if (!temp) {
1532                 mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1533                 munmap(map, len);
1534                 close(fd);
1535                 return;
1536         }
1537         strncpy(log_buf, temp - 19, 16);
1538         setsysfs(mic->name, "log_buf_len", log_buf);
1539         mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1540         munmap(map, len);
1541         close(fd);
1542 }
1543
1544 static void init_mic(struct mic_info *mic);
1545
1546 static void
1547 change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1548 {
1549         struct mic_info *mic;
1550
1551         for (mic = mic_list.next; mic != NULL; mic = mic->next)
1552                 mic->mic_virtblk.signaled = 1/* true */;
1553 }
1554
1555 static void
1556 init_mic(struct mic_info *mic)
1557 {
1558         struct sigaction ignore = {
1559                 .sa_flags = 0,
1560                 .sa_handler = SIG_IGN
1561         };
1562         struct sigaction act = {
1563                 .sa_flags = SA_SIGINFO,
1564                 .sa_sigaction = change_virtblk_backend,
1565         };
1566         char buffer[PATH_MAX];
1567         int err;
1568
1569         /*
1570          * Currently, one virtio block device is supported for each MIC card
1571          * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1572          * The signal informs the virtio block backend about a change in the
1573          * configuration file which specifies the virtio backend file name on
1574          * the host. Virtio block backend then re-reads the configuration file
1575          * and switches to the new block device. This signalling mechanism may
1576          * not be required once multiple virtio block devices are supported by
1577          * the MIC daemon.
1578          */
1579         sigaction(SIGUSR1, &ignore, NULL);
1580
1581         mic->pid = fork();
1582         switch (mic->pid) {
1583         case 0:
1584                 set_log_buf_info(mic);
1585                 set_cmdline(mic);
1586                 add_virtio_device(mic, &virtcons_dev_page.dd);
1587                 add_virtio_device(mic, &virtnet_dev_page.dd);
1588                 err = pthread_create(&mic->mic_console.console_thread, NULL,
1589                         virtio_console, mic);
1590                 if (err)
1591                         mpsslog("%s virtcons pthread_create failed %s\n",
1592                                 mic->name, strerror(err));
1593                 err = pthread_create(&mic->mic_net.net_thread, NULL,
1594                         virtio_net, mic);
1595                 if (err)
1596                         mpsslog("%s virtnet pthread_create failed %s\n",
1597                                 mic->name, strerror(err));
1598                 err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1599                         virtio_block, mic);
1600                 if (err)
1601                         mpsslog("%s virtblk pthread_create failed %s\n",
1602                                 mic->name, strerror(err));
1603                 sigemptyset(&act.sa_mask);
1604                 err = sigaction(SIGUSR1, &act, NULL);
1605                 if (err)
1606                         mpsslog("%s sigaction SIGUSR1 failed %s\n",
1607                                 mic->name, strerror(errno));
1608                 while (1)
1609                         sleep(60);
1610         case -1:
1611                 mpsslog("fork failed MIC name %s id %d errno %d\n",
1612                         mic->name, mic->id, errno);
1613                 break;
1614         default:
1615                 if (mic->restart) {
1616                         snprintf(buffer, PATH_MAX, "boot");
1617                         setsysfs(mic->name, "state", buffer);
1618                         mpsslog("%s restarting mic %d\n",
1619                                 mic->name, mic->restart);
1620                         mic->restart = 0;
1621                 }
1622                 pthread_create(&mic->config_thread, NULL, mic_config, mic);
1623         }
1624 }
1625
1626 static void
1627 start_daemon(void)
1628 {
1629         struct mic_info *mic;
1630
1631         for (mic = mic_list.next; mic != NULL; mic = mic->next)
1632                 init_mic(mic);
1633
1634         while (1)
1635                 sleep(60);
1636 }
1637
1638 static int
1639 init_mic_list(void)
1640 {
1641         struct mic_info *mic = &mic_list;
1642         struct dirent *file;
1643         DIR *dp;
1644         int cnt = 0;
1645
1646         dp = opendir(MICSYSFSDIR);
1647         if (!dp)
1648                 return 0;
1649
1650         while ((file = readdir(dp)) != NULL) {
1651                 if (!strncmp(file->d_name, "mic", 3)) {
1652                         mic->next = calloc(1, sizeof(struct mic_info));
1653                         if (mic->next) {
1654                                 mic = mic->next;
1655                                 mic->id = atoi(&file->d_name[3]);
1656                                 mic->name = malloc(strlen(file->d_name) + 16);
1657                                 if (mic->name)
1658                                         strcpy(mic->name, file->d_name);
1659                                 mpsslog("MIC name %s id %d\n", mic->name,
1660                                         mic->id);
1661                                 cnt++;
1662                         }
1663                 }
1664         }
1665
1666         closedir(dp);
1667         return cnt;
1668 }
1669
1670 void
1671 mpsslog(char *format, ...)
1672 {
1673         va_list args;
1674         char buffer[4096];
1675         char ts[52], *ts1;
1676         time_t t;
1677
1678         if (logfp == NULL)
1679                 return;
1680
1681         va_start(args, format);
1682         vsprintf(buffer, format, args);
1683         va_end(args);
1684
1685         time(&t);
1686         ts1 = ctime_r(&t, ts);
1687         ts1[strlen(ts1) - 1] = '\0';
1688         fprintf(logfp, "%s: %s", ts1, buffer);
1689
1690         fflush(logfp);
1691 }
1692
1693 int
1694 main(int argc, char *argv[])
1695 {
1696         int cnt;
1697         pid_t pid;
1698
1699         myname = argv[0];
1700
1701         logfp = fopen(LOGFILE_NAME, "a+");
1702         if (!logfp) {
1703                 fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1704                 exit(1);
1705         }
1706         pid = fork();
1707         switch (pid) {
1708         case 0:
1709                 break;
1710         case -1:
1711                 exit(2);
1712         default:
1713                 exit(0);
1714         }
1715
1716         mpsslog("MIC Daemon start\n");
1717
1718         cnt = init_mic_list();
1719         if (cnt == 0) {
1720                 mpsslog("MIC module not loaded\n");
1721                 exit(3);
1722         }
1723         mpsslog("MIC found %d devices\n", cnt);
1724
1725         start_daemon();
1726
1727         exit(0);
1728 }