Merge tag 'for-v3.18-rc' of git://git.infradead.org/battery-2.6
[linux-2.6-block.git] / Documentation / mic / mpssd / mpssd.c
CommitLineData
8d497515
CY
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2013 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * The full GNU General Public License is included in this distribution in
16 * the file called "COPYING".
17 *
18 * Intel MIC User Space Tools.
19 */
20
21#define _GNU_SOURCE
22
23#include <stdlib.h>
24#include <fcntl.h>
25#include <getopt.h>
26#include <assert.h>
27#include <unistd.h>
28#include <stdbool.h>
29#include <signal.h>
30#include <poll.h>
31#include <features.h>
32#include <sys/types.h>
33#include <sys/stat.h>
34#include <sys/mman.h>
35#include <sys/socket.h>
36#include <linux/virtio_ring.h>
37#include <linux/virtio_net.h>
38#include <linux/virtio_console.h>
39#include <linux/virtio_blk.h>
40#include <linux/version.h>
41#include "mpssd.h"
42#include <linux/mic_ioctl.h>
43#include <linux/mic_common.h>
8c2b0dc8 44#include <tools/endian.h>
8d497515
CY
45
46static void init_mic(struct mic_info *mic);
47
48static FILE *logfp;
49static struct mic_info mic_list;
50
51#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
52
53#define min_t(type, x, y) ({ \
54 type __min1 = (x); \
55 type __min2 = (y); \
56 __min1 < __min2 ? __min1 : __min2; })
57
/*
 * Align addr on a size boundary - adjust address up/down if needed.
 * The mask arithmetic only works when size is a power of two.
 *
 * Every macro argument is parenthesised so that expression arguments
 * such as "1 << 2" or "a + b" are not re-associated by the expansion.
 */
#define _ALIGN_DOWN(addr, size)	((addr) & (~((size) - 1)))
#define _ALIGN_UP(addr, size)	_ALIGN_DOWN((addr) + (size) - 1, (size))
61
62/* align addr on a size boundary - adjust address up if needed */
63#define _ALIGN(addr, size) _ALIGN_UP(addr, size)
64
65/* to align the pointer to the (next) page boundary */
66#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE)
67
68#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
69
70#define GSO_ENABLED 1
71#define MAX_GSO_SIZE (64 * 1024)
72#define ETH_H_LEN 14
73#define MAX_NET_PKT_SIZE (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
74#define MIC_DEVICE_PAGE_END 0x1000
75
76#ifndef VIRTIO_NET_HDR_F_DATA_VALID
77#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
78#endif
79
80static struct {
81 struct mic_device_desc dd;
82 struct mic_vqconfig vqconfig[2];
83 __u32 host_features, guest_acknowledgements;
84 struct virtio_console_config cons_config;
85} virtcons_dev_page = {
86 .dd = {
87 .type = VIRTIO_ID_CONSOLE,
88 .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
89 .feature_len = sizeof(virtcons_dev_page.host_features),
90 .config_len = sizeof(virtcons_dev_page.cons_config),
91 },
92 .vqconfig[0] = {
93 .num = htole16(MIC_VRING_ENTRIES),
94 },
95 .vqconfig[1] = {
96 .num = htole16(MIC_VRING_ENTRIES),
97 },
98};
99
100static struct {
101 struct mic_device_desc dd;
102 struct mic_vqconfig vqconfig[2];
103 __u32 host_features, guest_acknowledgements;
104 struct virtio_net_config net_config;
105} virtnet_dev_page = {
106 .dd = {
107 .type = VIRTIO_ID_NET,
108 .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
109 .feature_len = sizeof(virtnet_dev_page.host_features),
110 .config_len = sizeof(virtnet_dev_page.net_config),
111 },
112 .vqconfig[0] = {
113 .num = htole16(MIC_VRING_ENTRIES),
114 },
115 .vqconfig[1] = {
116 .num = htole16(MIC_VRING_ENTRIES),
117 },
118#if GSO_ENABLED
119 .host_features = htole32(
120 1 << VIRTIO_NET_F_CSUM |
121 1 << VIRTIO_NET_F_GSO |
122 1 << VIRTIO_NET_F_GUEST_TSO4 |
123 1 << VIRTIO_NET_F_GUEST_TSO6 |
124 1 << VIRTIO_NET_F_GUEST_ECN |
125 1 << VIRTIO_NET_F_GUEST_UFO),
126#else
127 .host_features = 0,
128#endif
129};
130
131static const char *mic_config_dir = "/etc/sysconfig/mic";
132static const char *virtblk_backend = "VIRTBLK_BACKEND";
133static struct {
134 struct mic_device_desc dd;
135 struct mic_vqconfig vqconfig[1];
136 __u32 host_features, guest_acknowledgements;
137 struct virtio_blk_config blk_config;
138} virtblk_dev_page = {
139 .dd = {
140 .type = VIRTIO_ID_BLOCK,
141 .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
142 .feature_len = sizeof(virtblk_dev_page.host_features),
143 .config_len = sizeof(virtblk_dev_page.blk_config),
144 },
145 .vqconfig[0] = {
146 .num = htole16(MIC_VRING_ENTRIES),
147 },
148 .host_features =
149 htole32(1<<VIRTIO_BLK_F_SEG_MAX),
150 .blk_config = {
151 .seg_max = htole32(MIC_VRING_ENTRIES - 2),
152 .capacity = htole64(0),
153 }
154};
155
156static char *myname;
157
158static int
159tap_configure(struct mic_info *mic, char *dev)
160{
161 pid_t pid;
162 char *ifargv[7];
163 char ipaddr[IFNAMSIZ];
164 int ret = 0;
165
166 pid = fork();
167 if (pid == 0) {
168 ifargv[0] = "ip";
169 ifargv[1] = "link";
170 ifargv[2] = "set";
171 ifargv[3] = dev;
172 ifargv[4] = "up";
173 ifargv[5] = NULL;
174 mpsslog("Configuring %s\n", dev);
175 ret = execvp("ip", ifargv);
176 if (ret < 0) {
177 mpsslog("%s execvp failed errno %s\n",
178 mic->name, strerror(errno));
179 return ret;
180 }
181 }
182 if (pid < 0) {
183 mpsslog("%s fork failed errno %s\n",
184 mic->name, strerror(errno));
185 return ret;
186 }
187
188 ret = waitpid(pid, NULL, 0);
189 if (ret < 0) {
190 mpsslog("%s waitpid failed errno %s\n",
191 mic->name, strerror(errno));
192 return ret;
193 }
194
195 snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id);
196
197 pid = fork();
198 if (pid == 0) {
199 ifargv[0] = "ip";
200 ifargv[1] = "addr";
201 ifargv[2] = "add";
202 ifargv[3] = ipaddr;
203 ifargv[4] = "dev";
204 ifargv[5] = dev;
205 ifargv[6] = NULL;
206 mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
207 ret = execvp("ip", ifargv);
208 if (ret < 0) {
209 mpsslog("%s execvp failed errno %s\n",
210 mic->name, strerror(errno));
211 return ret;
212 }
213 }
214 if (pid < 0) {
215 mpsslog("%s fork failed errno %s\n",
216 mic->name, strerror(errno));
217 return ret;
218 }
219
220 ret = waitpid(pid, NULL, 0);
221 if (ret < 0) {
222 mpsslog("%s waitpid failed errno %s\n",
223 mic->name, strerror(errno));
224 return ret;
225 }
226 mpsslog("MIC name %s %s %d DONE!\n",
227 mic->name, __func__, __LINE__);
228 return 0;
229}
230
231static int tun_alloc(struct mic_info *mic, char *dev)
232{
233 struct ifreq ifr;
234 int fd, err;
235#if GSO_ENABLED
236 unsigned offload;
237#endif
238 fd = open("/dev/net/tun", O_RDWR);
239 if (fd < 0) {
240 mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
241 goto done;
242 }
243
244 memset(&ifr, 0, sizeof(ifr));
245
246 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
247 if (*dev)
248 strncpy(ifr.ifr_name, dev, IFNAMSIZ);
249
ced2c60f 250 err = ioctl(fd, TUNSETIFF, (void *)&ifr);
8d497515
CY
251 if (err < 0) {
252 mpsslog("%s %s %d TUNSETIFF failed %s\n",
253 mic->name, __func__, __LINE__, strerror(errno));
254 close(fd);
255 return err;
256 }
257#if GSO_ENABLED
258 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
259 TUN_F_TSO_ECN | TUN_F_UFO;
260
261 err = ioctl(fd, TUNSETOFFLOAD, offload);
262 if (err < 0) {
263 mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
264 mic->name, __func__, __LINE__, strerror(errno));
265 close(fd);
266 return err;
267 }
268#endif
269 strcpy(dev, ifr.ifr_name);
270 mpsslog("Created TAP %s\n", dev);
271done:
272 return fd;
273}
274
275#define NET_FD_VIRTIO_NET 0
276#define NET_FD_TUN 1
277#define MAX_NET_FD 2
278
279static void set_dp(struct mic_info *mic, int type, void *dp)
280{
281 switch (type) {
282 case VIRTIO_ID_CONSOLE:
283 mic->mic_console.console_dp = dp;
284 return;
285 case VIRTIO_ID_NET:
286 mic->mic_net.net_dp = dp;
287 return;
288 case VIRTIO_ID_BLOCK:
289 mic->mic_virtblk.block_dp = dp;
290 return;
291 }
292 mpsslog("%s %s %d not found\n", mic->name, __func__, type);
293 assert(0);
294}
295
296static void *get_dp(struct mic_info *mic, int type)
297{
298 switch (type) {
299 case VIRTIO_ID_CONSOLE:
300 return mic->mic_console.console_dp;
301 case VIRTIO_ID_NET:
302 return mic->mic_net.net_dp;
303 case VIRTIO_ID_BLOCK:
304 return mic->mic_virtblk.block_dp;
305 }
306 mpsslog("%s %s %d not found\n", mic->name, __func__, type);
307 assert(0);
308 return NULL;
309}
310
311static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
312{
313 struct mic_device_desc *d;
314 int i;
315 void *dp = get_dp(mic, type);
316
1e31aa92 317 for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
8d497515
CY
318 i += mic_total_desc_size(d)) {
319 d = dp + i;
320
321 /* End of list */
322 if (d->type == 0)
323 break;
324
325 if (d->type == -1)
326 continue;
327
328 mpsslog("%s %s d-> type %d d %p\n",
329 mic->name, __func__, d->type, d);
330
331 if (d->type == (__u8)type)
332 return d;
333 }
334 mpsslog("%s %s %d not found\n", mic->name, __func__, type);
335 assert(0);
336 return NULL;
337}
338
339/* See comments in vhost.c for explanation of next_desc() */
340static unsigned next_desc(struct vring_desc *desc)
341{
342 unsigned int next;
343
344 if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
345 return -1U;
346 next = le16toh(desc->next);
347 return next;
348}
349
350/* Sum up all the IOVEC length */
351static ssize_t
352sum_iovec_len(struct mic_copy_desc *copy)
353{
354 ssize_t sum = 0;
355 int i;
356
357 for (i = 0; i < copy->iovcnt; i++)
358 sum += copy->iov[i].iov_len;
359 return sum;
360}
361
362static inline void verify_out_len(struct mic_info *mic,
363 struct mic_copy_desc *copy)
364{
365 if (copy->out_len != sum_iovec_len(copy)) {
6078e0be 366 mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
ced2c60f 367 mic->name, __func__, __LINE__,
6078e0be 368 copy->out_len, sum_iovec_len(copy));
8d497515
CY
369 assert(copy->out_len == sum_iovec_len(copy));
370 }
371}
372
373/* Display an iovec */
374static void
375disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
ced2c60f 376 const char *s, int line)
8d497515
CY
377{
378 int i;
379
380 for (i = 0; i < copy->iovcnt; i++)
6078e0be 381 mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
8d497515
CY
382 mic->name, s, line, i,
383 copy->iov[i].iov_base, copy->iov[i].iov_len);
384}
385
386static inline __u16 read_avail_idx(struct mic_vring *vr)
387{
388 return ACCESS_ONCE(vr->info->avail_idx);
389}
390
391static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
392 struct mic_copy_desc *copy, ssize_t len)
393{
394 copy->vr_idx = tx ? 0 : 1;
395 copy->update_used = true;
396 if (type == VIRTIO_ID_NET)
397 copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
398 else
399 copy->iov[0].iov_len = len;
400}
401
402/* Central API which triggers the copies */
403static int
404mic_virtio_copy(struct mic_info *mic, int fd,
ced2c60f 405 struct mic_vring *vr, struct mic_copy_desc *copy)
8d497515
CY
406{
407 int ret;
408
409 ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
410 if (ret) {
411 mpsslog("%s %s %d errno %s ret %d\n",
412 mic->name, __func__, __LINE__,
413 strerror(errno), ret);
414 }
415 return ret;
416}
417
418/*
419 * This initialization routine requires at least one
420 * vring i.e. vr0. vr1 is optional.
421 */
422static void *
423init_vr(struct mic_info *mic, int fd, int type,
424 struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
425{
426 int vr_size;
427 char *va;
428
429 vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
430 MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
431 va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
432 PROT_READ, MAP_SHARED, fd, 0);
433 if (MAP_FAILED == va) {
434 mpsslog("%s %s %d mmap failed errno %s\n",
435 mic->name, __func__, __LINE__,
436 strerror(errno));
437 goto done;
438 }
439 set_dp(mic, type, va);
440 vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
441 vr0->info = vr0->va +
442 vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
443 vring_init(&vr0->vr,
ced2c60f 444 MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
8d497515
CY
445 mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
446 __func__, mic->name, vr0->va, vr0->info, vr_size,
447 vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
448 mpsslog("magic 0x%x expected 0x%x\n",
173c0727
AD
449 le32toh(vr0->info->magic), MIC_MAGIC + type);
450 assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
8d497515
CY
451 if (vr1) {
452 vr1->va = (struct mic_vring *)
453 &va[MIC_DEVICE_PAGE_END + vr_size];
454 vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES,
455 MIC_VIRTIO_RING_ALIGN);
456 vring_init(&vr1->vr,
ced2c60f 457 MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
8d497515
CY
458 mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
459 __func__, mic->name, vr1->va, vr1->info, vr_size,
460 vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
461 mpsslog("magic 0x%x expected 0x%x\n",
173c0727
AD
462 le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
463 assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
8d497515
CY
464 }
465done:
466 return va;
467}
468
469static void
470wait_for_card_driver(struct mic_info *mic, int fd, int type)
471{
472 struct pollfd pollfd;
473 int err;
474 struct mic_device_desc *desc = get_device_desc(mic, type);
475
476 pollfd.fd = fd;
477 mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
478 mic->name, __func__, type, desc->status);
479 while (1) {
480 pollfd.events = POLLIN;
481 pollfd.revents = 0;
482 err = poll(&pollfd, 1, -1);
483 if (err < 0) {
484 mpsslog("%s %s poll failed %s\n",
485 mic->name, __func__, strerror(errno));
486 continue;
487 }
488
489 if (pollfd.revents) {
490 mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n",
491 mic->name, __func__, type, desc->status);
492 if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
493 mpsslog("%s %s poll.revents %d\n",
494 mic->name, __func__, pollfd.revents);
495 mpsslog("%s %s desc-> type %d status 0x%x\n",
496 mic->name, __func__, type,
497 desc->status);
498 break;
499 }
500 }
501 }
502}
503
504/* Spin till we have some descriptors */
505static void
506spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
507{
508 __u16 avail_idx = read_avail_idx(vr);
509
510 while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
511#ifdef DEBUG
512 mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
513 mic->name, __func__,
514 le16toh(vr->vr.avail->idx), vr->info->avail_idx);
515#endif
516 sched_yield();
517 }
518}
519
520static void *
521virtio_net(void *arg)
522{
523 static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
1e31aa92 524 static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
8d497515
CY
525 struct iovec vnet_iov[2][2] = {
526 { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
527 { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
528 { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
529 { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
530 };
531 struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
532 struct mic_info *mic = (struct mic_info *)arg;
533 char if_name[IFNAMSIZ];
534 struct pollfd net_poll[MAX_NET_FD];
535 struct mic_vring tx_vr, rx_vr;
536 struct mic_copy_desc copy;
537 struct mic_device_desc *desc;
538 int err;
539
540 snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
541 mic->mic_net.tap_fd = tun_alloc(mic, if_name);
542 if (mic->mic_net.tap_fd < 0)
543 goto done;
544
545 if (tap_configure(mic, if_name))
546 goto done;
547 mpsslog("MIC name %s id %d\n", mic->name, mic->id);
548
549 net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
550 net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
551 net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
552 net_poll[NET_FD_TUN].events = POLLIN;
553
554 if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
ced2c60f 555 VIRTIO_ID_NET, &tx_vr, &rx_vr,
8d497515
CY
556 virtnet_dev_page.dd.num_vq)) {
557 mpsslog("%s init_vr failed %s\n",
558 mic->name, strerror(errno));
559 goto done;
560 }
561
562 copy.iovcnt = 2;
563 desc = get_device_desc(mic, VIRTIO_ID_NET);
564
565 while (1) {
566 ssize_t len;
567
568 net_poll[NET_FD_VIRTIO_NET].revents = 0;
569 net_poll[NET_FD_TUN].revents = 0;
570
571 /* Start polling for data from tap and virtio net */
572 err = poll(net_poll, 2, -1);
573 if (err < 0) {
574 mpsslog("%s poll failed %s\n",
575 __func__, strerror(errno));
576 continue;
577 }
578 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
579 wait_for_card_driver(mic, mic->mic_net.virtio_net_fd,
ced2c60f 580 VIRTIO_ID_NET);
8d497515
CY
581 /*
582 * Check if there is data to be read from TUN and write to
583 * virtio net fd if there is.
584 */
585 if (net_poll[NET_FD_TUN].revents & POLLIN) {
586 copy.iov = iov0;
587 len = readv(net_poll[NET_FD_TUN].fd,
588 copy.iov, copy.iovcnt);
589 if (len > 0) {
590 struct virtio_net_hdr *hdr
ced2c60f 591 = (struct virtio_net_hdr *)vnet_hdr[0];
8d497515
CY
592
593 /* Disable checksums on the card since we are on
594 a reliable PCIe link */
595 hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
596#ifdef DEBUG
597 mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
598 __func__, __LINE__, hdr->flags);
599 mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
600 copy.out_len, hdr->gso_type);
601#endif
602#ifdef DEBUG
603 disp_iovec(mic, copy, __func__, __LINE__);
604 mpsslog("%s %s %d read from tap 0x%lx\n",
605 mic->name, __func__, __LINE__,
606 len);
607#endif
608 spin_for_descriptors(mic, &tx_vr);
609 txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
ced2c60f 610 len);
8d497515
CY
611
612 err = mic_virtio_copy(mic,
613 mic->mic_net.virtio_net_fd, &tx_vr,
614 &copy);
615 if (err < 0) {
616 mpsslog("%s %s %d mic_virtio_copy %s\n",
617 mic->name, __func__, __LINE__,
618 strerror(errno));
619 }
620 if (!err)
621 verify_out_len(mic, &copy);
622#ifdef DEBUG
623 disp_iovec(mic, copy, __func__, __LINE__);
624 mpsslog("%s %s %d wrote to net 0x%lx\n",
625 mic->name, __func__, __LINE__,
626 sum_iovec_len(&copy));
627#endif
628 /* Reinitialize IOV for next run */
629 iov0[1].iov_len = MAX_NET_PKT_SIZE;
630 } else if (len < 0) {
631 disp_iovec(mic, &copy, __func__, __LINE__);
632 mpsslog("%s %s %d read failed %s ", mic->name,
633 __func__, __LINE__, strerror(errno));
6078e0be 634 mpsslog("cnt %d sum %zd\n",
8d497515
CY
635 copy.iovcnt, sum_iovec_len(&copy));
636 }
637 }
638
639 /*
640 * Check if there is data to be read from virtio net and
641 * write to TUN if there is.
642 */
643 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
644 while (rx_vr.info->avail_idx !=
645 le16toh(rx_vr.vr.avail->idx)) {
646 copy.iov = iov1;
647 txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
ced2c60f 648 MAX_NET_PKT_SIZE
8d497515
CY
649 + sizeof(struct virtio_net_hdr));
650
651 err = mic_virtio_copy(mic,
652 mic->mic_net.virtio_net_fd, &rx_vr,
653 &copy);
654 if (!err) {
655#ifdef DEBUG
656 struct virtio_net_hdr *hdr
657 = (struct virtio_net_hdr *)
658 vnet_hdr[1];
659
660 mpsslog("%s %s %d hdr->flags 0x%x, ",
661 mic->name, __func__, __LINE__,
662 hdr->flags);
663 mpsslog("out_len %d gso_type 0x%x\n",
664 copy.out_len,
665 hdr->gso_type);
666#endif
667 /* Set the correct output iov_len */
668 iov1[1].iov_len = copy.out_len -
669 sizeof(struct virtio_net_hdr);
670 verify_out_len(mic, &copy);
671#ifdef DEBUG
672 disp_iovec(mic, copy, __func__,
ced2c60f 673 __LINE__);
8d497515
CY
674 mpsslog("%s %s %d ",
675 mic->name, __func__, __LINE__);
676 mpsslog("read from net 0x%lx\n",
677 sum_iovec_len(copy));
678#endif
679 len = writev(net_poll[NET_FD_TUN].fd,
680 copy.iov, copy.iovcnt);
681 if (len != sum_iovec_len(&copy)) {
682 mpsslog("Tun write failed %s ",
683 strerror(errno));
6078e0be
SD
684 mpsslog("len 0x%zx ", len);
685 mpsslog("read_len 0x%zx\n",
8d497515
CY
686 sum_iovec_len(&copy));
687 } else {
688#ifdef DEBUG
689 disp_iovec(mic, &copy, __func__,
ced2c60f 690 __LINE__);
8d497515
CY
691 mpsslog("%s %s %d ",
692 mic->name, __func__,
693 __LINE__);
694 mpsslog("wrote to tap 0x%lx\n",
695 len);
696#endif
697 }
698 } else {
699 mpsslog("%s %s %d mic_virtio_copy %s\n",
700 mic->name, __func__, __LINE__,
701 strerror(errno));
702 break;
703 }
704 }
705 }
706 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
707 mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
708 }
709done:
710 pthread_exit(NULL);
711}
712
713/* virtio_console */
714#define VIRTIO_CONSOLE_FD 0
715#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
716#define MAX_CONSOLE_FD (MONITOR_FD + 1) /* must be the last one + 1 */
717#define MAX_BUFFER_SIZE PAGE_SIZE
718
719static void *
720virtio_console(void *arg)
721{
722 static __u8 vcons_buf[2][PAGE_SIZE];
723 struct iovec vcons_iov[2] = {
724 { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
725 { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
726 };
727 struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
728 struct mic_info *mic = (struct mic_info *)arg;
729 int err;
730 struct pollfd console_poll[MAX_CONSOLE_FD];
731 int pty_fd;
732 char *pts_name;
733 ssize_t len;
734 struct mic_vring tx_vr, rx_vr;
735 struct mic_copy_desc copy;
736 struct mic_device_desc *desc;
737
738 pty_fd = posix_openpt(O_RDWR);
739 if (pty_fd < 0) {
740 mpsslog("can't open a pseudoterminal master device: %s\n",
741 strerror(errno));
742 goto _return;
743 }
744 pts_name = ptsname(pty_fd);
745 if (pts_name == NULL) {
746 mpsslog("can't get pts name\n");
747 goto _close_pty;
748 }
749 printf("%s console message goes to %s\n", mic->name, pts_name);
750 mpsslog("%s console message goes to %s\n", mic->name, pts_name);
751 err = grantpt(pty_fd);
752 if (err < 0) {
753 mpsslog("can't grant access: %s %s\n",
ced2c60f 754 pts_name, strerror(errno));
8d497515
CY
755 goto _close_pty;
756 }
757 err = unlockpt(pty_fd);
758 if (err < 0) {
759 mpsslog("can't unlock a pseudoterminal: %s %s\n",
ced2c60f 760 pts_name, strerror(errno));
8d497515
CY
761 goto _close_pty;
762 }
763 console_poll[MONITOR_FD].fd = pty_fd;
764 console_poll[MONITOR_FD].events = POLLIN;
765
766 console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
767 console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
768
769 if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
ced2c60f 770 VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
8d497515
CY
771 virtcons_dev_page.dd.num_vq)) {
772 mpsslog("%s init_vr failed %s\n",
773 mic->name, strerror(errno));
774 goto _close_pty;
775 }
776
777 copy.iovcnt = 1;
778 desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
779
780 for (;;) {
781 console_poll[MONITOR_FD].revents = 0;
782 console_poll[VIRTIO_CONSOLE_FD].revents = 0;
783 err = poll(console_poll, MAX_CONSOLE_FD, -1);
784 if (err < 0) {
785 mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
786 strerror(errno));
787 continue;
788 }
789 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
790 wait_for_card_driver(mic,
ced2c60f 791 mic->mic_console.virtio_console_fd,
8d497515
CY
792 VIRTIO_ID_CONSOLE);
793
794 if (console_poll[MONITOR_FD].revents & POLLIN) {
795 copy.iov = iov0;
796 len = readv(pty_fd, copy.iov, copy.iovcnt);
797 if (len > 0) {
798#ifdef DEBUG
799 disp_iovec(mic, copy, __func__, __LINE__);
800 mpsslog("%s %s %d read from tap 0x%lx\n",
801 mic->name, __func__, __LINE__,
802 len);
803#endif
804 spin_for_descriptors(mic, &tx_vr);
805 txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
ced2c60f 806 &copy, len);
8d497515
CY
807
808 err = mic_virtio_copy(mic,
809 mic->mic_console.virtio_console_fd,
810 &tx_vr, &copy);
811 if (err < 0) {
812 mpsslog("%s %s %d mic_virtio_copy %s\n",
813 mic->name, __func__, __LINE__,
814 strerror(errno));
815 }
816 if (!err)
817 verify_out_len(mic, &copy);
818#ifdef DEBUG
819 disp_iovec(mic, copy, __func__, __LINE__);
820 mpsslog("%s %s %d wrote to net 0x%lx\n",
821 mic->name, __func__, __LINE__,
822 sum_iovec_len(copy));
823#endif
824 /* Reinitialize IOV for next run */
825 iov0->iov_len = PAGE_SIZE;
826 } else if (len < 0) {
827 disp_iovec(mic, &copy, __func__, __LINE__);
828 mpsslog("%s %s %d read failed %s ",
829 mic->name, __func__, __LINE__,
830 strerror(errno));
6078e0be 831 mpsslog("cnt %d sum %zd\n",
8d497515
CY
832 copy.iovcnt, sum_iovec_len(&copy));
833 }
834 }
835
836 if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
837 while (rx_vr.info->avail_idx !=
838 le16toh(rx_vr.vr.avail->idx)) {
839 copy.iov = iov1;
840 txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
ced2c60f 841 &copy, PAGE_SIZE);
8d497515
CY
842
843 err = mic_virtio_copy(mic,
844 mic->mic_console.virtio_console_fd,
845 &rx_vr, &copy);
846 if (!err) {
847 /* Set the correct output iov_len */
848 iov1->iov_len = copy.out_len;
849 verify_out_len(mic, &copy);
850#ifdef DEBUG
851 disp_iovec(mic, copy, __func__,
ced2c60f 852 __LINE__);
8d497515
CY
853 mpsslog("%s %s %d ",
854 mic->name, __func__, __LINE__);
855 mpsslog("read from net 0x%lx\n",
856 sum_iovec_len(copy));
857#endif
858 len = writev(pty_fd,
859 copy.iov, copy.iovcnt);
860 if (len != sum_iovec_len(&copy)) {
861 mpsslog("Tun write failed %s ",
862 strerror(errno));
6078e0be
SD
863 mpsslog("len 0x%zx ", len);
864 mpsslog("read_len 0x%zx\n",
8d497515
CY
865 sum_iovec_len(&copy));
866 } else {
867#ifdef DEBUG
868 disp_iovec(mic, copy, __func__,
ced2c60f 869 __LINE__);
8d497515
CY
870 mpsslog("%s %s %d ",
871 mic->name, __func__,
872 __LINE__);
873 mpsslog("wrote to tap 0x%lx\n",
874 len);
875#endif
876 }
877 } else {
878 mpsslog("%s %s %d mic_virtio_copy %s\n",
879 mic->name, __func__, __LINE__,
880 strerror(errno));
881 break;
882 }
883 }
884 }
885 if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
886 mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
887 }
888_close_pty:
889 close(pty_fd);
890_return:
891 pthread_exit(NULL);
892}
893
894static void
895add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
896{
897 char path[PATH_MAX];
898 int fd, err;
899
900 snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
901 fd = open(path, O_RDWR);
902 if (fd < 0) {
903 mpsslog("Could not open %s %s\n", path, strerror(errno));
904 return;
905 }
906
907 err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
908 if (err < 0) {
909 mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
910 close(fd);
911 return;
912 }
913 switch (dd->type) {
914 case VIRTIO_ID_NET:
915 mic->mic_net.virtio_net_fd = fd;
916 mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
917 break;
918 case VIRTIO_ID_CONSOLE:
919 mic->mic_console.virtio_console_fd = fd;
920 mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
921 break;
922 case VIRTIO_ID_BLOCK:
923 mic->mic_virtblk.virtio_block_fd = fd;
924 mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
925 break;
926 }
927}
928
929static bool
930set_backend_file(struct mic_info *mic)
931{
932 FILE *config;
933 char buff[PATH_MAX], *line, *evv, *p;
934
935 snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
936 config = fopen(buff, "r");
937 if (config == NULL)
938 return false;
939 do { /* look for "virtblk_backend=XXXX" */
940 line = fgets(buff, PATH_MAX, config);
941 if (line == NULL)
942 break;
943 if (*line == '#')
944 continue;
945 p = strchr(line, '\n');
946 if (p)
947 *p = '\0';
948 } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
949 fclose(config);
950 if (line == NULL)
951 return false;
952 evv = strchr(line, '=');
953 if (evv == NULL)
954 return false;
955 mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
956 if (mic->mic_virtblk.backend_file == NULL) {
6078e0be 957 mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
8d497515
CY
958 return false;
959 }
960 strcpy(mic->mic_virtblk.backend_file, evv + 1);
961 return true;
962}
963
964#define SECTOR_SIZE 512
965static bool
966set_backend_size(struct mic_info *mic)
967{
968 mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
969 SEEK_END);
970 if (mic->mic_virtblk.backend_size < 0) {
971 mpsslog("%s: can't seek: %s\n",
972 mic->name, mic->mic_virtblk.backend_file);
973 return false;
974 }
975 virtblk_dev_page.blk_config.capacity =
976 mic->mic_virtblk.backend_size / SECTOR_SIZE;
977 if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
978 virtblk_dev_page.blk_config.capacity++;
979
980 virtblk_dev_page.blk_config.capacity =
981 htole64(virtblk_dev_page.blk_config.capacity);
982
983 return true;
984}
985
986static bool
987open_backend(struct mic_info *mic)
988{
989 if (!set_backend_file(mic))
990 goto _error_exit;
991 mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
992 if (mic->mic_virtblk.backend < 0) {
993 mpsslog("%s: can't open: %s\n", mic->name,
994 mic->mic_virtblk.backend_file);
995 goto _error_free;
996 }
997 if (!set_backend_size(mic))
998 goto _error_close;
999 mic->mic_virtblk.backend_addr = mmap(NULL,
1000 mic->mic_virtblk.backend_size,
1001 PROT_READ|PROT_WRITE, MAP_SHARED,
1002 mic->mic_virtblk.backend, 0L);
1003 if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1004 mpsslog("%s: can't map: %s %s\n",
1005 mic->name, mic->mic_virtblk.backend_file,
1006 strerror(errno));
1007 goto _error_close;
1008 }
1009 return true;
1010
1011 _error_close:
1012 close(mic->mic_virtblk.backend);
1013 _error_free:
1014 free(mic->mic_virtblk.backend_file);
1015 _error_exit:
1016 return false;
1017}
1018
1019static void
1020close_backend(struct mic_info *mic)
1021{
1022 munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1023 close(mic->mic_virtblk.backend);
1024 free(mic->mic_virtblk.backend_file);
1025}
1026
1027static bool
1028start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1029{
6078e0be 1030 if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
8d497515
CY
1031 mpsslog("%s: blk_config is not 8 byte aligned.\n",
1032 mic->name);
1033 return false;
1034 }
1035 add_virtio_device(mic, &virtblk_dev_page.dd);
1036 if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
ced2c60f
AD
1037 VIRTIO_ID_BLOCK, vring, NULL,
1038 virtblk_dev_page.dd.num_vq)) {
8d497515
CY
1039 mpsslog("%s init_vr failed %s\n",
1040 mic->name, strerror(errno));
1041 return false;
1042 }
1043 return true;
1044}
1045
1046static void
1047stop_virtblk(struct mic_info *mic)
1048{
1049 int vr_size, ret;
1050
1051 vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
1052 MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
1053 ret = munmap(mic->mic_virtblk.block_dp,
1054 MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1055 if (ret < 0)
1056 mpsslog("%s munmap errno %d\n", mic->name, errno);
1057 close(mic->mic_virtblk.virtio_block_fd);
1058}
1059
1060static __u8
1061header_error_check(struct vring_desc *desc)
1062{
1063 if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1064 mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
ced2c60f 1065 __func__, __LINE__);
8d497515
CY
1066 return -EIO;
1067 }
1068 if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1069 mpsslog("%s() %d: alone\n",
1070 __func__, __LINE__);
1071 return -EIO;
1072 }
1073 if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1074 mpsslog("%s() %d: not read\n",
1075 __func__, __LINE__);
1076 return -EIO;
1077 }
1078 return 0;
1079}
1080
1081static int
1082read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1083{
1084 struct iovec iovec;
1085 struct mic_copy_desc copy;
1086
1087 iovec.iov_len = sizeof(*hdr);
1088 iovec.iov_base = hdr;
1089 copy.iov = &iovec;
1090 copy.iovcnt = 1;
1091 copy.vr_idx = 0; /* only one vring on virtio_block */
1092 copy.update_used = false; /* do not update used index */
1093 return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1094}
1095
1096static int
1097transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1098{
1099 struct mic_copy_desc copy;
1100
1101 copy.iov = iovec;
1102 copy.iovcnt = iovcnt;
1103 copy.vr_idx = 0; /* only one vring on virtio_block */
1104 copy.update_used = false; /* do not update used index */
1105 return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1106}
1107
1108static __u8
1109status_error_check(struct vring_desc *desc)
1110{
1111 if (le32toh(desc->len) != sizeof(__u8)) {
1112 mpsslog("%s() %d: length is not sizeof(status)\n",
1113 __func__, __LINE__);
1114 return -EIO;
1115 }
1116 return 0;
1117}
1118
1119static int
1120write_status(int fd, __u8 *status)
1121{
1122 struct iovec iovec;
1123 struct mic_copy_desc copy;
1124
1125 iovec.iov_base = status;
1126 iovec.iov_len = sizeof(*status);
1127 copy.iov = &iovec;
1128 copy.iovcnt = 1;
1129 copy.vr_idx = 0; /* only one vring on virtio_block */
1130 copy.update_used = true; /* Update used index */
1131 return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1132}
1133
1134static void *
1135virtio_block(void *arg)
1136{
ced2c60f 1137 struct mic_info *mic = (struct mic_info *)arg;
8d497515
CY
1138 int ret;
1139 struct pollfd block_poll;
1140 struct mic_vring vring;
1141 __u16 avail_idx;
1142 __u32 desc_idx;
1143 struct vring_desc *desc;
1144 struct iovec *iovec, *piov;
1145 __u8 status;
1146 __u32 buffer_desc_idx;
1147 struct virtio_blk_outhdr hdr;
1148 void *fos;
1149
1150 for (;;) { /* forever */
1151 if (!open_backend(mic)) { /* No virtblk */
1152 for (mic->mic_virtblk.signaled = 0;
1153 !mic->mic_virtblk.signaled;)
1154 sleep(1);
1155 continue;
1156 }
1157
1158 /* backend file is specified. */
1159 if (!start_virtblk(mic, &vring))
1160 goto _close_backend;
1161 iovec = malloc(sizeof(*iovec) *
1162 le32toh(virtblk_dev_page.blk_config.seg_max));
1163 if (!iovec) {
1164 mpsslog("%s: can't alloc iovec: %s\n",
1165 mic->name, strerror(ENOMEM));
1166 goto _stop_virtblk;
1167 }
1168
1169 block_poll.fd = mic->mic_virtblk.virtio_block_fd;
1170 block_poll.events = POLLIN;
1171 for (mic->mic_virtblk.signaled = 0;
1172 !mic->mic_virtblk.signaled;) {
1173 block_poll.revents = 0;
1174 /* timeout in 1 sec to see signaled */
1175 ret = poll(&block_poll, 1, 1000);
1176 if (ret < 0) {
1177 mpsslog("%s %d: poll failed: %s\n",
1178 __func__, __LINE__,
1179 strerror(errno));
1180 continue;
1181 }
1182
1183 if (!(block_poll.revents & POLLIN)) {
1184#ifdef DEBUG
1185 mpsslog("%s %d: block_poll.revents=0x%x\n",
1186 __func__, __LINE__, block_poll.revents);
1187#endif
1188 continue;
1189 }
1190
1191 /* POLLIN */
1192 while (vring.info->avail_idx !=
1193 le16toh(vring.vr.avail->idx)) {
1194 /* read header element */
1195 avail_idx =
1196 vring.info->avail_idx &
1197 (vring.vr.num - 1);
1198 desc_idx = le16toh(
1199 vring.vr.avail->ring[avail_idx]);
1200 desc = &vring.vr.desc[desc_idx];
1201#ifdef DEBUG
1202 mpsslog("%s() %d: avail_idx=%d ",
1203 __func__, __LINE__,
1204 vring.info->avail_idx);
1205 mpsslog("vring.vr.num=%d desc=%p\n",
1206 vring.vr.num, desc);
1207#endif
1208 status = header_error_check(desc);
1209 ret = read_header(
1210 mic->mic_virtblk.virtio_block_fd,
1211 &hdr, desc_idx);
1212 if (ret < 0) {
1213 mpsslog("%s() %d %s: ret=%d %s\n",
1214 __func__, __LINE__,
1215 mic->name, ret,
1216 strerror(errno));
1217 break;
1218 }
1219 /* buffer element */
1220 piov = iovec;
1221 status = 0;
1222 fos = mic->mic_virtblk.backend_addr +
1223 (hdr.sector * SECTOR_SIZE);
ced2c60f
AD
1224 buffer_desc_idx = next_desc(desc);
1225 desc_idx = buffer_desc_idx;
8d497515
CY
1226 for (desc = &vring.vr.desc[buffer_desc_idx];
1227 desc->flags & VRING_DESC_F_NEXT;
1228 desc_idx = next_desc(desc),
1229 desc = &vring.vr.desc[desc_idx]) {
1230 piov->iov_len = desc->len;
1231 piov->iov_base = fos;
1232 piov++;
1233 fos += desc->len;
1234 }
1235 /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
1236 if (hdr.type & ~(VIRTIO_BLK_T_OUT |
1237 VIRTIO_BLK_T_GET_ID)) {
1238 /*
1239 VIRTIO_BLK_T_IN - does not do
1240 anything. Probably for documenting.
1241 VIRTIO_BLK_T_SCSI_CMD - for
1242 virtio_scsi.
1243 VIRTIO_BLK_T_FLUSH - turned off in
1244 config space.
1245 VIRTIO_BLK_T_BARRIER - defined but not
1246 used in anywhere.
1247 */
1248 mpsslog("%s() %d: type %x ",
1249 __func__, __LINE__,
1250 hdr.type);
1251 mpsslog("is not supported\n");
1252 status = -ENOTSUP;
1253
1254 } else {
1255 ret = transfer_blocks(
1256 mic->mic_virtblk.virtio_block_fd,
1257 iovec,
1258 piov - iovec);
1259 if (ret < 0 &&
ced2c60f 1260 status != 0)
8d497515
CY
1261 status = ret;
1262 }
1263 /* write status and update used pointer */
1264 if (status != 0)
1265 status = status_error_check(desc);
1266 ret = write_status(
1267 mic->mic_virtblk.virtio_block_fd,
1268 &status);
1269#ifdef DEBUG
1270 mpsslog("%s() %d: write status=%d on desc=%p\n",
1271 __func__, __LINE__,
1272 status, desc);
1273#endif
1274 }
1275 }
1276 free(iovec);
1277_stop_virtblk:
1278 stop_virtblk(mic);
1279_close_backend:
1280 close_backend(mic);
1281 } /* forever */
1282
1283 pthread_exit(NULL);
1284}
1285
1286static void
1287reset(struct mic_info *mic)
1288{
1289#define RESET_TIMEOUT 120
1290 int i = RESET_TIMEOUT;
1291 setsysfs(mic->name, "state", "reset");
1292 while (i) {
1293 char *state;
1294 state = readsysfs(mic->name, "state");
1295 if (!state)
1296 goto retry;
1297 mpsslog("%s: %s %d state %s\n",
1298 mic->name, __func__, __LINE__, state);
af190494
DC
1299
1300 /*
1301 * If the shutdown was initiated by OSPM, the state stays
1302 * in "suspended" which is also a valid condition for reset.
1303 */
1304 if ((!strcmp(state, "offline")) ||
1305 (!strcmp(state, "suspended"))) {
8d497515
CY
1306 free(state);
1307 break;
1308 }
1309 free(state);
1310retry:
1311 sleep(1);
1312 i--;
1313 }
1314}
1315
1316static int
1317get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1318{
1319 if (!strcmp(shutdown_status, "nop"))
1320 return MIC_NOP;
1321 if (!strcmp(shutdown_status, "crashed"))
1322 return MIC_CRASHED;
1323 if (!strcmp(shutdown_status, "halted"))
1324 return MIC_HALTED;
1325 if (!strcmp(shutdown_status, "poweroff"))
1326 return MIC_POWER_OFF;
1327 if (!strcmp(shutdown_status, "restart"))
1328 return MIC_RESTART;
1329 mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1330 /* Invalid state */
1331 assert(0);
1332};
1333
1334static int get_mic_state(struct mic_info *mic, char *state)
1335{
1336 if (!strcmp(state, "offline"))
1337 return MIC_OFFLINE;
1338 if (!strcmp(state, "online"))
1339 return MIC_ONLINE;
1340 if (!strcmp(state, "shutting_down"))
1341 return MIC_SHUTTING_DOWN;
1342 if (!strcmp(state, "reset_failed"))
1343 return MIC_RESET_FAILED;
af190494
DC
1344 if (!strcmp(state, "suspending"))
1345 return MIC_SUSPENDING;
1346 if (!strcmp(state, "suspended"))
1347 return MIC_SUSPENDED;
8d497515
CY
1348 mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1349 /* Invalid state */
1350 assert(0);
1351};
1352
1353static void mic_handle_shutdown(struct mic_info *mic)
1354{
1355#define SHUTDOWN_TIMEOUT 60
1356 int i = SHUTDOWN_TIMEOUT, ret, stat = 0;
1357 char *shutdown_status;
1358 while (i) {
1359 shutdown_status = readsysfs(mic->name, "shutdown_status");
1360 if (!shutdown_status)
1361 continue;
1362 mpsslog("%s: %s %d shutdown_status %s\n",
1363 mic->name, __func__, __LINE__, shutdown_status);
1364 switch (get_mic_shutdown_status(mic, shutdown_status)) {
1365 case MIC_RESTART:
1366 mic->restart = 1;
1367 case MIC_HALTED:
1368 case MIC_POWER_OFF:
1369 case MIC_CRASHED:
1370 free(shutdown_status);
1371 goto reset;
1372 default:
1373 break;
1374 }
1375 free(shutdown_status);
1376 sleep(1);
1377 i--;
1378 }
1379reset:
1380 ret = kill(mic->pid, SIGTERM);
1381 mpsslog("%s: %s %d kill pid %d ret %d\n",
1382 mic->name, __func__, __LINE__,
1383 mic->pid, ret);
1384 if (!ret) {
1385 ret = waitpid(mic->pid, &stat,
1386 WIFSIGNALED(stat));
1387 mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1388 mic->name, __func__, __LINE__,
1389 ret, mic->pid);
1390 }
1391 if (ret == mic->pid)
1392 reset(mic);
1393}
1394
1395static void *
1396mic_config(void *arg)
1397{
1398 struct mic_info *mic = (struct mic_info *)arg;
1399 char *state = NULL;
1400 char pathname[PATH_MAX];
1401 int fd, ret;
1402 struct pollfd ufds[1];
1403 char value[4096];
1404
1405 snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
ced2c60f 1406 MICSYSFSDIR, mic->name, "state");
8d497515
CY
1407
1408 fd = open(pathname, O_RDONLY);
1409 if (fd < 0) {
1410 mpsslog("%s: opening file %s failed %s\n",
1411 mic->name, pathname, strerror(errno));
1412 goto error;
1413 }
1414
1415 do {
286c2402
AD
1416 ret = lseek(fd, 0, SEEK_SET);
1417 if (ret < 0) {
1418 mpsslog("%s: Failed to seek to file start '%s': %s\n",
1419 mic->name, pathname, strerror(errno));
1420 goto close_error1;
1421 }
8d497515
CY
1422 ret = read(fd, value, sizeof(value));
1423 if (ret < 0) {
1424 mpsslog("%s: Failed to read sysfs entry '%s': %s\n",
1425 mic->name, pathname, strerror(errno));
1426 goto close_error1;
1427 }
1428retry:
1429 state = readsysfs(mic->name, "state");
1430 if (!state)
1431 goto retry;
1432 mpsslog("%s: %s %d state %s\n",
1433 mic->name, __func__, __LINE__, state);
1434 switch (get_mic_state(mic, state)) {
1435 case MIC_SHUTTING_DOWN:
1436 mic_handle_shutdown(mic);
1437 goto close_error;
af190494
DC
1438 case MIC_SUSPENDING:
1439 mic->boot_on_resume = 1;
1440 setsysfs(mic->name, "state", "suspend");
1441 mic_handle_shutdown(mic);
1442 goto close_error;
1443 case MIC_OFFLINE:
1444 if (mic->boot_on_resume) {
1445 setsysfs(mic->name, "state", "boot");
1446 mic->boot_on_resume = 0;
1447 }
1448 break;
8d497515
CY
1449 default:
1450 break;
1451 }
1452 free(state);
1453
1454 ufds[0].fd = fd;
1455 ufds[0].events = POLLERR | POLLPRI;
1456 ret = poll(ufds, 1, -1);
1457 if (ret < 0) {
1458 mpsslog("%s: poll failed %s\n",
1459 mic->name, strerror(errno));
1460 goto close_error1;
1461 }
1462 } while (1);
1463close_error:
1464 free(state);
1465close_error1:
1466 close(fd);
1467error:
1468 init_mic(mic);
1469 pthread_exit(NULL);
1470}
1471
1472static void
1473set_cmdline(struct mic_info *mic)
1474{
1475 char buffer[PATH_MAX];
1476 int len;
1477
1478 len = snprintf(buffer, PATH_MAX,
1479 "clocksource=tsc highres=off nohz=off ");
6ab0e475 1480 len += snprintf(buffer + len, PATH_MAX - len,
8d497515 1481 "cpufreq_on;corec6_off;pc3_off;pc6_off ");
6ab0e475 1482 len += snprintf(buffer + len, PATH_MAX - len,
8d497515
CY
1483 "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1484 mic->id);
1485
1486 setsysfs(mic->name, "cmdline", buffer);
1487 mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1488 snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id);
1489 mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1490}
1491
1492static void
1493set_log_buf_info(struct mic_info *mic)
1494{
1495 int fd;
1496 off_t len;
1497 char system_map[] = "/lib/firmware/mic/System.map";
1498 char *map, *temp, log_buf[17] = {'\0'};
1499
1500 fd = open(system_map, O_RDONLY);
1501 if (fd < 0) {
1502 mpsslog("%s: Opening System.map failed: %d\n",
1503 mic->name, errno);
1504 return;
1505 }
1506 len = lseek(fd, 0, SEEK_END);
1507 if (len < 0) {
1508 mpsslog("%s: Reading System.map size failed: %d\n",
1509 mic->name, errno);
1510 close(fd);
1511 return;
1512 }
1513 map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1514 if (map == MAP_FAILED) {
1515 mpsslog("%s: mmap of System.map failed: %d\n",
1516 mic->name, errno);
1517 close(fd);
1518 return;
1519 }
1520 temp = strstr(map, "__log_buf");
1521 if (!temp) {
1522 mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1523 munmap(map, len);
1524 close(fd);
1525 return;
1526 }
1527 strncpy(log_buf, temp - 19, 16);
1528 setsysfs(mic->name, "log_buf_addr", log_buf);
1529 mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1530 temp = strstr(map, "log_buf_len");
1531 if (!temp) {
1532 mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1533 munmap(map, len);
1534 close(fd);
1535 return;
1536 }
1537 strncpy(log_buf, temp - 19, 16);
1538 setsysfs(mic->name, "log_buf_len", log_buf);
1539 mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1540 munmap(map, len);
1541 close(fd);
1542}
1543
1544static void init_mic(struct mic_info *mic);
1545
1546static void
1547change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1548{
1549 struct mic_info *mic;
1550
1551 for (mic = mic_list.next; mic != NULL; mic = mic->next)
1552 mic->mic_virtblk.signaled = 1/* true */;
1553}
1554
1555static void
1556init_mic(struct mic_info *mic)
1557{
1558 struct sigaction ignore = {
1559 .sa_flags = 0,
1560 .sa_handler = SIG_IGN
1561 };
1562 struct sigaction act = {
1563 .sa_flags = SA_SIGINFO,
1564 .sa_sigaction = change_virtblk_backend,
1565 };
1566 char buffer[PATH_MAX];
1567 int err;
1568
1569 /*
1570 * Currently, one virtio block device is supported for each MIC card
1571 * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1572 * The signal informs the virtio block backend about a change in the
1573 * configuration file which specifies the virtio backend file name on
1574 * the host. Virtio block backend then re-reads the configuration file
1575 * and switches to the new block device. This signalling mechanism may
1576 * not be required once multiple virtio block devices are supported by
1577 * the MIC daemon.
1578 */
1579 sigaction(SIGUSR1, &ignore, NULL);
1580
1581 mic->pid = fork();
1582 switch (mic->pid) {
1583 case 0:
1584 set_log_buf_info(mic);
1585 set_cmdline(mic);
1586 add_virtio_device(mic, &virtcons_dev_page.dd);
1587 add_virtio_device(mic, &virtnet_dev_page.dd);
1588 err = pthread_create(&mic->mic_console.console_thread, NULL,
1589 virtio_console, mic);
1590 if (err)
1591 mpsslog("%s virtcons pthread_create failed %s\n",
ced2c60f 1592 mic->name, strerror(err));
8d497515
CY
1593 err = pthread_create(&mic->mic_net.net_thread, NULL,
1594 virtio_net, mic);
1595 if (err)
1596 mpsslog("%s virtnet pthread_create failed %s\n",
ced2c60f 1597 mic->name, strerror(err));
8d497515
CY
1598 err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1599 virtio_block, mic);
1600 if (err)
1601 mpsslog("%s virtblk pthread_create failed %s\n",
ced2c60f 1602 mic->name, strerror(err));
8d497515
CY
1603 sigemptyset(&act.sa_mask);
1604 err = sigaction(SIGUSR1, &act, NULL);
1605 if (err)
1606 mpsslog("%s sigaction SIGUSR1 failed %s\n",
ced2c60f 1607 mic->name, strerror(errno));
8d497515
CY
1608 while (1)
1609 sleep(60);
1610 case -1:
1611 mpsslog("fork failed MIC name %s id %d errno %d\n",
1612 mic->name, mic->id, errno);
1613 break;
1614 default:
1615 if (mic->restart) {
1616 snprintf(buffer, PATH_MAX, "boot");
1617 setsysfs(mic->name, "state", buffer);
1618 mpsslog("%s restarting mic %d\n",
1619 mic->name, mic->restart);
1620 mic->restart = 0;
1621 }
1622 pthread_create(&mic->config_thread, NULL, mic_config, mic);
1623 }
1624}
1625
1626static void
1627start_daemon(void)
1628{
1629 struct mic_info *mic;
1630
1631 for (mic = mic_list.next; mic != NULL; mic = mic->next)
1632 init_mic(mic);
1633
1634 while (1)
1635 sleep(60);
1636}
1637
1638static int
1639init_mic_list(void)
1640{
1641 struct mic_info *mic = &mic_list;
1642 struct dirent *file;
1643 DIR *dp;
1644 int cnt = 0;
1645
1646 dp = opendir(MICSYSFSDIR);
1647 if (!dp)
1648 return 0;
1649
1650 while ((file = readdir(dp)) != NULL) {
1651 if (!strncmp(file->d_name, "mic", 3)) {
af190494 1652 mic->next = calloc(1, sizeof(struct mic_info));
8d497515
CY
1653 if (mic->next) {
1654 mic = mic->next;
8d497515
CY
1655 mic->id = atoi(&file->d_name[3]);
1656 mic->name = malloc(strlen(file->d_name) + 16);
1657 if (mic->name)
1658 strcpy(mic->name, file->d_name);
1659 mpsslog("MIC name %s id %d\n", mic->name,
1660 mic->id);
1661 cnt++;
1662 }
1663 }
1664 }
1665
1666 closedir(dp);
1667 return cnt;
1668}
1669
1670void
1671mpsslog(char *format, ...)
1672{
1673 va_list args;
1674 char buffer[4096];
1675 char ts[52], *ts1;
1676 time_t t;
1677
1678 if (logfp == NULL)
1679 return;
1680
1681 va_start(args, format);
1682 vsprintf(buffer, format, args);
1683 va_end(args);
1684
1685 time(&t);
1686 ts1 = ctime_r(&t, ts);
1687 ts1[strlen(ts1) - 1] = '\0';
1688 fprintf(logfp, "%s: %s", ts1, buffer);
1689
1690 fflush(logfp);
1691}
1692
1693int
1694main(int argc, char *argv[])
1695{
1696 int cnt;
1697 pid_t pid;
1698
1699 myname = argv[0];
1700
1701 logfp = fopen(LOGFILE_NAME, "a+");
1702 if (!logfp) {
1703 fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1704 exit(1);
1705 }
1706 pid = fork();
1707 switch (pid) {
1708 case 0:
1709 break;
1710 case -1:
1711 exit(2);
1712 default:
1713 exit(0);
1714 }
1715
1716 mpsslog("MIC Daemon start\n");
1717
1718 cnt = init_mic_list();
1719 if (cnt == 0) {
1720 mpsslog("MIC module not loaded\n");
1721 exit(3);
1722 }
1723 mpsslog("MIC found %d devices\n", cnt);
1724
1725 start_daemon();
1726
1727 exit(0);
1728}