Merge tag 'kbuild-v4.21-3' of git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy...
[sfrench/cifs-2.6.git] / drivers / block / sunvdc.c
1 /* sunvdc.c: Sun LDOM Virtual Disk Client.
2  *
3  * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
4  */
5
6 #include <linux/module.h>
7 #include <linux/kernel.h>
8 #include <linux/types.h>
9 #include <linux/blk-mq.h>
10 #include <linux/hdreg.h>
11 #include <linux/genhd.h>
12 #include <linux/cdrom.h>
13 #include <linux/slab.h>
14 #include <linux/spinlock.h>
15 #include <linux/completion.h>
16 #include <linux/delay.h>
17 #include <linux/init.h>
18 #include <linux/list.h>
19 #include <linux/scatterlist.h>
20
21 #include <asm/vio.h>
22 #include <asm/ldc.h>
23
24 #define DRV_MODULE_NAME         "sunvdc"
25 #define PFX DRV_MODULE_NAME     ": "
26 #define DRV_MODULE_VERSION      "1.2"
27 #define DRV_MODULE_RELDATE      "November 24, 2014"
28
29 static char version[] =
30         DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
31 MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
32 MODULE_DESCRIPTION("Sun LDOM virtual disk client driver");
33 MODULE_LICENSE("GPL");
34 MODULE_VERSION(DRV_MODULE_VERSION);
35
/* Number of descriptors in the transmit descriptor ring. */
#define VDC_TX_RING_SIZE        512
/* Block size used until the server reports a different one. */
#define VDC_DEFAULT_BLK_SIZE    512

/* NOTE(review): despite the name, MAX_XFER_BLKS looks like a byte count
 * (128 KiB): it is divided by the block size to obtain MAX_XFER_SIZE and
 * by PAGE_SIZE to size the per-request cookie array.
 */
#define MAX_XFER_BLKS           (128 * 1024)
#define MAX_XFER_SIZE           (MAX_XFER_BLKS / VDC_DEFAULT_BLK_SIZE)
#define MAX_RING_COOKIES        ((MAX_XFER_BLKS / PAGE_SIZE) + 2)

/* Values for a completion's waiting_for field.  WAITING_FOR_ANY is passed
 * to vdc_finish() to wake a waiter regardless of what it is waiting for.
 * It is parenthesized so the expansion is safe inside any expression.
 */
#define WAITING_FOR_LINK_UP     0x01
#define WAITING_FOR_TX_SPACE    0x02
#define WAITING_FOR_GEN_CMD     0x04
#define WAITING_FOR_ANY         (-1)

/* Retry bound used by __vdc_tx_trigger() when the send returns -EAGAIN. */
#define VDC_MAX_RETRIES 10
49
/* Workqueue on which vdc_event() queues the LDC reset work. */
static struct workqueue_struct *sunvdc_wq;

/* Per-ring-slot bookkeeping: the block request occupying that slot.
 * A NULL req marks a slot used by generic_request() instead.
 */
struct vdc_req_entry {
        struct request *req;
};
55
56 struct vdc_port {
57         struct vio_driver_state vio;
58
59         struct gendisk          *disk;
60
61         struct vdc_completion   *cmp;
62
63         u64                     req_id;
64         u64                     seq;
65         struct vdc_req_entry    rq_arr[VDC_TX_RING_SIZE];
66
67         unsigned long           ring_cookies;
68
69         u64                     max_xfer_size;
70         u32                     vdisk_block_size;
71         u32                     drain;
72
73         u64                     ldc_timeout;
74         struct delayed_work     ldc_reset_timer_work;
75         struct work_struct      ldc_reset_work;
76
77         /* The server fills these in for us in the disk attribute
78          * ACK packet.
79          */
80         u64                     operations;
81         u32                     vdisk_size;
82         u8                      vdisk_type;
83         u8                      vdisk_mtype;
84         u32                     vdisk_phys_blksz;
85
86         struct blk_mq_tag_set   tag_set;
87
88         char                    disk_name[32];
89 };
90
/* Forward declarations for the LDC reset path, which is defined later in
 * the file but referenced earlier (e.g. from __vdc_tx_trigger()).
 */
static void vdc_ldc_reset(struct vdc_port *port);
static void vdc_ldc_reset_work(struct work_struct *work);
static void vdc_ldc_reset_timer_work(struct work_struct *work);
94
95 static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio)
96 {
97         return container_of(vio, struct vdc_port, vio);
98 }
99
100 /* Ordered from largest major to lowest */
101 static struct vio_version vdc_versions[] = {
102         { .major = 1, .minor = 2 },
103         { .major = 1, .minor = 1 },
104         { .major = 1, .minor = 0 },
105 };
106
107 static inline int vdc_version_supported(struct vdc_port *port,
108                                         u16 major, u16 minor)
109 {
110         return port->vio.ver.major == major && port->vio.ver.minor >= minor;
111 }
112
#define VDCBLK_NAME     "vdisk"
/* Block major number assigned to every gendisk in probe_disk(). */
static int vdc_major;
/* Each disk owns (1 << PARTITION_SHIFT) minor numbers. */
#define PARTITION_SHIFT 3
116
117 static inline u32 vdc_tx_dring_avail(struct vio_dring_state *dr)
118 {
119         return vio_dring_avail(dr, VDC_TX_RING_SIZE);
120 }
121
122 static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo)
123 {
124         struct gendisk *disk = bdev->bd_disk;
125         sector_t nsect = get_capacity(disk);
126         sector_t cylinders = nsect;
127
128         geo->heads = 0xff;
129         geo->sectors = 0x3f;
130         sector_div(cylinders, geo->heads * geo->sectors);
131         geo->cylinders = cylinders;
132         if ((sector_t)(geo->cylinders + 1) * geo->heads * geo->sectors < nsect)
133                 geo->cylinders = 0xffff;
134
135         return 0;
136 }
137
138 /* Add ioctl/CDROM_GET_CAPABILITY to support cdrom_id in udev
139  * when vdisk_mtype is VD_MEDIA_TYPE_CD or VD_MEDIA_TYPE_DVD.
140  * Needed to be able to install inside an ldom from an iso image.
141  */
142 static int vdc_ioctl(struct block_device *bdev, fmode_t mode,
143                      unsigned command, unsigned long argument)
144 {
145         int i;
146         struct gendisk *disk;
147
148         switch (command) {
149         case CDROMMULTISESSION:
150                 pr_debug(PFX "Multisession CDs not supported\n");
151                 for (i = 0; i < sizeof(struct cdrom_multisession); i++)
152                         if (put_user(0, (char __user *)(argument + i)))
153                                 return -EFAULT;
154                 return 0;
155
156         case CDROM_GET_CAPABILITY:
157                 disk = bdev->bd_disk;
158
159                 if (bdev->bd_disk && (disk->flags & GENHD_FL_CD))
160                         return 0;
161                 return -EINVAL;
162
163         default:
164                 pr_debug(PFX "ioctl %08x not supported\n", command);
165                 return -EINVAL;
166         }
167 }
168
169 static const struct block_device_operations vdc_fops = {
170         .owner          = THIS_MODULE,
171         .getgeo         = vdc_getgeo,
172         .ioctl          = vdc_ioctl,
173 };
174
175 static void vdc_blk_queue_start(struct vdc_port *port)
176 {
177         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
178
179         /* restart blk queue when ring is half emptied. also called after
180          * handshake completes, so check for initial handshake before we've
181          * allocated a disk.
182          */
183         if (port->disk && vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50)
184                 blk_mq_start_stopped_hw_queues(port->disk->queue, true);
185 }
186
187 static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for)
188 {
189         if (vio->cmp &&
190             (waiting_for == -1 ||
191              vio->cmp->waiting_for == waiting_for)) {
192                 vio->cmp->err = err;
193                 complete(&vio->cmp->com);
194                 vio->cmp = NULL;
195         }
196 }
197
198 static void vdc_handshake_complete(struct vio_driver_state *vio)
199 {
200         struct vdc_port *port = to_vdc_port(vio);
201
202         cancel_delayed_work(&port->ldc_reset_timer_work);
203         vdc_finish(vio, 0, WAITING_FOR_LINK_UP);
204         vdc_blk_queue_start(port);
205 }
206
207 static int vdc_handle_unknown(struct vdc_port *port, void *arg)
208 {
209         struct vio_msg_tag *pkt = arg;
210
211         printk(KERN_ERR PFX "Received unknown msg [%02x:%02x:%04x:%08x]\n",
212                pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
213         printk(KERN_ERR PFX "Resetting connection.\n");
214
215         ldc_disconnect(port->vio.lp);
216
217         return -ECONNRESET;
218 }
219
220 static int vdc_send_attr(struct vio_driver_state *vio)
221 {
222         struct vdc_port *port = to_vdc_port(vio);
223         struct vio_disk_attr_info pkt;
224
225         memset(&pkt, 0, sizeof(pkt));
226
227         pkt.tag.type = VIO_TYPE_CTRL;
228         pkt.tag.stype = VIO_SUBTYPE_INFO;
229         pkt.tag.stype_env = VIO_ATTR_INFO;
230         pkt.tag.sid = vio_send_sid(vio);
231
232         pkt.xfer_mode = VIO_DRING_MODE;
233         pkt.vdisk_block_size = port->vdisk_block_size;
234         pkt.max_xfer_size = port->max_xfer_size;
235
236         viodbg(HS, "SEND ATTR xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n",
237                pkt.xfer_mode, pkt.vdisk_block_size, pkt.max_xfer_size);
238
239         return vio_ldc_send(&port->vio, &pkt, sizeof(pkt));
240 }
241
242 static int vdc_handle_attr(struct vio_driver_state *vio, void *arg)
243 {
244         struct vdc_port *port = to_vdc_port(vio);
245         struct vio_disk_attr_info *pkt = arg;
246
247         viodbg(HS, "GOT ATTR stype[0x%x] ops[%llx] disk_size[%llu] disk_type[%x] "
248                "mtype[0x%x] xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n",
249                pkt->tag.stype, pkt->operations,
250                pkt->vdisk_size, pkt->vdisk_type, pkt->vdisk_mtype,
251                pkt->xfer_mode, pkt->vdisk_block_size,
252                pkt->max_xfer_size);
253
254         if (pkt->tag.stype == VIO_SUBTYPE_ACK) {
255                 switch (pkt->vdisk_type) {
256                 case VD_DISK_TYPE_DISK:
257                 case VD_DISK_TYPE_SLICE:
258                         break;
259
260                 default:
261                         printk(KERN_ERR PFX "%s: Bogus vdisk_type 0x%x\n",
262                                vio->name, pkt->vdisk_type);
263                         return -ECONNRESET;
264                 }
265
266                 if (pkt->vdisk_block_size > port->vdisk_block_size) {
267                         printk(KERN_ERR PFX "%s: BLOCK size increased "
268                                "%u --> %u\n",
269                                vio->name,
270                                port->vdisk_block_size, pkt->vdisk_block_size);
271                         return -ECONNRESET;
272                 }
273
274                 port->operations = pkt->operations;
275                 port->vdisk_type = pkt->vdisk_type;
276                 if (vdc_version_supported(port, 1, 1)) {
277                         port->vdisk_size = pkt->vdisk_size;
278                         port->vdisk_mtype = pkt->vdisk_mtype;
279                 }
280                 if (pkt->max_xfer_size < port->max_xfer_size)
281                         port->max_xfer_size = pkt->max_xfer_size;
282                 port->vdisk_block_size = pkt->vdisk_block_size;
283
284                 port->vdisk_phys_blksz = VDC_DEFAULT_BLK_SIZE;
285                 if (vdc_version_supported(port, 1, 2))
286                         port->vdisk_phys_blksz = pkt->phys_block_size;
287
288                 return 0;
289         } else {
290                 printk(KERN_ERR PFX "%s: Attribute NACK\n", vio->name);
291
292                 return -ECONNRESET;
293         }
294 }
295
296 static void vdc_end_special(struct vdc_port *port, struct vio_disk_desc *desc)
297 {
298         int err = desc->status;
299
300         vdc_finish(&port->vio, -err, WAITING_FOR_GEN_CMD);
301 }
302
303 static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
304                         unsigned int index)
305 {
306         struct vio_disk_desc *desc = vio_dring_entry(dr, index);
307         struct vdc_req_entry *rqe = &port->rq_arr[index];
308         struct request *req;
309
310         if (unlikely(desc->hdr.state != VIO_DESC_DONE))
311                 return;
312
313         ldc_unmap(port->vio.lp, desc->cookies, desc->ncookies);
314         desc->hdr.state = VIO_DESC_FREE;
315         dr->cons = vio_dring_next(dr, index);
316
317         req = rqe->req;
318         if (req == NULL) {
319                 vdc_end_special(port, desc);
320                 return;
321         }
322
323         rqe->req = NULL;
324
325         blk_mq_end_request(req, desc->status ? BLK_STS_IOERR : 0);
326
327         vdc_blk_queue_start(port);
328 }
329
330 static int vdc_ack(struct vdc_port *port, void *msgbuf)
331 {
332         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
333         struct vio_dring_data *pkt = msgbuf;
334
335         if (unlikely(pkt->dring_ident != dr->ident ||
336                      pkt->start_idx != pkt->end_idx ||
337                      pkt->start_idx >= VDC_TX_RING_SIZE))
338                 return 0;
339
340         vdc_end_one(port, dr, pkt->start_idx);
341
342         return 0;
343 }
344
/* Data NACK handler.  Not implemented yet: the message is accepted and
 * ignored, and 0 is returned.
 */
static int vdc_nack(struct vdc_port *port, void *msgbuf)
{
        /* XXX Implement me XXX */
        return 0;
}
350
351 static void vdc_event(void *arg, int event)
352 {
353         struct vdc_port *port = arg;
354         struct vio_driver_state *vio = &port->vio;
355         unsigned long flags;
356         int err;
357
358         spin_lock_irqsave(&vio->lock, flags);
359
360         if (unlikely(event == LDC_EVENT_RESET)) {
361                 vio_link_state_change(vio, event);
362                 queue_work(sunvdc_wq, &port->ldc_reset_work);
363                 goto out;
364         }
365
366         if (unlikely(event == LDC_EVENT_UP)) {
367                 vio_link_state_change(vio, event);
368                 goto out;
369         }
370
371         if (unlikely(event != LDC_EVENT_DATA_READY)) {
372                 pr_warn(PFX "Unexpected LDC event %d\n", event);
373                 goto out;
374         }
375
376         err = 0;
377         while (1) {
378                 union {
379                         struct vio_msg_tag tag;
380                         u64 raw[8];
381                 } msgbuf;
382
383                 err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
384                 if (unlikely(err < 0)) {
385                         if (err == -ECONNRESET)
386                                 vio_conn_reset(vio);
387                         break;
388                 }
389                 if (err == 0)
390                         break;
391                 viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
392                        msgbuf.tag.type,
393                        msgbuf.tag.stype,
394                        msgbuf.tag.stype_env,
395                        msgbuf.tag.sid);
396                 err = vio_validate_sid(vio, &msgbuf.tag);
397                 if (err < 0)
398                         break;
399
400                 if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
401                         if (msgbuf.tag.stype == VIO_SUBTYPE_ACK)
402                                 err = vdc_ack(port, &msgbuf);
403                         else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK)
404                                 err = vdc_nack(port, &msgbuf);
405                         else
406                                 err = vdc_handle_unknown(port, &msgbuf);
407                 } else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
408                         err = vio_control_pkt_engine(vio, &msgbuf);
409                 } else {
410                         err = vdc_handle_unknown(port, &msgbuf);
411                 }
412                 if (err < 0)
413                         break;
414         }
415         if (err < 0)
416                 vdc_finish(&port->vio, err, WAITING_FOR_ANY);
417 out:
418         spin_unlock_irqrestore(&vio->lock, flags);
419 }
420
421 static int __vdc_tx_trigger(struct vdc_port *port)
422 {
423         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
424         struct vio_dring_data hdr = {
425                 .tag = {
426                         .type           = VIO_TYPE_DATA,
427                         .stype          = VIO_SUBTYPE_INFO,
428                         .stype_env      = VIO_DRING_DATA,
429                         .sid            = vio_send_sid(&port->vio),
430                 },
431                 .dring_ident            = dr->ident,
432                 .start_idx              = dr->prod,
433                 .end_idx                = dr->prod,
434         };
435         int err, delay;
436         int retries = 0;
437
438         hdr.seq = dr->snd_nxt;
439         delay = 1;
440         do {
441                 err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
442                 if (err > 0) {
443                         dr->snd_nxt++;
444                         break;
445                 }
446                 udelay(delay);
447                 if ((delay <<= 1) > 128)
448                         delay = 128;
449                 if (retries++ > VDC_MAX_RETRIES)
450                         break;
451         } while (err == -EAGAIN);
452
453         if (err == -ENOTCONN)
454                 vdc_ldc_reset(port);
455         return err;
456 }
457
458 static int __send_request(struct request *req)
459 {
460         struct vdc_port *port = req->rq_disk->private_data;
461         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
462         struct scatterlist sg[MAX_RING_COOKIES];
463         struct vdc_req_entry *rqe;
464         struct vio_disk_desc *desc;
465         unsigned int map_perm;
466         int nsg, err, i;
467         u64 len;
468         u8 op;
469
470         if (WARN_ON(port->ring_cookies > MAX_RING_COOKIES))
471                 return -EINVAL;
472
473         map_perm = LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
474
475         if (rq_data_dir(req) == READ) {
476                 map_perm |= LDC_MAP_W;
477                 op = VD_OP_BREAD;
478         } else {
479                 map_perm |= LDC_MAP_R;
480                 op = VD_OP_BWRITE;
481         }
482
483         sg_init_table(sg, port->ring_cookies);
484         nsg = blk_rq_map_sg(req->q, req, sg);
485
486         len = 0;
487         for (i = 0; i < nsg; i++)
488                 len += sg[i].length;
489
490         desc = vio_dring_cur(dr);
491
492         err = ldc_map_sg(port->vio.lp, sg, nsg,
493                          desc->cookies, port->ring_cookies,
494                          map_perm);
495         if (err < 0) {
496                 printk(KERN_ERR PFX "ldc_map_sg() failure, err=%d.\n", err);
497                 return err;
498         }
499
500         rqe = &port->rq_arr[dr->prod];
501         rqe->req = req;
502
503         desc->hdr.ack = VIO_ACK_ENABLE;
504         desc->req_id = port->req_id;
505         desc->operation = op;
506         if (port->vdisk_type == VD_DISK_TYPE_DISK) {
507                 desc->slice = 0xff;
508         } else {
509                 desc->slice = 0;
510         }
511         desc->status = ~0;
512         desc->offset = (blk_rq_pos(req) << 9) / port->vdisk_block_size;
513         desc->size = len;
514         desc->ncookies = err;
515
516         /* This has to be a non-SMP write barrier because we are writing
517          * to memory which is shared with the peer LDOM.
518          */
519         wmb();
520         desc->hdr.state = VIO_DESC_READY;
521
522         err = __vdc_tx_trigger(port);
523         if (err < 0) {
524                 printk(KERN_ERR PFX "vdc_tx_trigger() failure, err=%d\n", err);
525         } else {
526                 port->req_id++;
527                 dr->prod = vio_dring_next(dr, dr->prod);
528         }
529
530         return err;
531 }
532
533 static blk_status_t vdc_queue_rq(struct blk_mq_hw_ctx *hctx,
534                                  const struct blk_mq_queue_data *bd)
535 {
536         struct vdc_port *port = hctx->queue->queuedata;
537         struct vio_dring_state *dr;
538         unsigned long flags;
539
540         dr = &port->vio.drings[VIO_DRIVER_TX_RING];
541
542         blk_mq_start_request(bd->rq);
543
544         spin_lock_irqsave(&port->vio.lock, flags);
545
546         /*
547          * Doing drain, just end the request in error
548          */
549         if (unlikely(port->drain)) {
550                 spin_unlock_irqrestore(&port->vio.lock, flags);
551                 return BLK_STS_IOERR;
552         }
553
554         if (unlikely(vdc_tx_dring_avail(dr) < 1)) {
555                 spin_unlock_irqrestore(&port->vio.lock, flags);
556                 blk_mq_stop_hw_queue(hctx);
557                 return BLK_STS_DEV_RESOURCE;
558         }
559
560         if (__send_request(bd->rq) < 0) {
561                 spin_unlock_irqrestore(&port->vio.lock, flags);
562                 return BLK_STS_IOERR;
563         }
564
565         spin_unlock_irqrestore(&port->vio.lock, flags);
566         return BLK_STS_OK;
567 }
568
569 static int generic_request(struct vdc_port *port, u8 op, void *buf, int len)
570 {
571         struct vio_dring_state *dr;
572         struct vio_completion comp;
573         struct vio_disk_desc *desc;
574         unsigned int map_perm;
575         unsigned long flags;
576         int op_len, err;
577         void *req_buf;
578
579         if (!(((u64)1 << (u64)op) & port->operations))
580                 return -EOPNOTSUPP;
581
582         switch (op) {
583         case VD_OP_BREAD:
584         case VD_OP_BWRITE:
585         default:
586                 return -EINVAL;
587
588         case VD_OP_FLUSH:
589                 op_len = 0;
590                 map_perm = 0;
591                 break;
592
593         case VD_OP_GET_WCE:
594                 op_len = sizeof(u32);
595                 map_perm = LDC_MAP_W;
596                 break;
597
598         case VD_OP_SET_WCE:
599                 op_len = sizeof(u32);
600                 map_perm = LDC_MAP_R;
601                 break;
602
603         case VD_OP_GET_VTOC:
604                 op_len = sizeof(struct vio_disk_vtoc);
605                 map_perm = LDC_MAP_W;
606                 break;
607
608         case VD_OP_SET_VTOC:
609                 op_len = sizeof(struct vio_disk_vtoc);
610                 map_perm = LDC_MAP_R;
611                 break;
612
613         case VD_OP_GET_DISKGEOM:
614                 op_len = sizeof(struct vio_disk_geom);
615                 map_perm = LDC_MAP_W;
616                 break;
617
618         case VD_OP_SET_DISKGEOM:
619                 op_len = sizeof(struct vio_disk_geom);
620                 map_perm = LDC_MAP_R;
621                 break;
622
623         case VD_OP_SCSICMD:
624                 op_len = 16;
625                 map_perm = LDC_MAP_RW;
626                 break;
627
628         case VD_OP_GET_DEVID:
629                 op_len = sizeof(struct vio_disk_devid);
630                 map_perm = LDC_MAP_W;
631                 break;
632
633         case VD_OP_GET_EFI:
634         case VD_OP_SET_EFI:
635                 return -EOPNOTSUPP;
636         };
637
638         map_perm |= LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
639
640         op_len = (op_len + 7) & ~7;
641         req_buf = kzalloc(op_len, GFP_KERNEL);
642         if (!req_buf)
643                 return -ENOMEM;
644
645         if (len > op_len)
646                 len = op_len;
647
648         if (map_perm & LDC_MAP_R)
649                 memcpy(req_buf, buf, len);
650
651         spin_lock_irqsave(&port->vio.lock, flags);
652
653         dr = &port->vio.drings[VIO_DRIVER_TX_RING];
654
655         /* XXX If we want to use this code generically we have to
656          * XXX handle TX ring exhaustion etc.
657          */
658         desc = vio_dring_cur(dr);
659
660         err = ldc_map_single(port->vio.lp, req_buf, op_len,
661                              desc->cookies, port->ring_cookies,
662                              map_perm);
663         if (err < 0) {
664                 spin_unlock_irqrestore(&port->vio.lock, flags);
665                 kfree(req_buf);
666                 return err;
667         }
668
669         init_completion(&comp.com);
670         comp.waiting_for = WAITING_FOR_GEN_CMD;
671         port->vio.cmp = &comp;
672
673         desc->hdr.ack = VIO_ACK_ENABLE;
674         desc->req_id = port->req_id;
675         desc->operation = op;
676         desc->slice = 0;
677         desc->status = ~0;
678         desc->offset = 0;
679         desc->size = op_len;
680         desc->ncookies = err;
681
682         /* This has to be a non-SMP write barrier because we are writing
683          * to memory which is shared with the peer LDOM.
684          */
685         wmb();
686         desc->hdr.state = VIO_DESC_READY;
687
688         err = __vdc_tx_trigger(port);
689         if (err >= 0) {
690                 port->req_id++;
691                 dr->prod = vio_dring_next(dr, dr->prod);
692                 spin_unlock_irqrestore(&port->vio.lock, flags);
693
694                 wait_for_completion(&comp.com);
695                 err = comp.err;
696         } else {
697                 port->vio.cmp = NULL;
698                 spin_unlock_irqrestore(&port->vio.lock, flags);
699         }
700
701         if (map_perm & LDC_MAP_W)
702                 memcpy(buf, req_buf, len);
703
704         kfree(req_buf);
705
706         return err;
707 }
708
709 static int vdc_alloc_tx_ring(struct vdc_port *port)
710 {
711         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
712         unsigned long len, entry_size;
713         int ncookies;
714         void *dring;
715
716         entry_size = sizeof(struct vio_disk_desc) +
717                 (sizeof(struct ldc_trans_cookie) * port->ring_cookies);
718         len = (VDC_TX_RING_SIZE * entry_size);
719
720         ncookies = VIO_MAX_RING_COOKIES;
721         dring = ldc_alloc_exp_dring(port->vio.lp, len,
722                                     dr->cookies, &ncookies,
723                                     (LDC_MAP_SHADOW |
724                                      LDC_MAP_DIRECT |
725                                      LDC_MAP_RW));
726         if (IS_ERR(dring))
727                 return PTR_ERR(dring);
728
729         dr->base = dring;
730         dr->entry_size = entry_size;
731         dr->num_entries = VDC_TX_RING_SIZE;
732         dr->prod = dr->cons = 0;
733         dr->pending = VDC_TX_RING_SIZE;
734         dr->ncookies = ncookies;
735
736         return 0;
737 }
738
739 static void vdc_free_tx_ring(struct vdc_port *port)
740 {
741         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
742
743         if (dr->base) {
744                 ldc_free_exp_dring(port->vio.lp, dr->base,
745                                    (dr->entry_size * dr->num_entries),
746                                    dr->cookies, dr->ncookies);
747                 dr->base = NULL;
748                 dr->entry_size = 0;
749                 dr->num_entries = 0;
750                 dr->pending = 0;
751                 dr->ncookies = 0;
752         }
753 }
754
755 static int vdc_port_up(struct vdc_port *port)
756 {
757         struct vio_completion comp;
758
759         init_completion(&comp.com);
760         comp.err = 0;
761         comp.waiting_for = WAITING_FOR_LINK_UP;
762         port->vio.cmp = &comp;
763
764         vio_port_up(&port->vio);
765         wait_for_completion(&comp.com);
766         return comp.err;
767 }
768
769 static void vdc_port_down(struct vdc_port *port)
770 {
771         ldc_disconnect(port->vio.lp);
772         ldc_unbind(port->vio.lp);
773         vdc_free_tx_ring(port);
774         vio_ldc_free(&port->vio);
775 }
776
777 static const struct blk_mq_ops vdc_mq_ops = {
778         .queue_rq       = vdc_queue_rq,
779 };
780
781 static void cleanup_queue(struct request_queue *q)
782 {
783         struct vdc_port *port = q->queuedata;
784
785         blk_cleanup_queue(q);
786         blk_mq_free_tag_set(&port->tag_set);
787 }
788
789 static struct request_queue *init_queue(struct vdc_port *port)
790 {
791         struct request_queue *q;
792
793         q = blk_mq_init_sq_queue(&port->tag_set, &vdc_mq_ops, VDC_TX_RING_SIZE,
794                                         BLK_MQ_F_SHOULD_MERGE);
795         if (IS_ERR(q))
796                 return q;
797
798         q->queuedata = port;
799         return q;
800 }
801
802 static int probe_disk(struct vdc_port *port)
803 {
804         struct request_queue *q;
805         struct gendisk *g;
806         int err;
807
808         err = vdc_port_up(port);
809         if (err)
810                 return err;
811
812         /* Using version 1.2 means vdisk_phys_blksz should be set unless the
813          * disk is reserved by another system.
814          */
815         if (vdc_version_supported(port, 1, 2) && !port->vdisk_phys_blksz)
816                 return -ENODEV;
817
818         if (vdc_version_supported(port, 1, 1)) {
819                 /* vdisk_size should be set during the handshake, if it wasn't
820                  * then the underlying disk is reserved by another system
821                  */
822                 if (port->vdisk_size == -1)
823                         return -ENODEV;
824         } else {
825                 struct vio_disk_geom geom;
826
827                 err = generic_request(port, VD_OP_GET_DISKGEOM,
828                                       &geom, sizeof(geom));
829                 if (err < 0) {
830                         printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns "
831                                "error %d\n", err);
832                         return err;
833                 }
834                 port->vdisk_size = ((u64)geom.num_cyl *
835                                     (u64)geom.num_hd *
836                                     (u64)geom.num_sec);
837         }
838
839         q = init_queue(port);
840         if (IS_ERR(q)) {
841                 printk(KERN_ERR PFX "%s: Could not allocate queue.\n",
842                        port->vio.name);
843                 return PTR_ERR(q);
844         }
845         g = alloc_disk(1 << PARTITION_SHIFT);
846         if (!g) {
847                 printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n",
848                        port->vio.name);
849                 cleanup_queue(q);
850                 return -ENOMEM;
851         }
852
853         port->disk = g;
854
855         /* Each segment in a request is up to an aligned page in size. */
856         blk_queue_segment_boundary(q, PAGE_SIZE - 1);
857         blk_queue_max_segment_size(q, PAGE_SIZE);
858
859         blk_queue_max_segments(q, port->ring_cookies);
860         blk_queue_max_hw_sectors(q, port->max_xfer_size);
861         g->major = vdc_major;
862         g->first_minor = port->vio.vdev->dev_no << PARTITION_SHIFT;
863         strcpy(g->disk_name, port->disk_name);
864
865         g->fops = &vdc_fops;
866         g->queue = q;
867         g->private_data = port;
868
869         set_capacity(g, port->vdisk_size);
870
871         if (vdc_version_supported(port, 1, 1)) {
872                 switch (port->vdisk_mtype) {
873                 case VD_MEDIA_TYPE_CD:
874                         pr_info(PFX "Virtual CDROM %s\n", port->disk_name);
875                         g->flags |= GENHD_FL_CD;
876                         g->flags |= GENHD_FL_REMOVABLE;
877                         set_disk_ro(g, 1);
878                         break;
879
880                 case VD_MEDIA_TYPE_DVD:
881                         pr_info(PFX "Virtual DVD %s\n", port->disk_name);
882                         g->flags |= GENHD_FL_CD;
883                         g->flags |= GENHD_FL_REMOVABLE;
884                         set_disk_ro(g, 1);
885                         break;
886
887                 case VD_MEDIA_TYPE_FIXED:
888                         pr_info(PFX "Virtual Hard disk %s\n", port->disk_name);
889                         break;
890                 }
891         }
892
893         blk_queue_physical_block_size(q, port->vdisk_phys_blksz);
894
895         pr_info(PFX "%s: %u sectors (%u MB) protocol %d.%d\n",
896                g->disk_name,
897                port->vdisk_size, (port->vdisk_size >> (20 - 9)),
898                port->vio.ver.major, port->vio.ver.minor);
899
900         device_add_disk(&port->vio.vdev->dev, g, NULL);
901
902         return 0;
903 }
904
905 static struct ldc_channel_config vdc_ldc_cfg = {
906         .event          = vdc_event,
907         .mtu            = 64,
908         .mode           = LDC_MODE_UNRELIABLE,
909 };
910
911 static struct vio_driver_ops vdc_vio_ops = {
912         .send_attr              = vdc_send_attr,
913         .handle_attr            = vdc_handle_attr,
914         .handshake_complete     = vdc_handshake_complete,
915 };
916
917 static void print_version(void)
918 {
919         static int version_printed;
920
921         if (version_printed++ == 0)
922                 printk(KERN_INFO "%s", version);
923 }
924
/* Search key passed to vdc_device_probed() via device_find_child(). */
struct vdc_check_port_data {
        int     dev_no;         /* dev_no of the port being looked for */
        char    *type;          /* type string of the port being looked for */
};
929
930 static int vdc_device_probed(struct device *dev, void *arg)
931 {
932         struct vio_dev *vdev = to_vio_dev(dev);
933         struct vdc_check_port_data *port_data;
934
935         port_data = (struct vdc_check_port_data *)arg;
936
937         if ((vdev->dev_no == port_data->dev_no) &&
938             (!(strcmp((char *)&vdev->type, port_data->type))) &&
939                 dev_get_drvdata(dev)) {
940                 /* This device has already been configured
941                  * by vdc_port_probe()
942                  */
943                 return 1;
944         } else {
945                 return 0;
946         }
947 }
948
949 /* Determine whether the VIO device is part of an mpgroup
950  * by locating all the virtual-device-port nodes associated
951  * with the parent virtual-device node for the VIO device
952  * and checking whether any of these nodes are vdc-ports
953  * which have already been configured.
954  *
955  * Returns true if this device is part of an mpgroup and has
956  * already been probed.
957  */
958 static bool vdc_port_mpgroup_check(struct vio_dev *vdev)
959 {
960         struct vdc_check_port_data port_data;
961         struct device *dev;
962
963         port_data.dev_no = vdev->dev_no;
964         port_data.type = (char *)&vdev->type;
965
966         dev = device_find_child(vdev->dev.parent, &port_data,
967                                 vdc_device_probed);
968
969         if (dev)
970                 return true;
971
972         return false;
973 }
974
975 static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
976 {
977         struct mdesc_handle *hp;
978         struct vdc_port *port;
979         int err;
980         const u64 *ldc_timeout;
981
982         print_version();
983
984         hp = mdesc_grab();
985
986         err = -ENODEV;
987         if ((vdev->dev_no << PARTITION_SHIFT) & ~(u64)MINORMASK) {
988                 printk(KERN_ERR PFX "Port id [%llu] too large.\n",
989                        vdev->dev_no);
990                 goto err_out_release_mdesc;
991         }
992
993         /* Check if this device is part of an mpgroup */
994         if (vdc_port_mpgroup_check(vdev)) {
995                 printk(KERN_WARNING
996                         "VIO: Ignoring extra vdisk port %s",
997                         dev_name(&vdev->dev));
998                 goto err_out_release_mdesc;
999         }
1000
1001         port = kzalloc(sizeof(*port), GFP_KERNEL);
1002         err = -ENOMEM;
1003         if (!port) {
1004                 printk(KERN_ERR PFX "Cannot allocate vdc_port.\n");
1005                 goto err_out_release_mdesc;
1006         }
1007
1008         if (vdev->dev_no >= 26)
1009                 snprintf(port->disk_name, sizeof(port->disk_name),
1010                          VDCBLK_NAME "%c%c",
1011                          'a' + ((int)vdev->dev_no / 26) - 1,
1012                          'a' + ((int)vdev->dev_no % 26));
1013         else
1014                 snprintf(port->disk_name, sizeof(port->disk_name),
1015                          VDCBLK_NAME "%c", 'a' + ((int)vdev->dev_no % 26));
1016         port->vdisk_size = -1;
1017
1018         /* Actual wall time may be double due to do_generic_file_read() doing
1019          * a readahead I/O first, and once that fails it will try to read a
1020          * single page.
1021          */
1022         ldc_timeout = mdesc_get_property(hp, vdev->mp, "vdc-timeout", NULL);
1023         port->ldc_timeout = ldc_timeout ? *ldc_timeout : 0;
1024         INIT_DELAYED_WORK(&port->ldc_reset_timer_work, vdc_ldc_reset_timer_work);
1025         INIT_WORK(&port->ldc_reset_work, vdc_ldc_reset_work);
1026
1027         err = vio_driver_init(&port->vio, vdev, VDEV_DISK,
1028                               vdc_versions, ARRAY_SIZE(vdc_versions),
1029                               &vdc_vio_ops, port->disk_name);
1030         if (err)
1031                 goto err_out_free_port;
1032
1033         port->vdisk_block_size = VDC_DEFAULT_BLK_SIZE;
1034         port->max_xfer_size = MAX_XFER_SIZE;
1035         port->ring_cookies = MAX_RING_COOKIES;
1036
1037         err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port);
1038         if (err)
1039                 goto err_out_free_port;
1040
1041         err = vdc_alloc_tx_ring(port);
1042         if (err)
1043                 goto err_out_free_ldc;
1044
1045         err = probe_disk(port);
1046         if (err)
1047                 goto err_out_free_tx_ring;
1048
1049         /* Note that the device driver_data is used to determine
1050          * whether the port has been probed.
1051          */
1052         dev_set_drvdata(&vdev->dev, port);
1053
1054         mdesc_release(hp);
1055
1056         return 0;
1057
1058 err_out_free_tx_ring:
1059         vdc_free_tx_ring(port);
1060
1061 err_out_free_ldc:
1062         vio_ldc_free(&port->vio);
1063
1064 err_out_free_port:
1065         kfree(port);
1066
1067 err_out_release_mdesc:
1068         mdesc_release(hp);
1069         return err;
1070 }
1071
1072 static int vdc_port_remove(struct vio_dev *vdev)
1073 {
1074         struct vdc_port *port = dev_get_drvdata(&vdev->dev);
1075
1076         if (port) {
1077                 blk_mq_stop_hw_queues(port->disk->queue);
1078
1079                 flush_work(&port->ldc_reset_work);
1080                 cancel_delayed_work_sync(&port->ldc_reset_timer_work);
1081                 del_timer_sync(&port->vio.timer);
1082
1083                 del_gendisk(port->disk);
1084                 cleanup_queue(port->disk->queue);
1085                 put_disk(port->disk);
1086                 port->disk = NULL;
1087
1088                 vdc_free_tx_ring(port);
1089                 vio_ldc_free(&port->vio);
1090
1091                 dev_set_drvdata(&vdev->dev, NULL);
1092
1093                 kfree(port);
1094         }
1095         return 0;
1096 }
1097
1098 static void vdc_requeue_inflight(struct vdc_port *port)
1099 {
1100         struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1101         u32 idx;
1102
1103         for (idx = dr->cons; idx != dr->prod; idx = vio_dring_next(dr, idx)) {
1104                 struct vio_disk_desc *desc = vio_dring_entry(dr, idx);
1105                 struct vdc_req_entry *rqe = &port->rq_arr[idx];
1106                 struct request *req;
1107
1108                 ldc_unmap(port->vio.lp, desc->cookies, desc->ncookies);
1109                 desc->hdr.state = VIO_DESC_FREE;
1110                 dr->cons = vio_dring_next(dr, idx);
1111
1112                 req = rqe->req;
1113                 if (req == NULL) {
1114                         vdc_end_special(port, desc);
1115                         continue;
1116                 }
1117
1118                 rqe->req = NULL;
1119                 blk_mq_requeue_request(req, false);
1120         }
1121 }
1122
1123 static void vdc_queue_drain(struct vdc_port *port)
1124 {
1125         struct request_queue *q = port->disk->queue;
1126
1127         /*
1128          * Mark the queue as draining, then freeze/quiesce to ensure
1129          * that all existing requests are seen in ->queue_rq() and killed
1130          */
1131         port->drain = 1;
1132         spin_unlock_irq(&port->vio.lock);
1133
1134         blk_mq_freeze_queue(q);
1135         blk_mq_quiesce_queue(q);
1136
1137         spin_lock_irq(&port->vio.lock);
1138         port->drain = 0;
1139         blk_mq_unquiesce_queue(q);
1140         blk_mq_unfreeze_queue(q);
1141 }
1142
1143 static void vdc_ldc_reset_timer_work(struct work_struct *work)
1144 {
1145         struct vdc_port *port;
1146         struct vio_driver_state *vio;
1147
1148         port = container_of(work, struct vdc_port, ldc_reset_timer_work.work);
1149         vio = &port->vio;
1150
1151         spin_lock_irq(&vio->lock);
1152         if (!(port->vio.hs_state & VIO_HS_COMPLETE)) {
1153                 pr_warn(PFX "%s ldc down %llu seconds, draining queue\n",
1154                         port->disk_name, port->ldc_timeout);
1155                 vdc_queue_drain(port);
1156                 vdc_blk_queue_start(port);
1157         }
1158         spin_unlock_irq(&vio->lock);
1159 }
1160
1161 static void vdc_ldc_reset_work(struct work_struct *work)
1162 {
1163         struct vdc_port *port;
1164         struct vio_driver_state *vio;
1165         unsigned long flags;
1166
1167         port = container_of(work, struct vdc_port, ldc_reset_work);
1168         vio = &port->vio;
1169
1170         spin_lock_irqsave(&vio->lock, flags);
1171         vdc_ldc_reset(port);
1172         spin_unlock_irqrestore(&vio->lock, flags);
1173 }
1174
1175 static void vdc_ldc_reset(struct vdc_port *port)
1176 {
1177         int err;
1178
1179         assert_spin_locked(&port->vio.lock);
1180
1181         pr_warn(PFX "%s ldc link reset\n", port->disk_name);
1182         blk_mq_stop_hw_queues(port->disk->queue);
1183         vdc_requeue_inflight(port);
1184         vdc_port_down(port);
1185
1186         err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port);
1187         if (err) {
1188                 pr_err(PFX "%s vio_ldc_alloc:%d\n", port->disk_name, err);
1189                 return;
1190         }
1191
1192         err = vdc_alloc_tx_ring(port);
1193         if (err) {
1194                 pr_err(PFX "%s vio_alloc_tx_ring:%d\n", port->disk_name, err);
1195                 goto err_free_ldc;
1196         }
1197
1198         if (port->ldc_timeout)
1199                 mod_delayed_work(system_wq, &port->ldc_reset_timer_work,
1200                           round_jiffies(jiffies + HZ * port->ldc_timeout));
1201         mod_timer(&port->vio.timer, round_jiffies(jiffies + HZ));
1202         return;
1203
1204 err_free_ldc:
1205         vio_ldc_free(&port->vio);
1206 }
1207
1208 static const struct vio_device_id vdc_port_match[] = {
1209         {
1210                 .type = "vdc-port",
1211         },
1212         {},
1213 };
1214 MODULE_DEVICE_TABLE(vio, vdc_port_match);
1215
1216 static struct vio_driver vdc_port_driver = {
1217         .id_table       = vdc_port_match,
1218         .probe          = vdc_port_probe,
1219         .remove         = vdc_port_remove,
1220         .name           = "vdc_port",
1221 };
1222
1223 static int __init vdc_init(void)
1224 {
1225         int err;
1226
1227         sunvdc_wq = alloc_workqueue("sunvdc", 0, 0);
1228         if (!sunvdc_wq)
1229                 return -ENOMEM;
1230
1231         err = register_blkdev(0, VDCBLK_NAME);
1232         if (err < 0)
1233                 goto out_free_wq;
1234
1235         vdc_major = err;
1236
1237         err = vio_register_driver(&vdc_port_driver);
1238         if (err)
1239                 goto out_unregister_blkdev;
1240
1241         return 0;
1242
1243 out_unregister_blkdev:
1244         unregister_blkdev(vdc_major, VDCBLK_NAME);
1245         vdc_major = 0;
1246
1247 out_free_wq:
1248         destroy_workqueue(sunvdc_wq);
1249         return err;
1250 }
1251
1252 static void __exit vdc_exit(void)
1253 {
1254         vio_unregister_driver(&vdc_port_driver);
1255         unregister_blkdev(vdc_major, VDCBLK_NAME);
1256         destroy_workqueue(sunvdc_wq);
1257 }
1258
/* Hook the init/exit routines into module load and unload. */
module_init(vdc_init);
module_exit(vdc_exit);