License cleanup: add SPDX GPL-2.0 license identifier to files with no license
[sfrench/cifs-2.6.git] / arch / sparc / kernel / ldc.c
1 // SPDX-License-Identifier: GPL-2.0
2 /* ldc.c: Logical Domain Channel link-layer protocol driver.
3  *
4  * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
5  */
6
7 #include <linux/kernel.h>
8 #include <linux/export.h>
9 #include <linux/slab.h>
10 #include <linux/spinlock.h>
11 #include <linux/delay.h>
12 #include <linux/errno.h>
13 #include <linux/string.h>
14 #include <linux/scatterlist.h>
15 #include <linux/interrupt.h>
16 #include <linux/list.h>
17 #include <linux/init.h>
18 #include <linux/bitmap.h>
19 #include <linux/iommu-common.h>
20
21 #include <asm/hypervisor.h>
22 #include <asm/iommu.h>
23 #include <asm/page.h>
24 #include <asm/ldc.h>
25 #include <asm/mdesc.h>
26
/* Driver identification strings; PFX is the prefix used on printk output. */
#define DRV_MODULE_NAME         "ldc"
#define PFX DRV_MODULE_NAME     ": "
#define DRV_MODULE_VERSION      "1.1"
#define DRV_MODULE_RELDATE      "July 22, 2008"

/* The top four bits of a 64-bit cookie hold a page-size code; the mask
 * and shift below isolate it (see ldc_cookie_to_index()).
 */
#define COOKIE_PGSZ_CODE        0xf000000000000000ULL
#define COOKIE_PGSZ_CODE_SHIFT  60ULL
34
35
/* Version banner assembled from the DRV_MODULE_* strings above. */
static char version[] =
        DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
38
/* Packet header layout for unreliable and reliable mode frames.
 * When in RAW mode, packets are simply straight 64-byte payloads
 * with no headers.
 *
 * The fixed header is 8 bytes (type, stype, ctrl, env, seqid).  The
 * union then exposes the remainder of the packet either as a flat
 * payload (u_data) or, via 'r', as a further 8 bytes (pad, ackid)
 * followed by a shorter payload (r_data).
 */
struct ldc_packet {
        /* Frame class; ldc_rx() dispatches on this. */
        u8                      type;
#define LDC_CTRL                0x01
#define LDC_DATA                0x02
#define LDC_ERR                 0x10

        /* Frame sub-type: informational, positive or negative ack. */
        u8                      stype;
#define LDC_INFO                0x01
#define LDC_ACK                 0x02
#define LDC_NACK                0x04

        /* Control operation, meaningful for LDC_CTRL frames. */
        u8                      ctrl;
#define LDC_VERS                0x01 /* Link Version            */
#define LDC_RTS                 0x02 /* Request To Send         */
#define LDC_RTR                 0x03 /* Ready To Receive        */
#define LDC_RDX                 0x04 /* Ready for Data eXchange */
#define LDC_CTRL_MSK            0x0f

        /* In RTS/RTR handshake frames this carries the channel mode
         * (see send_rts()/send_rtr()).  The masks below presumably
         * describe its use in data frames (length and fragment
         * start/stop bits) -- that code is outside this section.
         */
        u8                      env;
#define LDC_LEN                 0x3f
#define LDC_FRAG_MASK           0xc0
#define LDC_START               0x40
#define LDC_STOP                0x80

        /* Sequence number of this frame. */
        u32                     seqid;

        union {
                u8              u_data[LDC_PACKET_SIZE - 8];
                struct {
                        u32     pad;
                        /* Sequence id being acknowledged. */
                        u32     ackid;
                        u8      r_data[LDC_PACKET_SIZE - 8 - 8];
                } r;
        } u;
};
78
/* Protocol version pair, exchanged as the payload of LDC_VERS control
 * frames during the handshake.
 */
struct ldc_version {
        u16 major;
        u16 minor;
};
83
/* Versions this driver supports.  Ordered from largest major to lowest;
 * find_by_major() relies on that ordering to return the first (i.e.
 * highest) entry whose major does not exceed the requested one.
 */
static struct ldc_version ver_arr[] = {
        { .major = 1, .minor = 0 },
};
88
/* Defaults: an MTU of four packets, and a queue sized so that its
 * entries fill exactly one page.
 */
#define LDC_DEFAULT_MTU                 (4 * LDC_PACKET_SIZE)
#define LDC_DEFAULT_NUM_ENTRIES         (PAGE_SIZE / LDC_PACKET_SIZE)
91
struct ldc_channel;

/* Per-mode read/write method table; a channel points at one of these
 * through its 'mops' member.
 */
struct ldc_mode_ops {
        int (*write)(struct ldc_channel *, const void *, unsigned int);
        int (*read)(struct ldc_channel *, void *, unsigned int);
};

/* Forward declarations of the mode tables (definitions are not in this section). */
static const struct ldc_mode_ops raw_ops;
static const struct ldc_mode_ops nonraw_ops;
static const struct ldc_mode_ops stream_ops;
102
/* Non-static global flag; it is neither set nor read in this section of
 * the file.  NOTE(review): presumably indicates logical-domain support
 * was detected -- confirm against the code that assigns it.
 */
int ldom_domaining_enabled;
104
/* Per-channel mapping state: a lock, the channel's map table entries,
 * and the generic iommu_map_table bookkeeping structure.
 */
struct ldc_iommu {
        /* Protects ldc_unmap.  */
        spinlock_t                      lock;
        struct ldc_mtable_entry         *page_table;
        struct iommu_map_table          iommu_map_table;
};
111
/* Software state for one logical domain channel. */
struct ldc_channel {
        /* Protects all operations that depend upon channel state.  */
        spinlock_t                      lock;

        /* Channel id handed to the sun4v_ldc_*() hypervisor calls. */
        unsigned long                   id;

        u8                              *mssbuf;
        u32                             mssbuf_len;
        u32                             mssbuf_off;

        /* TX ring.  tx_head and tx_tail are byte offsets into the ring
         * that advance in LDC_PACKET_SIZE steps (see __advance());
         * tx_ra is the queue address passed to sun4v_ldc_tx_qconf().
         */
        struct ldc_packet               *tx_base;
        unsigned long                   tx_head;
        unsigned long                   tx_tail;
        unsigned long                   tx_num_entries;
        unsigned long                   tx_ra;

        /* Ring offset of the oldest TX packet not yet acknowledged;
         * updated by process_data_ack().
         */
        unsigned long                   tx_acked;

        /* RX ring, laid out like the TX ring above. */
        struct ldc_packet               *rx_base;
        unsigned long                   rx_head;
        unsigned long                   rx_tail;
        unsigned long                   rx_num_entries;
        unsigned long                   rx_ra;

        /* Sequence tracking: rx_seq_ok() accepts only rcv_nxt + 1. */
        u32                             rcv_nxt;
        u32                             snd_nxt;

        /* Channel state as last reported by sun4v_ldc_{rx,tx}_get_state(). */
        unsigned long                   chan_state;

        struct ldc_channel_config       cfg;
        /* Opaque argument passed back to cfg.event() (see send_events()). */
        void                            *event_arg;

        const struct ldc_mode_ops       *mops;

        struct ldc_iommu                iommu;

        /* Protocol version recorded during the handshake. */
        struct ldc_version              ver;

        /* Handshake progress. */
        u8                              hs_state;
#define LDC_HS_CLOSED                   0x00
#define LDC_HS_OPEN                     0x01
#define LDC_HS_GOTVERS                  0x02
#define LDC_HS_SENTRTR                  0x03
#define LDC_HS_GOTRTR                   0x04
#define LDC_HS_COMPLETE                 0x10

        u8                              flags;
#define LDC_FLAG_ALLOCED_QUEUES         0x01
#define LDC_FLAG_REGISTERED_QUEUES      0x02
#define LDC_FLAG_REGISTERED_IRQS        0x04
#define LDC_FLAG_RESET                  0x10

        /* Multiplied by the number of free TX packets in
         * tx_has_space_for() to get the bytes that can still be
         * queued, i.e. the payload bytes per packet.
         */
        u8                              mss;
        u8                              state;

#define LDC_IRQ_NAME_MAX                32
        char                            rx_irq_name[LDC_IRQ_NAME_MAX];
        char                            tx_irq_name[LDC_IRQ_NAME_MAX];

        struct hlist_head               mh_list;

        /* Linkage on ldc_channel_list. */
        struct hlist_node               list;
};
175
/* Conditional debug printk, emitted only when the LDC_DEBUG_<TYPE> bit
 * is set in the channel's cfg.debug.  The macro is not hygienic: it
 * expects a 'struct ldc_channel *' named 'lp' to be in scope at the
 * call site.
 */
#define ldcdbg(TYPE, f, a...) \
do {    if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
                printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
} while (0)
180
/* Abort the channel, recording the calling function's name in the debug log. */
#define LDC_ABORT(lp)   ldc_abort((lp), __func__)
182
183 static const char *state_to_str(u8 state)
184 {
185         switch (state) {
186         case LDC_STATE_INVALID:
187                 return "INVALID";
188         case LDC_STATE_INIT:
189                 return "INIT";
190         case LDC_STATE_BOUND:
191                 return "BOUND";
192         case LDC_STATE_READY:
193                 return "READY";
194         case LDC_STATE_CONNECTED:
195                 return "CONNECTED";
196         default:
197                 return "<UNKNOWN>";
198         }
199 }
200
201 static unsigned long __advance(unsigned long off, unsigned long num_entries)
202 {
203         off += LDC_PACKET_SIZE;
204         if (off == (num_entries * LDC_PACKET_SIZE))
205                 off = 0;
206
207         return off;
208 }
209
210 static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
211 {
212         return __advance(off, lp->rx_num_entries);
213 }
214
215 static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
216 {
217         return __advance(off, lp->tx_num_entries);
218 }
219
220 static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
221                                                   unsigned long *new_tail)
222 {
223         struct ldc_packet *p;
224         unsigned long t;
225
226         t = tx_advance(lp, lp->tx_tail);
227         if (t == lp->tx_head)
228                 return NULL;
229
230         *new_tail = t;
231
232         p = lp->tx_base;
233         return p + (lp->tx_tail / LDC_PACKET_SIZE);
234 }
235
236 /* When we are in reliable or stream mode, have to track the next packet
237  * we haven't gotten an ACK for in the TX queue using tx_acked.  We have
238  * to be careful not to stomp over the queue past that point.  During
239  * the handshake, we don't have TX data packets pending in the queue
240  * and that's why handshake_get_tx_packet() need not be mindful of
241  * lp->tx_acked.
242  */
243 static unsigned long head_for_data(struct ldc_channel *lp)
244 {
245         if (lp->cfg.mode == LDC_MODE_STREAM)
246                 return lp->tx_acked;
247         return lp->tx_head;
248 }
249
250 static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
251 {
252         unsigned long limit, tail, new_tail, diff;
253         unsigned int mss;
254
255         limit = head_for_data(lp);
256         tail = lp->tx_tail;
257         new_tail = tx_advance(lp, tail);
258         if (new_tail == limit)
259                 return 0;
260
261         if (limit > new_tail)
262                 diff = limit - new_tail;
263         else
264                 diff = (limit +
265                         ((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
266         diff /= LDC_PACKET_SIZE;
267         mss = lp->mss;
268
269         if (diff * mss < size)
270                 return 0;
271
272         return 1;
273 }
274
275 static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
276                                              unsigned long *new_tail)
277 {
278         struct ldc_packet *p;
279         unsigned long h, t;
280
281         h = head_for_data(lp);
282         t = tx_advance(lp, lp->tx_tail);
283         if (t == h)
284                 return NULL;
285
286         *new_tail = t;
287
288         p = lp->tx_base;
289         return p + (lp->tx_tail / LDC_PACKET_SIZE);
290 }
291
292 static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
293 {
294         unsigned long orig_tail = lp->tx_tail;
295         int limit = 1000;
296
297         lp->tx_tail = tail;
298         while (limit-- > 0) {
299                 unsigned long err;
300
301                 err = sun4v_ldc_tx_set_qtail(lp->id, tail);
302                 if (!err)
303                         return 0;
304
305                 if (err != HV_EWOULDBLOCK) {
306                         lp->tx_tail = orig_tail;
307                         return -EINVAL;
308                 }
309                 udelay(1);
310         }
311
312         lp->tx_tail = orig_tail;
313         return -EBUSY;
314 }
315
316 /* This just updates the head value in the hypervisor using
317  * a polling loop with a timeout.  The caller takes care of
318  * upating software state representing the head change, if any.
319  */
320 static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
321 {
322         int limit = 1000;
323
324         while (limit-- > 0) {
325                 unsigned long err;
326
327                 err = sun4v_ldc_rx_set_qhead(lp->id, head);
328                 if (!err)
329                         return 0;
330
331                 if (err != HV_EWOULDBLOCK)
332                         return -EINVAL;
333
334                 udelay(1);
335         }
336
337         return -EBUSY;
338 }
339
340 static int send_tx_packet(struct ldc_channel *lp,
341                           struct ldc_packet *p,
342                           unsigned long new_tail)
343 {
344         BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
345
346         return set_tx_tail(lp, new_tail);
347 }
348
349 static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
350                                                  u8 stype, u8 ctrl,
351                                                  void *data, int dlen,
352                                                  unsigned long *new_tail)
353 {
354         struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
355
356         if (p) {
357                 memset(p, 0, sizeof(*p));
358                 p->type = LDC_CTRL;
359                 p->stype = stype;
360                 p->ctrl = ctrl;
361                 if (data)
362                         memcpy(p->u.u_data, data, dlen);
363         }
364         return p;
365 }
366
367 static int start_handshake(struct ldc_channel *lp)
368 {
369         struct ldc_packet *p;
370         struct ldc_version *ver;
371         unsigned long new_tail;
372
373         ver = &ver_arr[0];
374
375         ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
376                ver->major, ver->minor);
377
378         p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
379                                    ver, sizeof(*ver), &new_tail);
380         if (p) {
381                 int err = send_tx_packet(lp, p, new_tail);
382                 if (!err)
383                         lp->flags &= ~LDC_FLAG_RESET;
384                 return err;
385         }
386         return -EBUSY;
387 }
388
389 static int send_version_nack(struct ldc_channel *lp,
390                              u16 major, u16 minor)
391 {
392         struct ldc_packet *p;
393         struct ldc_version ver;
394         unsigned long new_tail;
395
396         ver.major = major;
397         ver.minor = minor;
398
399         p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
400                                    &ver, sizeof(ver), &new_tail);
401         if (p) {
402                 ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
403                        ver.major, ver.minor);
404
405                 return send_tx_packet(lp, p, new_tail);
406         }
407         return -EBUSY;
408 }
409
410 static int send_version_ack(struct ldc_channel *lp,
411                             struct ldc_version *vp)
412 {
413         struct ldc_packet *p;
414         unsigned long new_tail;
415
416         p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
417                                    vp, sizeof(*vp), &new_tail);
418         if (p) {
419                 ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
420                        vp->major, vp->minor);
421
422                 return send_tx_packet(lp, p, new_tail);
423         }
424         return -EBUSY;
425 }
426
427 static int send_rts(struct ldc_channel *lp)
428 {
429         struct ldc_packet *p;
430         unsigned long new_tail;
431
432         p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
433                                    &new_tail);
434         if (p) {
435                 p->env = lp->cfg.mode;
436                 p->seqid = 0;
437                 lp->rcv_nxt = 0;
438
439                 ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
440                        p->env, p->seqid);
441
442                 return send_tx_packet(lp, p, new_tail);
443         }
444         return -EBUSY;
445 }
446
447 static int send_rtr(struct ldc_channel *lp)
448 {
449         struct ldc_packet *p;
450         unsigned long new_tail;
451
452         p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
453                                    &new_tail);
454         if (p) {
455                 p->env = lp->cfg.mode;
456                 p->seqid = 0;
457
458                 ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
459                        p->env, p->seqid);
460
461                 return send_tx_packet(lp, p, new_tail);
462         }
463         return -EBUSY;
464 }
465
466 static int send_rdx(struct ldc_channel *lp)
467 {
468         struct ldc_packet *p;
469         unsigned long new_tail;
470
471         p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
472                                    &new_tail);
473         if (p) {
474                 p->env = 0;
475                 p->seqid = ++lp->snd_nxt;
476                 p->u.r.ackid = lp->rcv_nxt;
477
478                 ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
479                        p->env, p->seqid, p->u.r.ackid);
480
481                 return send_tx_packet(lp, p, new_tail);
482         }
483         return -EBUSY;
484 }
485
486 static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
487 {
488         struct ldc_packet *p;
489         unsigned long new_tail;
490         int err;
491
492         p = data_get_tx_packet(lp, &new_tail);
493         if (!p)
494                 return -EBUSY;
495         memset(p, 0, sizeof(*p));
496         p->type = data_pkt->type;
497         p->stype = LDC_NACK;
498         p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
499         p->seqid = lp->snd_nxt + 1;
500         p->u.r.ackid = lp->rcv_nxt;
501
502         ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
503                p->type, p->ctrl, p->seqid, p->u.r.ackid);
504
505         err = send_tx_packet(lp, p, new_tail);
506         if (!err)
507                 lp->snd_nxt++;
508
509         return err;
510 }
511
512 static int ldc_abort(struct ldc_channel *lp, const char *msg)
513 {
514         unsigned long hv_err;
515
516         ldcdbg(STATE, "ABORT[%s]\n", msg);
517         ldc_print(lp);
518
519         /* We report but do not act upon the hypervisor errors because
520          * there really isn't much we can do if they fail at this point.
521          */
522         hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
523         if (hv_err)
524                 printk(KERN_ERR PFX "ldc_abort: "
525                        "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
526                        lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
527
528         hv_err = sun4v_ldc_tx_get_state(lp->id,
529                                         &lp->tx_head,
530                                         &lp->tx_tail,
531                                         &lp->chan_state);
532         if (hv_err)
533                 printk(KERN_ERR PFX "ldc_abort: "
534                        "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
535                        lp->id, hv_err);
536
537         hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
538         if (hv_err)
539                 printk(KERN_ERR PFX "ldc_abort: "
540                        "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
541                        lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
542
543         /* Refetch the RX queue state as well, because we could be invoked
544          * here in the queue processing context.
545          */
546         hv_err = sun4v_ldc_rx_get_state(lp->id,
547                                         &lp->rx_head,
548                                         &lp->rx_tail,
549                                         &lp->chan_state);
550         if (hv_err)
551                 printk(KERN_ERR PFX "ldc_abort: "
552                        "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
553                        lp->id, hv_err);
554
555         return -ECONNRESET;
556 }
557
558 static struct ldc_version *find_by_major(u16 major)
559 {
560         struct ldc_version *ret = NULL;
561         int i;
562
563         for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
564                 struct ldc_version *v = &ver_arr[i];
565                 if (v->major <= major) {
566                         ret = v;
567                         break;
568                 }
569         }
570         return ret;
571 }
572
573 static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
574 {
575         struct ldc_version *vap;
576         int err;
577
578         ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
579                vp->major, vp->minor);
580
581         if (lp->hs_state == LDC_HS_GOTVERS) {
582                 lp->hs_state = LDC_HS_OPEN;
583                 memset(&lp->ver, 0, sizeof(lp->ver));
584         }
585
586         vap = find_by_major(vp->major);
587         if (!vap) {
588                 err = send_version_nack(lp, 0, 0);
589         } else if (vap->major != vp->major) {
590                 err = send_version_nack(lp, vap->major, vap->minor);
591         } else {
592                 struct ldc_version ver = *vp;
593                 if (ver.minor > vap->minor)
594                         ver.minor = vap->minor;
595                 err = send_version_ack(lp, &ver);
596                 if (!err) {
597                         lp->ver = ver;
598                         lp->hs_state = LDC_HS_GOTVERS;
599                 }
600         }
601         if (err)
602                 return LDC_ABORT(lp);
603
604         return 0;
605 }
606
607 static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
608 {
609         ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
610                vp->major, vp->minor);
611
612         if (lp->hs_state == LDC_HS_GOTVERS) {
613                 if (lp->ver.major != vp->major ||
614                     lp->ver.minor != vp->minor)
615                         return LDC_ABORT(lp);
616         } else {
617                 lp->ver = *vp;
618                 lp->hs_state = LDC_HS_GOTVERS;
619         }
620         if (send_rts(lp))
621                 return LDC_ABORT(lp);
622         return 0;
623 }
624
625 static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
626 {
627         struct ldc_version *vap;
628         struct ldc_packet *p;
629         unsigned long new_tail;
630
631         if (vp->major == 0 && vp->minor == 0)
632                 return LDC_ABORT(lp);
633
634         vap = find_by_major(vp->major);
635         if (!vap)
636                 return LDC_ABORT(lp);
637
638         p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
639                                            vap, sizeof(*vap),
640                                            &new_tail);
641         if (!p)
642                 return LDC_ABORT(lp);
643
644         return send_tx_packet(lp, p, new_tail);
645 }
646
647 static int process_version(struct ldc_channel *lp,
648                            struct ldc_packet *p)
649 {
650         struct ldc_version *vp;
651
652         vp = (struct ldc_version *) p->u.u_data;
653
654         switch (p->stype) {
655         case LDC_INFO:
656                 return process_ver_info(lp, vp);
657
658         case LDC_ACK:
659                 return process_ver_ack(lp, vp);
660
661         case LDC_NACK:
662                 return process_ver_nack(lp, vp);
663
664         default:
665                 return LDC_ABORT(lp);
666         }
667 }
668
669 static int process_rts(struct ldc_channel *lp,
670                        struct ldc_packet *p)
671 {
672         ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
673                p->stype, p->seqid, p->env);
674
675         if (p->stype     != LDC_INFO       ||
676             lp->hs_state != LDC_HS_GOTVERS ||
677             p->env       != lp->cfg.mode)
678                 return LDC_ABORT(lp);
679
680         lp->snd_nxt = p->seqid;
681         lp->rcv_nxt = p->seqid;
682         lp->hs_state = LDC_HS_SENTRTR;
683         if (send_rtr(lp))
684                 return LDC_ABORT(lp);
685
686         return 0;
687 }
688
689 static int process_rtr(struct ldc_channel *lp,
690                        struct ldc_packet *p)
691 {
692         ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
693                p->stype, p->seqid, p->env);
694
695         if (p->stype     != LDC_INFO ||
696             p->env       != lp->cfg.mode)
697                 return LDC_ABORT(lp);
698
699         lp->snd_nxt = p->seqid;
700         lp->hs_state = LDC_HS_COMPLETE;
701         ldc_set_state(lp, LDC_STATE_CONNECTED);
702         send_rdx(lp);
703
704         return LDC_EVENT_UP;
705 }
706
707 static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
708 {
709         return lp->rcv_nxt + 1 == seqid;
710 }
711
712 static int process_rdx(struct ldc_channel *lp,
713                        struct ldc_packet *p)
714 {
715         ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
716                p->stype, p->seqid, p->env, p->u.r.ackid);
717
718         if (p->stype != LDC_INFO ||
719             !(rx_seq_ok(lp, p->seqid)))
720                 return LDC_ABORT(lp);
721
722         lp->rcv_nxt = p->seqid;
723
724         lp->hs_state = LDC_HS_COMPLETE;
725         ldc_set_state(lp, LDC_STATE_CONNECTED);
726
727         return LDC_EVENT_UP;
728 }
729
730 static int process_control_frame(struct ldc_channel *lp,
731                                  struct ldc_packet *p)
732 {
733         switch (p->ctrl) {
734         case LDC_VERS:
735                 return process_version(lp, p);
736
737         case LDC_RTS:
738                 return process_rts(lp, p);
739
740         case LDC_RTR:
741                 return process_rtr(lp, p);
742
743         case LDC_RDX:
744                 return process_rdx(lp, p);
745
746         default:
747                 return LDC_ABORT(lp);
748         }
749 }
750
/* An LDC_ERR frame always aborts the channel; the frame contents are
 * not examined.
 */
static int process_error_frame(struct ldc_channel *lp,
                               struct ldc_packet *p)
{
        int rc = LDC_ABORT(lp);

        return rc;
}
756
757 static int process_data_ack(struct ldc_channel *lp,
758                             struct ldc_packet *ack)
759 {
760         unsigned long head = lp->tx_acked;
761         u32 ackid = ack->u.r.ackid;
762
763         while (1) {
764                 struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
765
766                 head = tx_advance(lp, head);
767
768                 if (p->seqid == ackid) {
769                         lp->tx_acked = head;
770                         return 0;
771                 }
772                 if (head == lp->tx_tail)
773                         return LDC_ABORT(lp);
774         }
775
776         return 0;
777 }
778
779 static void send_events(struct ldc_channel *lp, unsigned int event_mask)
780 {
781         if (event_mask & LDC_EVENT_RESET)
782                 lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
783         if (event_mask & LDC_EVENT_UP)
784                 lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
785         if (event_mask & LDC_EVENT_DATA_READY)
786                 lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
787 }
788
789 static irqreturn_t ldc_rx(int irq, void *dev_id)
790 {
791         struct ldc_channel *lp = dev_id;
792         unsigned long orig_state, flags;
793         unsigned int event_mask;
794
795         spin_lock_irqsave(&lp->lock, flags);
796
797         orig_state = lp->chan_state;
798
799         /* We should probably check for hypervisor errors here and
800          * reset the LDC channel if we get one.
801          */
802         sun4v_ldc_rx_get_state(lp->id,
803                                &lp->rx_head,
804                                &lp->rx_tail,
805                                &lp->chan_state);
806
807         ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
808                orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
809
810         event_mask = 0;
811
812         if (lp->cfg.mode == LDC_MODE_RAW &&
813             lp->chan_state == LDC_CHANNEL_UP) {
814                 lp->hs_state = LDC_HS_COMPLETE;
815                 ldc_set_state(lp, LDC_STATE_CONNECTED);
816
817                 /*
818                  * Generate an LDC_EVENT_UP event if the channel
819                  * was not already up.
820                  */
821                 if (orig_state != LDC_CHANNEL_UP) {
822                         event_mask |= LDC_EVENT_UP;
823                         orig_state = lp->chan_state;
824                 }
825         }
826
827         /* If we are in reset state, flush the RX queue and ignore
828          * everything.
829          */
830         if (lp->flags & LDC_FLAG_RESET) {
831                 (void) ldc_rx_reset(lp);
832                 goto out;
833         }
834
835         /* Once we finish the handshake, we let the ldc_read()
836          * paths do all of the control frame and state management.
837          * Just trigger the callback.
838          */
839         if (lp->hs_state == LDC_HS_COMPLETE) {
840 handshake_complete:
841                 if (lp->chan_state != orig_state) {
842                         unsigned int event = LDC_EVENT_RESET;
843
844                         if (lp->chan_state == LDC_CHANNEL_UP)
845                                 event = LDC_EVENT_UP;
846
847                         event_mask |= event;
848                 }
849                 if (lp->rx_head != lp->rx_tail)
850                         event_mask |= LDC_EVENT_DATA_READY;
851
852                 goto out;
853         }
854
855         if (lp->chan_state != orig_state)
856                 goto out;
857
858         while (lp->rx_head != lp->rx_tail) {
859                 struct ldc_packet *p;
860                 unsigned long new;
861                 int err;
862
863                 p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
864
865                 switch (p->type) {
866                 case LDC_CTRL:
867                         err = process_control_frame(lp, p);
868                         if (err > 0)
869                                 event_mask |= err;
870                         break;
871
872                 case LDC_DATA:
873                         event_mask |= LDC_EVENT_DATA_READY;
874                         err = 0;
875                         break;
876
877                 case LDC_ERR:
878                         err = process_error_frame(lp, p);
879                         break;
880
881                 default:
882                         err = LDC_ABORT(lp);
883                         break;
884                 }
885
886                 if (err < 0)
887                         break;
888
889                 new = lp->rx_head;
890                 new += LDC_PACKET_SIZE;
891                 if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
892                         new = 0;
893                 lp->rx_head = new;
894
895                 err = __set_rx_head(lp, new);
896                 if (err < 0) {
897                         (void) LDC_ABORT(lp);
898                         break;
899                 }
900                 if (lp->hs_state == LDC_HS_COMPLETE)
901                         goto handshake_complete;
902         }
903
904 out:
905         spin_unlock_irqrestore(&lp->lock, flags);
906
907         send_events(lp, event_mask);
908
909         return IRQ_HANDLED;
910 }
911
912 static irqreturn_t ldc_tx(int irq, void *dev_id)
913 {
914         struct ldc_channel *lp = dev_id;
915         unsigned long flags, orig_state;
916         unsigned int event_mask = 0;
917
918         spin_lock_irqsave(&lp->lock, flags);
919
920         orig_state = lp->chan_state;
921
922         /* We should probably check for hypervisor errors here and
923          * reset the LDC channel if we get one.
924          */
925         sun4v_ldc_tx_get_state(lp->id,
926                                &lp->tx_head,
927                                &lp->tx_tail,
928                                &lp->chan_state);
929
930         ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
931                orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
932
933         if (lp->cfg.mode == LDC_MODE_RAW &&
934             lp->chan_state == LDC_CHANNEL_UP) {
935                 lp->hs_state = LDC_HS_COMPLETE;
936                 ldc_set_state(lp, LDC_STATE_CONNECTED);
937
938                 /*
939                  * Generate an LDC_EVENT_UP event if the channel
940                  * was not already up.
941                  */
942                 if (orig_state != LDC_CHANNEL_UP) {
943                         event_mask |= LDC_EVENT_UP;
944                         orig_state = lp->chan_state;
945                 }
946         }
947
948         spin_unlock_irqrestore(&lp->lock, flags);
949
950         send_events(lp, event_mask);
951
952         return IRQ_HANDLED;
953 }
954
/* XXX ldc_alloc() and ldc_free() need to run under a mutex so
 * XXX that addition and removal from the ldc_channel_list has
 * XXX atomicity, otherwise the __ldc_channel_exists() check is
 * XXX totally pointless as another thread can slip into ldc_alloc()
 * XXX and add a channel with the same ID.  There also needs to be
 * XXX a spinlock for ldc_channel_list.
 */
static HLIST_HEAD(ldc_channel_list);
963
964 static int __ldc_channel_exists(unsigned long id)
965 {
966         struct ldc_channel *lp;
967
968         hlist_for_each_entry(lp, &ldc_channel_list, list) {
969                 if (lp->id == id)
970                         return 1;
971         }
972         return 0;
973 }
974
975 static int alloc_queue(const char *name, unsigned long num_entries,
976                        struct ldc_packet **base, unsigned long *ra)
977 {
978         unsigned long size, order;
979         void *q;
980
981         size = num_entries * LDC_PACKET_SIZE;
982         order = get_order(size);
983
984         q = (void *) __get_free_pages(GFP_KERNEL, order);
985         if (!q) {
986                 printk(KERN_ERR PFX "Alloc of %s queue failed with "
987                        "size=%lu order=%lu\n", name, size, order);
988                 return -ENOMEM;
989         }
990
991         memset(q, 0, PAGE_SIZE << order);
992
993         *base = q;
994         *ra = __pa(q);
995
996         return 0;
997 }
998
999 static void free_queue(unsigned long num_entries, struct ldc_packet *q)
1000 {
1001         unsigned long size, order;
1002
1003         if (!q)
1004                 return;
1005
1006         size = num_entries * LDC_PACKET_SIZE;
1007         order = get_order(size);
1008
1009         free_pages((unsigned long)q, order);
1010 }
1011
1012 static unsigned long ldc_cookie_to_index(u64 cookie, void *arg)
1013 {
1014         u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1015         /* struct ldc_iommu *ldc_iommu = (struct ldc_iommu *)arg; */
1016
1017         cookie &= ~COOKIE_PGSZ_CODE;
1018
1019         return (cookie >> (13ULL + (szcode * 3ULL)));
1020 }
1021
1022 static void ldc_demap(struct ldc_iommu *iommu, unsigned long id, u64 cookie,
1023                       unsigned long entry, unsigned long npages)
1024 {
1025         struct ldc_mtable_entry *base;
1026         unsigned long i, shift;
1027
1028         shift = (cookie >> COOKIE_PGSZ_CODE_SHIFT) * 3;
1029         base = iommu->page_table + entry;
1030         for (i = 0; i < npages; i++) {
1031                 if (base->cookie)
1032                         sun4v_ldc_revoke(id, cookie + (i << shift),
1033                                          base->cookie);
1034                 base->mte = 0;
1035         }
1036 }
1037
1038 /* XXX Make this configurable... XXX */
1039 #define LDC_IOTABLE_SIZE        (8 * 1024)
1040
1041 static int ldc_iommu_init(const char *name, struct ldc_channel *lp)
1042 {
1043         unsigned long sz, num_tsb_entries, tsbsize, order;
1044         struct ldc_iommu *ldc_iommu = &lp->iommu;
1045         struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
1046         struct ldc_mtable_entry *table;
1047         unsigned long hv_err;
1048         int err;
1049
1050         num_tsb_entries = LDC_IOTABLE_SIZE;
1051         tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1052         spin_lock_init(&ldc_iommu->lock);
1053
1054         sz = num_tsb_entries / 8;
1055         sz = (sz + 7UL) & ~7UL;
1056         iommu->map = kzalloc(sz, GFP_KERNEL);
1057         if (!iommu->map) {
1058                 printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
1059                 return -ENOMEM;
1060         }
1061         iommu_tbl_pool_init(iommu, num_tsb_entries, PAGE_SHIFT,
1062                             NULL, false /* no large pool */,
1063                             1 /* npools */,
1064                             true /* skip span boundary check */);
1065
1066         order = get_order(tsbsize);
1067
1068         table = (struct ldc_mtable_entry *)
1069                 __get_free_pages(GFP_KERNEL, order);
1070         err = -ENOMEM;
1071         if (!table) {
1072                 printk(KERN_ERR PFX "Alloc of MTE table failed, "
1073                        "size=%lu order=%lu\n", tsbsize, order);
1074                 goto out_free_map;
1075         }
1076
1077         memset(table, 0, PAGE_SIZE << order);
1078
1079         ldc_iommu->page_table = table;
1080
1081         hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
1082                                          num_tsb_entries);
1083         err = -EINVAL;
1084         if (hv_err)
1085                 goto out_free_table;
1086
1087         return 0;
1088
1089 out_free_table:
1090         free_pages((unsigned long) table, order);
1091         ldc_iommu->page_table = NULL;
1092
1093 out_free_map:
1094         kfree(iommu->map);
1095         iommu->map = NULL;
1096
1097         return err;
1098 }
1099
1100 static void ldc_iommu_release(struct ldc_channel *lp)
1101 {
1102         struct ldc_iommu *ldc_iommu = &lp->iommu;
1103         struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
1104         unsigned long num_tsb_entries, tsbsize, order;
1105
1106         (void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1107
1108         num_tsb_entries = iommu->poolsize * iommu->nr_pools;
1109         tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1110         order = get_order(tsbsize);
1111
1112         free_pages((unsigned long) ldc_iommu->page_table, order);
1113         ldc_iommu->page_table = NULL;
1114
1115         kfree(iommu->map);
1116         iommu->map = NULL;
1117 }
1118
1119 struct ldc_channel *ldc_alloc(unsigned long id,
1120                               const struct ldc_channel_config *cfgp,
1121                               void *event_arg,
1122                               const char *name)
1123 {
1124         struct ldc_channel *lp;
1125         const struct ldc_mode_ops *mops;
1126         unsigned long dummy1, dummy2, hv_err;
1127         u8 mss, *mssbuf;
1128         int err;
1129
1130         err = -ENODEV;
1131         if (!ldom_domaining_enabled)
1132                 goto out_err;
1133
1134         err = -EINVAL;
1135         if (!cfgp)
1136                 goto out_err;
1137         if (!name)
1138                 goto out_err;
1139
1140         switch (cfgp->mode) {
1141         case LDC_MODE_RAW:
1142                 mops = &raw_ops;
1143                 mss = LDC_PACKET_SIZE;
1144                 break;
1145
1146         case LDC_MODE_UNRELIABLE:
1147                 mops = &nonraw_ops;
1148                 mss = LDC_PACKET_SIZE - 8;
1149                 break;
1150
1151         case LDC_MODE_STREAM:
1152                 mops = &stream_ops;
1153                 mss = LDC_PACKET_SIZE - 8 - 8;
1154                 break;
1155
1156         default:
1157                 goto out_err;
1158         }
1159
1160         if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1161                 goto out_err;
1162
1163         hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1164         err = -ENODEV;
1165         if (hv_err == HV_ECHANNEL)
1166                 goto out_err;
1167
1168         err = -EEXIST;
1169         if (__ldc_channel_exists(id))
1170                 goto out_err;
1171
1172         mssbuf = NULL;
1173
1174         lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1175         err = -ENOMEM;
1176         if (!lp)
1177                 goto out_err;
1178
1179         spin_lock_init(&lp->lock);
1180
1181         lp->id = id;
1182
1183         err = ldc_iommu_init(name, lp);
1184         if (err)
1185                 goto out_free_ldc;
1186
1187         lp->mops = mops;
1188         lp->mss = mss;
1189
1190         lp->cfg = *cfgp;
1191         if (!lp->cfg.mtu)
1192                 lp->cfg.mtu = LDC_DEFAULT_MTU;
1193
1194         if (lp->cfg.mode == LDC_MODE_STREAM) {
1195                 mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1196                 if (!mssbuf) {
1197                         err = -ENOMEM;
1198                         goto out_free_iommu;
1199                 }
1200                 lp->mssbuf = mssbuf;
1201         }
1202
1203         lp->event_arg = event_arg;
1204
1205         /* XXX allow setting via ldc_channel_config to override defaults
1206          * XXX or use some formula based upon mtu
1207          */
1208         lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1209         lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1210
1211         err = alloc_queue("TX", lp->tx_num_entries,
1212                           &lp->tx_base, &lp->tx_ra);
1213         if (err)
1214                 goto out_free_mssbuf;
1215
1216         err = alloc_queue("RX", lp->rx_num_entries,
1217                           &lp->rx_base, &lp->rx_ra);
1218         if (err)
1219                 goto out_free_txq;
1220
1221         lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1222
1223         lp->hs_state = LDC_HS_CLOSED;
1224         ldc_set_state(lp, LDC_STATE_INIT);
1225
1226         INIT_HLIST_NODE(&lp->list);
1227         hlist_add_head(&lp->list, &ldc_channel_list);
1228
1229         INIT_HLIST_HEAD(&lp->mh_list);
1230
1231         snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
1232         snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
1233
1234         err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
1235                           lp->rx_irq_name, lp);
1236         if (err)
1237                 goto out_free_txq;
1238
1239         err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
1240                           lp->tx_irq_name, lp);
1241         if (err) {
1242                 free_irq(lp->cfg.rx_irq, lp);
1243                 goto out_free_txq;
1244         }
1245
1246         return lp;
1247
1248 out_free_txq:
1249         free_queue(lp->tx_num_entries, lp->tx_base);
1250
1251 out_free_mssbuf:
1252         kfree(mssbuf);
1253
1254 out_free_iommu:
1255         ldc_iommu_release(lp);
1256
1257 out_free_ldc:
1258         kfree(lp);
1259
1260 out_err:
1261         return ERR_PTR(err);
1262 }
1263 EXPORT_SYMBOL(ldc_alloc);
1264
1265 void ldc_unbind(struct ldc_channel *lp)
1266 {
1267         if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
1268                 free_irq(lp->cfg.rx_irq, lp);
1269                 free_irq(lp->cfg.tx_irq, lp);
1270                 lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1271         }
1272
1273         if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
1274                 sun4v_ldc_tx_qconf(lp->id, 0, 0);
1275                 sun4v_ldc_rx_qconf(lp->id, 0, 0);
1276                 lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1277         }
1278         if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
1279                 free_queue(lp->tx_num_entries, lp->tx_base);
1280                 free_queue(lp->rx_num_entries, lp->rx_base);
1281                 lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
1282         }
1283
1284         ldc_set_state(lp, LDC_STATE_INIT);
1285 }
1286 EXPORT_SYMBOL(ldc_unbind);
1287
1288 void ldc_free(struct ldc_channel *lp)
1289 {
1290         ldc_unbind(lp);
1291         hlist_del(&lp->list);
1292         kfree(lp->mssbuf);
1293         ldc_iommu_release(lp);
1294
1295         kfree(lp);
1296 }
1297 EXPORT_SYMBOL(ldc_free);
1298
1299 /* Bind the channel.  This registers the LDC queues with
1300  * the hypervisor and puts the channel into a pseudo-listening
1301  * state.  This does not initiate a handshake, ldc_connect() does
1302  * that.
1303  */
1304 int ldc_bind(struct ldc_channel *lp)
1305 {
1306         unsigned long hv_err, flags;
1307         int err = -EINVAL;
1308
1309         if (lp->state != LDC_STATE_INIT)
1310                 return -EINVAL;
1311
1312         spin_lock_irqsave(&lp->lock, flags);
1313
1314         enable_irq(lp->cfg.rx_irq);
1315         enable_irq(lp->cfg.tx_irq);
1316
1317         lp->flags |= LDC_FLAG_REGISTERED_IRQS;
1318
1319         err = -ENODEV;
1320         hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1321         if (hv_err)
1322                 goto out_free_irqs;
1323
1324         hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1325         if (hv_err)
1326                 goto out_free_irqs;
1327
1328         hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1329         if (hv_err)
1330                 goto out_unmap_tx;
1331
1332         hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1333         if (hv_err)
1334                 goto out_unmap_tx;
1335
1336         lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
1337
1338         hv_err = sun4v_ldc_tx_get_state(lp->id,
1339                                         &lp->tx_head,
1340                                         &lp->tx_tail,
1341                                         &lp->chan_state);
1342         err = -EBUSY;
1343         if (hv_err)
1344                 goto out_unmap_rx;
1345
1346         lp->tx_acked = lp->tx_head;
1347
1348         lp->hs_state = LDC_HS_OPEN;
1349         ldc_set_state(lp, LDC_STATE_BOUND);
1350
1351         if (lp->cfg.mode == LDC_MODE_RAW) {
1352                 /*
1353                  * There is no handshake in RAW mode, so handshake
1354                  * is completed.
1355                  */
1356                 lp->hs_state = LDC_HS_COMPLETE;
1357         }
1358
1359         spin_unlock_irqrestore(&lp->lock, flags);
1360
1361         return 0;
1362
1363 out_unmap_rx:
1364         lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1365         sun4v_ldc_rx_qconf(lp->id, 0, 0);
1366
1367 out_unmap_tx:
1368         sun4v_ldc_tx_qconf(lp->id, 0, 0);
1369
1370 out_free_irqs:
1371         lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1372         free_irq(lp->cfg.tx_irq, lp);
1373         free_irq(lp->cfg.rx_irq, lp);
1374
1375         spin_unlock_irqrestore(&lp->lock, flags);
1376
1377         return err;
1378 }
1379 EXPORT_SYMBOL(ldc_bind);
1380
1381 int ldc_connect(struct ldc_channel *lp)
1382 {
1383         unsigned long flags;
1384         int err;
1385
1386         if (lp->cfg.mode == LDC_MODE_RAW)
1387                 return -EINVAL;
1388
1389         spin_lock_irqsave(&lp->lock, flags);
1390
1391         if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1392             !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1393             lp->hs_state != LDC_HS_OPEN)
1394                 err = ((lp->hs_state > LDC_HS_OPEN) ? 0 : -EINVAL);
1395         else
1396                 err = start_handshake(lp);
1397
1398         spin_unlock_irqrestore(&lp->lock, flags);
1399
1400         return err;
1401 }
1402 EXPORT_SYMBOL(ldc_connect);
1403
1404 int ldc_disconnect(struct ldc_channel *lp)
1405 {
1406         unsigned long hv_err, flags;
1407         int err;
1408
1409         if (lp->cfg.mode == LDC_MODE_RAW)
1410                 return -EINVAL;
1411
1412         if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1413             !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
1414                 return -EINVAL;
1415
1416         spin_lock_irqsave(&lp->lock, flags);
1417
1418         err = -ENODEV;
1419         hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1420         if (hv_err)
1421                 goto out_err;
1422
1423         hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1424         if (hv_err)
1425                 goto out_err;
1426
1427         hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1428         if (hv_err)
1429                 goto out_err;
1430
1431         hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1432         if (hv_err)
1433                 goto out_err;
1434
1435         ldc_set_state(lp, LDC_STATE_BOUND);
1436         lp->hs_state = LDC_HS_OPEN;
1437         lp->flags |= LDC_FLAG_RESET;
1438
1439         spin_unlock_irqrestore(&lp->lock, flags);
1440
1441         return 0;
1442
1443 out_err:
1444         sun4v_ldc_tx_qconf(lp->id, 0, 0);
1445         sun4v_ldc_rx_qconf(lp->id, 0, 0);
1446         free_irq(lp->cfg.tx_irq, lp);
1447         free_irq(lp->cfg.rx_irq, lp);
1448         lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
1449                        LDC_FLAG_REGISTERED_QUEUES);
1450         ldc_set_state(lp, LDC_STATE_INIT);
1451
1452         spin_unlock_irqrestore(&lp->lock, flags);
1453
1454         return err;
1455 }
1456 EXPORT_SYMBOL(ldc_disconnect);
1457
1458 int ldc_state(struct ldc_channel *lp)
1459 {
1460         return lp->state;
1461 }
1462 EXPORT_SYMBOL(ldc_state);
1463
1464 void ldc_set_state(struct ldc_channel *lp, u8 state)
1465 {
1466         ldcdbg(STATE, "STATE (%s) --> (%s)\n",
1467                state_to_str(lp->state),
1468                state_to_str(state));
1469
1470         lp->state = state;
1471 }
1472 EXPORT_SYMBOL(ldc_set_state);
1473
1474 int ldc_mode(struct ldc_channel *lp)
1475 {
1476         return lp->cfg.mode;
1477 }
1478 EXPORT_SYMBOL(ldc_mode);
1479
1480 int ldc_rx_reset(struct ldc_channel *lp)
1481 {
1482         return __set_rx_head(lp, lp->rx_tail);
1483 }
1484 EXPORT_SYMBOL(ldc_rx_reset);
1485
1486 void __ldc_print(struct ldc_channel *lp, const char *caller)
1487 {
1488         pr_info("%s: id=0x%lx flags=0x%x state=%s cstate=0x%lx hsstate=0x%x\n"
1489                 "\trx_h=0x%lx rx_t=0x%lx rx_n=%ld\n"
1490                 "\ttx_h=0x%lx tx_t=0x%lx tx_n=%ld\n"
1491                 "\trcv_nxt=%u snd_nxt=%u\n",
1492                 caller, lp->id, lp->flags, state_to_str(lp->state),
1493                 lp->chan_state, lp->hs_state,
1494                 lp->rx_head, lp->rx_tail, lp->rx_num_entries,
1495                 lp->tx_head, lp->tx_tail, lp->tx_num_entries,
1496                 lp->rcv_nxt, lp->snd_nxt);
1497 }
1498 EXPORT_SYMBOL(__ldc_print);
1499
1500 static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1501 {
1502         struct ldc_packet *p;
1503         unsigned long new_tail, hv_err;
1504         int err;
1505
1506         hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1507                                         &lp->chan_state);
1508         if (unlikely(hv_err))
1509                 return -EBUSY;
1510
1511         if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1512                 return LDC_ABORT(lp);
1513
1514         if (size > LDC_PACKET_SIZE)
1515                 return -EMSGSIZE;
1516
1517         p = data_get_tx_packet(lp, &new_tail);
1518         if (!p)
1519                 return -EAGAIN;
1520
1521         memcpy(p, buf, size);
1522
1523         err = send_tx_packet(lp, p, new_tail);
1524         if (!err)
1525                 err = size;
1526
1527         return err;
1528 }
1529
1530 static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1531 {
1532         struct ldc_packet *p;
1533         unsigned long hv_err, new;
1534         int err;
1535
1536         if (size < LDC_PACKET_SIZE)
1537                 return -EINVAL;
1538
1539         hv_err = sun4v_ldc_rx_get_state(lp->id,
1540                                         &lp->rx_head,
1541                                         &lp->rx_tail,
1542                                         &lp->chan_state);
1543         if (hv_err)
1544                 return LDC_ABORT(lp);
1545
1546         if (lp->chan_state == LDC_CHANNEL_DOWN ||
1547             lp->chan_state == LDC_CHANNEL_RESETTING)
1548                 return -ECONNRESET;
1549
1550         if (lp->rx_head == lp->rx_tail)
1551                 return 0;
1552
1553         p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1554         memcpy(buf, p, LDC_PACKET_SIZE);
1555
1556         new = rx_advance(lp, lp->rx_head);
1557         lp->rx_head = new;
1558
1559         err = __set_rx_head(lp, new);
1560         if (err < 0)
1561                 err = -ECONNRESET;
1562         else
1563                 err = LDC_PACKET_SIZE;
1564
1565         return err;
1566 }
1567
1568 static const struct ldc_mode_ops raw_ops = {
1569         .write          =       write_raw,
1570         .read           =       read_raw,
1571 };
1572
1573 static int write_nonraw(struct ldc_channel *lp, const void *buf,
1574                         unsigned int size)
1575 {
1576         unsigned long hv_err, tail;
1577         unsigned int copied;
1578         u32 seq;
1579         int err;
1580
1581         hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1582                                         &lp->chan_state);
1583         if (unlikely(hv_err))
1584                 return -EBUSY;
1585
1586         if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1587                 return LDC_ABORT(lp);
1588
1589         if (!tx_has_space_for(lp, size))
1590                 return -EAGAIN;
1591
1592         seq = lp->snd_nxt;
1593         copied = 0;
1594         tail = lp->tx_tail;
1595         while (copied < size) {
1596                 struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
1597                 u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
1598                             p->u.u_data :
1599                             p->u.r.r_data);
1600                 int data_len;
1601
1602                 p->type = LDC_DATA;
1603                 p->stype = LDC_INFO;
1604                 p->ctrl = 0;
1605
1606                 data_len = size - copied;
1607                 if (data_len > lp->mss)
1608                         data_len = lp->mss;
1609
1610                 BUG_ON(data_len > LDC_LEN);
1611
1612                 p->env = (data_len |
1613                           (copied == 0 ? LDC_START : 0) |
1614                           (data_len == size - copied ? LDC_STOP : 0));
1615
1616                 p->seqid = ++seq;
1617
1618                 ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
1619                        p->type,
1620                        p->stype,
1621                        p->ctrl,
1622                        p->env,
1623                        p->seqid);
1624
1625                 memcpy(data, buf, data_len);
1626                 buf += data_len;
1627                 copied += data_len;
1628
1629                 tail = tx_advance(lp, tail);
1630         }
1631
1632         err = set_tx_tail(lp, tail);
1633         if (!err) {
1634                 lp->snd_nxt = seq;
1635                 err = size;
1636         }
1637
1638         return err;
1639 }
1640
1641 static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1642                       struct ldc_packet *first_frag)
1643 {
1644         int err;
1645
1646         if (first_frag)
1647                 lp->rcv_nxt = first_frag->seqid - 1;
1648
1649         err = send_data_nack(lp, p);
1650         if (err)
1651                 return err;
1652
1653         err = ldc_rx_reset(lp);
1654         if (err < 0)
1655                 return LDC_ABORT(lp);
1656
1657         return 0;
1658 }
1659
1660 static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1661 {
1662         if (p->stype & LDC_ACK) {
1663                 int err = process_data_ack(lp, p);
1664                 if (err)
1665                         return err;
1666         }
1667         if (p->stype & LDC_NACK)
1668                 return LDC_ABORT(lp);
1669
1670         return 0;
1671 }
1672
1673 static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1674 {
1675         unsigned long dummy;
1676         int limit = 1000;
1677
1678         ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1679                cur_head, lp->rx_head, lp->rx_tail);
1680         while (limit-- > 0) {
1681                 unsigned long hv_err;
1682
1683                 hv_err = sun4v_ldc_rx_get_state(lp->id,
1684                                                 &dummy,
1685                                                 &lp->rx_tail,
1686                                                 &lp->chan_state);
1687                 if (hv_err)
1688                         return LDC_ABORT(lp);
1689
1690                 if (lp->chan_state == LDC_CHANNEL_DOWN ||
1691                     lp->chan_state == LDC_CHANNEL_RESETTING)
1692                         return -ECONNRESET;
1693
1694                 if (cur_head != lp->rx_tail) {
1695                         ldcdbg(DATA, "DATA WAIT DONE "
1696                                "head[%lx] tail[%lx] chan_state[%lx]\n",
1697                                dummy, lp->rx_tail, lp->chan_state);
1698                         return 0;
1699                 }
1700
1701                 udelay(1);
1702         }
1703         return -EAGAIN;
1704 }
1705
1706 static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1707 {
1708         int err = __set_rx_head(lp, head);
1709
1710         if (err < 0)
1711                 return LDC_ABORT(lp);
1712
1713         lp->rx_head = head;
1714         return 0;
1715 }
1716
1717 static void send_data_ack(struct ldc_channel *lp)
1718 {
1719         unsigned long new_tail;
1720         struct ldc_packet *p;
1721
1722         p = data_get_tx_packet(lp, &new_tail);
1723         if (likely(p)) {
1724                 int err;
1725
1726                 memset(p, 0, sizeof(*p));
1727                 p->type = LDC_DATA;
1728                 p->stype = LDC_ACK;
1729                 p->ctrl = 0;
1730                 p->seqid = lp->snd_nxt + 1;
1731                 p->u.r.ackid = lp->rcv_nxt;
1732
1733                 err = send_tx_packet(lp, p, new_tail);
1734                 if (!err)
1735                         lp->snd_nxt++;
1736         }
1737 }
1738
1739 static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
1740 {
1741         struct ldc_packet *first_frag;
1742         unsigned long hv_err, new;
1743         int err, copied;
1744
1745         hv_err = sun4v_ldc_rx_get_state(lp->id,
1746                                         &lp->rx_head,
1747                                         &lp->rx_tail,
1748                                         &lp->chan_state);
1749         if (hv_err)
1750                 return LDC_ABORT(lp);
1751
1752         if (lp->chan_state == LDC_CHANNEL_DOWN ||
1753             lp->chan_state == LDC_CHANNEL_RESETTING)
1754                 return -ECONNRESET;
1755
1756         if (lp->rx_head == lp->rx_tail)
1757                 return 0;
1758
1759         first_frag = NULL;
1760         copied = err = 0;
1761         new = lp->rx_head;
1762         while (1) {
1763                 struct ldc_packet *p;
1764                 int pkt_len;
1765
1766                 BUG_ON(new == lp->rx_tail);
1767                 p = lp->rx_base + (new / LDC_PACKET_SIZE);
1768
1769                 ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
1770                        "rcv_nxt[%08x]\n",
1771                        p->type,
1772                        p->stype,
1773                        p->ctrl,
1774                        p->env,
1775                        p->seqid,
1776                        p->u.r.ackid,
1777                        lp->rcv_nxt);
1778
1779                 if (unlikely(!rx_seq_ok(lp, p->seqid))) {
1780                         err = rx_bad_seq(lp, p, first_frag);
1781                         copied = 0;
1782                         break;
1783                 }
1784
1785                 if (p->type & LDC_CTRL) {
1786                         err = process_control_frame(lp, p);
1787                         if (err < 0)
1788                                 break;
1789                         err = 0;
1790                 }
1791
1792                 lp->rcv_nxt = p->seqid;
1793
1794                 /*
1795                  * If this is a control-only packet, there is nothing
1796                  * else to do but advance the rx queue since the packet
1797                  * was already processed above.
1798                  */
1799                 if (!(p->type & LDC_DATA)) {
1800                         new = rx_advance(lp, new);
1801                         break;
1802                 }
1803                 if (p->stype & (LDC_ACK | LDC_NACK)) {
1804                         err = data_ack_nack(lp, p);
1805                         if (err)
1806                                 break;
1807                 }
1808                 if (!(p->stype & LDC_INFO)) {
1809                         new = rx_advance(lp, new);
1810                         err = rx_set_head(lp, new);
1811                         if (err)
1812                                 break;
1813                         goto no_data;
1814                 }
1815
1816                 pkt_len = p->env & LDC_LEN;
1817
1818                 /* Every initial packet starts with the START bit set.
1819                  *
1820                  * Singleton packets will have both START+STOP set.
1821                  *
1822                  * Fragments will have START set in the first frame, STOP
1823                  * set in the last frame, and neither bit set in middle
1824                  * frames of the packet.
1825                  *
1826                  * Therefore if we are at the beginning of a packet and
1827                  * we don't see START, or we are in the middle of a fragmented
1828                  * packet and do see START, we are unsynchronized and should
1829                  * flush the RX queue.
1830                  */
1831                 if ((first_frag == NULL && !(p->env & LDC_START)) ||
1832                     (first_frag != NULL &&  (p->env & LDC_START))) {
1833                         if (!first_frag)
1834                                 new = rx_advance(lp, new);
1835
1836                         err = rx_set_head(lp, new);
1837                         if (err)
1838                                 break;
1839
1840                         if (!first_frag)
1841                                 goto no_data;
1842                 }
1843                 if (!first_frag)
1844                         first_frag = p;
1845
1846                 if (pkt_len > size - copied) {
1847                         /* User didn't give us a big enough buffer,
1848                          * what to do?  This is a pretty serious error.
1849                          *
1850                          * Since we haven't updated the RX ring head to
1851                          * consume any of the packets, signal the error
1852                          * to the user and just leave the RX ring alone.
1853                          *
1854                          * This seems the best behavior because this allows
1855                          * a user of the LDC layer to start with a small
1856                          * RX buffer for ldc_read() calls and use -EMSGSIZE
1857                          * as a cue to enlarge it's read buffer.
1858                          */
1859                         err = -EMSGSIZE;
1860                         break;
1861                 }
1862
1863                 /* Ok, we are gonna eat this one.  */
1864                 new = rx_advance(lp, new);
1865
1866                 memcpy(buf,
1867                        (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
1868                         p->u.u_data : p->u.r.r_data), pkt_len);
1869                 buf += pkt_len;
1870                 copied += pkt_len;
1871
1872                 if (p->env & LDC_STOP)
1873                         break;
1874
1875 no_data:
1876                 if (new == lp->rx_tail) {
1877                         err = rx_data_wait(lp, new);
1878                         if (err)
1879                                 break;
1880                 }
1881         }
1882
1883         if (!err)
1884                 err = rx_set_head(lp, new);
1885
1886         if (err && first_frag)
1887                 lp->rcv_nxt = first_frag->seqid - 1;
1888
1889         if (!err) {
1890                 err = copied;
1891                 if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
1892                         send_data_ack(lp);
1893         }
1894
1895         return err;
1896 }
1897
1898 static const struct ldc_mode_ops nonraw_ops = {
1899         .write          =       write_nonraw,
1900         .read           =       read_nonraw,
1901 };
1902
1903 static int write_stream(struct ldc_channel *lp, const void *buf,
1904                         unsigned int size)
1905 {
1906         if (size > lp->cfg.mtu)
1907                 size = lp->cfg.mtu;
1908         return write_nonraw(lp, buf, size);
1909 }
1910
1911 static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1912 {
1913         if (!lp->mssbuf_len) {
1914                 int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
1915                 if (err < 0)
1916                         return err;
1917
1918                 lp->mssbuf_len = err;
1919                 lp->mssbuf_off = 0;
1920         }
1921
1922         if (size > lp->mssbuf_len)
1923                 size = lp->mssbuf_len;
1924         memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1925
1926         lp->mssbuf_off += size;
1927         lp->mssbuf_len -= size;
1928
1929         return size;
1930 }
1931
1932 static const struct ldc_mode_ops stream_ops = {
1933         .write          =       write_stream,
1934         .read           =       read_stream,
1935 };
1936
1937 int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1938 {
1939         unsigned long flags;
1940         int err;
1941
1942         if (!buf)
1943                 return -EINVAL;
1944
1945         if (!size)
1946                 return 0;
1947
1948         spin_lock_irqsave(&lp->lock, flags);
1949
1950         if (lp->hs_state != LDC_HS_COMPLETE)
1951                 err = -ENOTCONN;
1952         else
1953                 err = lp->mops->write(lp, buf, size);
1954
1955         spin_unlock_irqrestore(&lp->lock, flags);
1956
1957         return err;
1958 }
1959 EXPORT_SYMBOL(ldc_write);
1960
1961 int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1962 {
1963         unsigned long flags;
1964         int err;
1965
1966         ldcdbg(RX, "%s: entered size=%d\n", __func__, size);
1967
1968         if (!buf)
1969                 return -EINVAL;
1970
1971         if (!size)
1972                 return 0;
1973
1974         spin_lock_irqsave(&lp->lock, flags);
1975
1976         if (lp->hs_state != LDC_HS_COMPLETE)
1977                 err = -ENOTCONN;
1978         else
1979                 err = lp->mops->read(lp, buf, size);
1980
1981         spin_unlock_irqrestore(&lp->lock, flags);
1982
1983         ldcdbg(RX, "%s: mode=%d, head=%lu, tail=%lu rv=%d\n", __func__,
1984                lp->cfg.mode, lp->rx_head, lp->rx_tail, err);
1985
1986         return err;
1987 }
1988 EXPORT_SYMBOL(ldc_read);
1989
1990 static u64 pagesize_code(void)
1991 {
1992         switch (PAGE_SIZE) {
1993         default:
1994         case (8ULL * 1024ULL):
1995                 return 0;
1996         case (64ULL * 1024ULL):
1997                 return 1;
1998         case (512ULL * 1024ULL):
1999                 return 2;
2000         case (4ULL * 1024ULL * 1024ULL):
2001                 return 3;
2002         case (32ULL * 1024ULL * 1024ULL):
2003                 return 4;
2004         case (256ULL * 1024ULL * 1024ULL):
2005                 return 5;
2006         }
2007 }
2008
2009 static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
2010 {
2011         return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
2012                 (index << PAGE_SHIFT) |
2013                 page_offset);
2014 }
2015
2016
2017 static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
2018                                              unsigned long npages)
2019 {
2020         long entry;
2021
2022         entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table,
2023                                       npages, NULL, (unsigned long)-1, 0);
2024         if (unlikely(entry == IOMMU_ERROR_CODE))
2025                 return NULL;
2026
2027         return iommu->page_table + entry;
2028 }
2029
2030 static u64 perm_to_mte(unsigned int map_perm)
2031 {
2032         u64 mte_base;
2033
2034         mte_base = pagesize_code();
2035
2036         if (map_perm & LDC_MAP_SHADOW) {
2037                 if (map_perm & LDC_MAP_R)
2038                         mte_base |= LDC_MTE_COPY_R;
2039                 if (map_perm & LDC_MAP_W)
2040                         mte_base |= LDC_MTE_COPY_W;
2041         }
2042         if (map_perm & LDC_MAP_DIRECT) {
2043                 if (map_perm & LDC_MAP_R)
2044                         mte_base |= LDC_MTE_READ;
2045                 if (map_perm & LDC_MAP_W)
2046                         mte_base |= LDC_MTE_WRITE;
2047                 if (map_perm & LDC_MAP_X)
2048                         mte_base |= LDC_MTE_EXEC;
2049         }
2050         if (map_perm & LDC_MAP_IO) {
2051                 if (map_perm & LDC_MAP_R)
2052                         mte_base |= LDC_MTE_IOMMU_R;
2053                 if (map_perm & LDC_MAP_W)
2054                         mte_base |= LDC_MTE_IOMMU_W;
2055         }
2056
2057         return mte_base;
2058 }
2059
2060 static int pages_in_region(unsigned long base, long len)
2061 {
2062         int count = 0;
2063
2064         do {
2065                 unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
2066
2067                 len -= (new - base);
2068                 base = new;
2069                 count++;
2070         } while (len > 0);
2071
2072         return count;
2073 }
2074
2075 struct cookie_state {
2076         struct ldc_mtable_entry         *page_table;
2077         struct ldc_trans_cookie         *cookies;
2078         u64                             mte_base;
2079         u64                             prev_cookie;
2080         u32                             pte_idx;
2081         u32                             nc;
2082 };
2083
2084 static void fill_cookies(struct cookie_state *sp, unsigned long pa,
2085                          unsigned long off, unsigned long len)
2086 {
2087         do {
2088                 unsigned long tlen, new = pa + PAGE_SIZE;
2089                 u64 this_cookie;
2090
2091                 sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
2092
2093                 tlen = PAGE_SIZE;
2094                 if (off)
2095                         tlen = PAGE_SIZE - off;
2096                 if (tlen > len)
2097                         tlen = len;
2098
2099                 this_cookie = make_cookie(sp->pte_idx,
2100                                           pagesize_code(), off);
2101
2102                 off = 0;
2103
2104                 if (this_cookie == sp->prev_cookie) {
2105                         sp->cookies[sp->nc - 1].cookie_size += tlen;
2106                 } else {
2107                         sp->cookies[sp->nc].cookie_addr = this_cookie;
2108                         sp->cookies[sp->nc].cookie_size = tlen;
2109                         sp->nc++;
2110                 }
2111                 sp->prev_cookie = this_cookie + tlen;
2112
2113                 sp->pte_idx++;
2114
2115                 len -= tlen;
2116                 pa = new;
2117         } while (len > 0);
2118 }
2119
2120 static int sg_count_one(struct scatterlist *sg)
2121 {
2122         unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
2123         long len = sg->length;
2124
2125         if ((sg->offset | len) & (8UL - 1))
2126                 return -EFAULT;
2127
2128         return pages_in_region(base + sg->offset, len);
2129 }
2130
2131 static int sg_count_pages(struct scatterlist *sg, int num_sg)
2132 {
2133         int count;
2134         int i;
2135
2136         count = 0;
2137         for (i = 0; i < num_sg; i++) {
2138                 int err = sg_count_one(sg + i);
2139                 if (err < 0)
2140                         return err;
2141                 count += err;
2142         }
2143
2144         return count;
2145 }
2146
2147 int ldc_map_sg(struct ldc_channel *lp,
2148                struct scatterlist *sg, int num_sg,
2149                struct ldc_trans_cookie *cookies, int ncookies,
2150                unsigned int map_perm)
2151 {
2152         unsigned long i, npages;
2153         struct ldc_mtable_entry *base;
2154         struct cookie_state state;
2155         struct ldc_iommu *iommu;
2156         int err;
2157         struct scatterlist *s;
2158
2159         if (map_perm & ~LDC_MAP_ALL)
2160                 return -EINVAL;
2161
2162         err = sg_count_pages(sg, num_sg);
2163         if (err < 0)
2164                 return err;
2165
2166         npages = err;
2167         if (err > ncookies)
2168                 return -EMSGSIZE;
2169
2170         iommu = &lp->iommu;
2171
2172         base = alloc_npages(iommu, npages);
2173
2174         if (!base)
2175                 return -ENOMEM;
2176
2177         state.page_table = iommu->page_table;
2178         state.cookies = cookies;
2179         state.mte_base = perm_to_mte(map_perm);
2180         state.prev_cookie = ~(u64)0;
2181         state.pte_idx = (base - iommu->page_table);
2182         state.nc = 0;
2183
2184         for_each_sg(sg, s, num_sg, i) {
2185                 fill_cookies(&state, page_to_pfn(sg_page(s)) << PAGE_SHIFT,
2186                              s->offset, s->length);
2187         }
2188
2189         return state.nc;
2190 }
2191 EXPORT_SYMBOL(ldc_map_sg);
2192
2193 int ldc_map_single(struct ldc_channel *lp,
2194                    void *buf, unsigned int len,
2195                    struct ldc_trans_cookie *cookies, int ncookies,
2196                    unsigned int map_perm)
2197 {
2198         unsigned long npages, pa;
2199         struct ldc_mtable_entry *base;
2200         struct cookie_state state;
2201         struct ldc_iommu *iommu;
2202
2203         if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2204                 return -EINVAL;
2205
2206         pa = __pa(buf);
2207         if ((pa | len) & (8UL - 1))
2208                 return -EFAULT;
2209
2210         npages = pages_in_region(pa, len);
2211
2212         iommu = &lp->iommu;
2213
2214         base = alloc_npages(iommu, npages);
2215
2216         if (!base)
2217                 return -ENOMEM;
2218
2219         state.page_table = iommu->page_table;
2220         state.cookies = cookies;
2221         state.mte_base = perm_to_mte(map_perm);
2222         state.prev_cookie = ~(u64)0;
2223         state.pte_idx = (base - iommu->page_table);
2224         state.nc = 0;
2225         fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2226         BUG_ON(state.nc > ncookies);
2227
2228         return state.nc;
2229 }
2230 EXPORT_SYMBOL(ldc_map_single);
2231
2232
2233 static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2234                         u64 cookie, u64 size)
2235 {
2236         unsigned long npages, entry;
2237
2238         npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2239
2240         entry = ldc_cookie_to_index(cookie, iommu);
2241         ldc_demap(iommu, id, cookie, entry, npages);
2242         iommu_tbl_range_free(&iommu->iommu_map_table, cookie, npages, entry);
2243 }
2244
2245 void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2246                int ncookies)
2247 {
2248         struct ldc_iommu *iommu = &lp->iommu;
2249         int i;
2250         unsigned long flags;
2251
2252         spin_lock_irqsave(&iommu->lock, flags);
2253         for (i = 0; i < ncookies; i++) {
2254                 u64 addr = cookies[i].cookie_addr;
2255                 u64 size = cookies[i].cookie_size;
2256
2257                 free_npages(lp->id, iommu, addr, size);
2258         }
2259         spin_unlock_irqrestore(&iommu->lock, flags);
2260 }
2261 EXPORT_SYMBOL(ldc_unmap);
2262
2263 int ldc_copy(struct ldc_channel *lp, int copy_dir,
2264              void *buf, unsigned int len, unsigned long offset,
2265              struct ldc_trans_cookie *cookies, int ncookies)
2266 {
2267         unsigned int orig_len;
2268         unsigned long ra;
2269         int i;
2270
2271         if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
2272                 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
2273                        lp->id, copy_dir);
2274                 return -EINVAL;
2275         }
2276
2277         ra = __pa(buf);
2278         if ((ra | len | offset) & (8UL - 1)) {
2279                 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
2280                        "ra[%lx] len[%x] offset[%lx]\n",
2281                        lp->id, ra, len, offset);
2282                 return -EFAULT;
2283         }
2284
2285         if (lp->hs_state != LDC_HS_COMPLETE ||
2286             (lp->flags & LDC_FLAG_RESET)) {
2287                 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
2288                        "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
2289                 return -ECONNRESET;
2290         }
2291
2292         orig_len = len;
2293         for (i = 0; i < ncookies; i++) {
2294                 unsigned long cookie_raddr = cookies[i].cookie_addr;
2295                 unsigned long this_len = cookies[i].cookie_size;
2296                 unsigned long actual_len;
2297
2298                 if (unlikely(offset)) {
2299                         unsigned long this_off = offset;
2300
2301                         if (this_off > this_len)
2302                                 this_off = this_len;
2303
2304                         offset -= this_off;
2305                         this_len -= this_off;
2306                         if (!this_len)
2307                                 continue;
2308                         cookie_raddr += this_off;
2309                 }
2310
2311                 if (this_len > len)
2312                         this_len = len;
2313
2314                 while (1) {
2315                         unsigned long hv_err;
2316
2317                         hv_err = sun4v_ldc_copy(lp->id, copy_dir,
2318                                                 cookie_raddr, ra,
2319                                                 this_len, &actual_len);
2320                         if (unlikely(hv_err)) {
2321                                 printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
2322                                        "HV error %lu\n",
2323                                        lp->id, hv_err);
2324                                 if (lp->hs_state != LDC_HS_COMPLETE ||
2325                                     (lp->flags & LDC_FLAG_RESET))
2326                                         return -ECONNRESET;
2327                                 else
2328                                         return -EFAULT;
2329                         }
2330
2331                         cookie_raddr += actual_len;
2332                         ra += actual_len;
2333                         len -= actual_len;
2334                         if (actual_len == this_len)
2335                                 break;
2336
2337                         this_len -= actual_len;
2338                 }
2339
2340                 if (!len)
2341                         break;
2342         }
2343
2344         /* It is caller policy what to do about short copies.
2345          * For example, a networking driver can declare the
2346          * packet a runt and drop it.
2347          */
2348
2349         return orig_len - len;
2350 }
2351 EXPORT_SYMBOL(ldc_copy);
2352
2353 void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2354                           struct ldc_trans_cookie *cookies, int *ncookies,
2355                           unsigned int map_perm)
2356 {
2357         void *buf;
2358         int err;
2359
2360         if (len & (8UL - 1))
2361                 return ERR_PTR(-EINVAL);
2362
2363         buf = kzalloc(len, GFP_ATOMIC);
2364         if (!buf)
2365                 return ERR_PTR(-ENOMEM);
2366
2367         err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2368         if (err < 0) {
2369                 kfree(buf);
2370                 return ERR_PTR(err);
2371         }
2372         *ncookies = err;
2373
2374         return buf;
2375 }
2376 EXPORT_SYMBOL(ldc_alloc_exp_dring);
2377
/* Undo ldc_alloc_exp_dring(): release the mappings described by
 * 'cookies' via ldc_unmap(), then free 'buf'.  The 'len' argument is
 * not used.
 */
void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
			struct ldc_trans_cookie *cookies, int ncookies)
{
	/* Drop the mappings before the memory behind them goes away. */
	ldc_unmap(lp, cookies, ncookies);

	kfree(buf);
}
EXPORT_SYMBOL(ldc_free_exp_dring);
2385
2386 static int __init ldc_init(void)
2387 {
2388         unsigned long major, minor;
2389         struct mdesc_handle *hp;
2390         const u64 *v;
2391         int err;
2392         u64 mp;
2393
2394         hp = mdesc_grab();
2395         if (!hp)
2396                 return -ENODEV;
2397
2398         mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
2399         err = -ENODEV;
2400         if (mp == MDESC_NODE_NULL)
2401                 goto out;
2402
2403         v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
2404         if (!v)
2405                 goto out;
2406
2407         major = 1;
2408         minor = 0;
2409         if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2410                 printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2411                 goto out;
2412         }
2413
2414         printk(KERN_INFO "%s", version);
2415
2416         if (!*v) {
2417                 printk(KERN_INFO PFX "Domaining disabled.\n");
2418                 goto out;
2419         }
2420         ldom_domaining_enabled = 1;
2421         err = 0;
2422
2423 out:
2424         mdesc_release(hp);
2425         return err;
2426 }
2427
2428 core_initcall(ldc_init);