drivers/crypto/nx/nx-842.c
1 /*
2  * Driver for IBM Power 842 compression accelerator
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17  *
18  * Copyright (C) IBM Corporation, 2012
19  *
20  * Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
21  *          Seth Jennings <sjenning@linux.vnet.ibm.com>
22  */
23
24 #include <linux/kernel.h>
25 #include <linux/module.h>
26 #include <linux/nx842.h>
27 #include <linux/of.h>
28 #include <linux/slab.h>
29
30 #include <asm/page.h>
31 #include <asm/vio.h>
32
33 #include "nx_csbcpb.h" /* struct nx_csbcpb */
34
35 #define MODULE_NAME "nx-compress"
36 MODULE_LICENSE("GPL");
37 MODULE_AUTHOR("Robert Jennings <rcj@linux.vnet.ibm.com>");
38 MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors");
39
40 #define SHIFT_4K 12
41 #define SHIFT_64K 16
42 #define SIZE_4K (1UL << SHIFT_4K)
43 #define SIZE_64K (1UL << SHIFT_64K)
44
45 /* IO buffer must be 128 byte aligned */
46 #define IO_BUFFER_ALIGN 128
47
48 struct nx842_header {
49         int blocks_nr; /* number of compressed blocks */
50         int offset; /* offset of the first block (from beginning of header) */
51         int sizes[0]; /* sizes of the compressed blocks */
52 };
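/*
 * Output buffer layout produced by nx842_compress() (derived from the
 * compression loop below; shown here for reference only):
 *
 *   struct nx842_header            blocks_nr, offset, sizes[blocks_nr]
 *   padding                        so block 0 starts IO_BUFFER_ALIGN aligned
 *   block 0 .. block blocks_nr-1   each block starts IO_BUFFER_ALIGN aligned
 *
 * hdr->offset is the byte offset of block 0 from the start of the header,
 * and a negative sizes[i] marks a block that was stored uncompressed.
 */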
53
54 static inline int nx842_header_size(const struct nx842_header *hdr)
55 {
56         return sizeof(struct nx842_header) +
57                         hdr->blocks_nr * sizeof(hdr->sizes[0]);
58 }
59
60 /* Macros for fields within nx_csbcpb */
61 /* Check the valid bit within the csbcpb valid field */
62 #define NX842_CSBCPB_VALID_CHK(x) (x & BIT_MASK(7))
63
64 /* CE macros operate on the completion_extension field bits in the csbcpb.
65  * CE0 0=full completion, 1=partial completion
66  * CE1 0=CE0 indicates completion, 1=termination (output may be modified)
67  * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */
68 #define NX842_CSBCPB_CE0(x)     (x & BIT_MASK(7))
69 #define NX842_CSBCPB_CE1(x)     (x & BIT_MASK(6))
70 #define NX842_CSBCPB_CE2(x)     (x & BIT_MASK(5))
71
72 /* The NX unit accepts data only on 4K page boundaries */
73 #define NX842_HW_PAGE_SHIFT     SHIFT_4K
74 #define NX842_HW_PAGE_SIZE      (ASM_CONST(1) << NX842_HW_PAGE_SHIFT)
75 #define NX842_HW_PAGE_MASK      (~(NX842_HW_PAGE_SIZE-1))
76
77 enum nx842_status {
78         UNAVAILABLE,
79         AVAILABLE
80 };
81
82 struct ibm_nx842_counters {
83         atomic64_t comp_complete;
84         atomic64_t comp_failed;
85         atomic64_t decomp_complete;
86         atomic64_t decomp_failed;
87         atomic64_t swdecomp;
88         atomic64_t comp_times[32];
89         atomic64_t decomp_times[32];
90 };
91
92 static struct nx842_devdata {
93         struct vio_dev *vdev;
94         struct device *dev;
95         struct ibm_nx842_counters *counters;
96         unsigned int max_sg_len;
97         unsigned int max_sync_size;
98         unsigned int max_sync_sg;
99         enum nx842_status status;
100 } __rcu *devdata;
101 static DEFINE_SPINLOCK(devdata_mutex);
102
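/*
 * Generate one nx842_inc_<counter>() helper per performance counter below;
 * each helper tolerates a NULL devdata pointer so callers need not check it.
 */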
103 #define NX842_COUNTER_INC(_x) \
104 static inline void nx842_inc_##_x( \
105         const struct nx842_devdata *dev) { \
106         if (dev) \
107                 atomic64_inc(&dev->counters->_x); \
108 }
109 NX842_COUNTER_INC(comp_complete);
110 NX842_COUNTER_INC(comp_failed);
111 NX842_COUNTER_INC(decomp_complete);
112 NX842_COUNTER_INC(decomp_failed);
113 NX842_COUNTER_INC(swdecomp);
114
115 #define NX842_HIST_SLOTS 16
116
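/*
 * Bucket an elapsed time (in microseconds) into a power-of-two histogram:
 * bucket 0 counts 0-1us, bucket i counts 2^i to 2^(i+1)-1 us, and anything
 * at or above 2^(NX842_HIST_SLOTS-1) us lands in the last bucket.
 */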
117 static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int time)
118 {
119         int bucket = fls(time);
120
121         if (bucket)
122                 bucket = min((NX842_HIST_SLOTS - 1), bucket - 1);
123
124         atomic64_inc(&times[bucket]);
125 }
126
127 /* NX unit operation flags */
128 #define NX842_OP_COMPRESS       0x0
129 #define NX842_OP_CRC            0x1
130 #define NX842_OP_DECOMPRESS     0x2
131 #define NX842_OP_COMPRESS_CRC   (NX842_OP_COMPRESS | NX842_OP_CRC)
132 #define NX842_OP_DECOMPRESS_CRC (NX842_OP_DECOMPRESS | NX842_OP_CRC)
133 #define NX842_OP_ASYNC          (1<<23)
134 #define NX842_OP_NOTIFY         (1<<22)
135 #define NX842_OP_NOTIFY_INT(x)  ((x & 0xff)<<8)
136
137 static unsigned long nx842_get_desired_dma(struct vio_dev *viodev)
138 {
139         /* No use of DMA mappings within the driver. */
140         return 0;
141 }
142
143 struct nx842_slentry {
144         unsigned long ptr; /* Real address (use __pa()) */
145         unsigned long len;
146 };
147
148 /* pHyp scatterlist entry */
149 struct nx842_scatterlist {
150         int entry_nr; /* number of slentries */
151         struct nx842_slentry *entries; /* ptr to array of slentries */
152 };
153
154 /* Does not include sizeof(entry_nr) in the size */
155 static inline unsigned long nx842_get_scatterlist_size(
156                                 struct nx842_scatterlist *sl)
157 {
158         return sl->entry_nr * sizeof(struct nx842_slentry);
159 }
160
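/*
 * Translate a kernel virtual address to the real (physical) address that is
 * handed to the hypervisor.  vmalloc addresses are not part of the linear
 * mapping, so __pa() cannot be used on them; go through the backing page
 * instead.
 */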
161 static inline unsigned long nx842_get_pa(void *addr)
162 {
163         if (is_vmalloc_addr(addr))
164                 return page_to_phys(vmalloc_to_page(addr))
165                        + offset_in_page(addr);
166         else
167                 return __pa(addr);
168 }
169
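/*
 * Build a pHyp scatterlist covering [buf, buf + len).  Entries are clipped
 * so that none of them crosses a 4K hardware page boundary, since the NX
 * unit only accepts data on 4K page boundaries (see NX842_HW_PAGE_SIZE).
 */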
170 static int nx842_build_scatterlist(unsigned long buf, int len,
171                         struct nx842_scatterlist *sl)
172 {
173         unsigned long nextpage;
174         struct nx842_slentry *entry;
175
176         sl->entry_nr = 0;
177
178         entry = sl->entries;
179         while (len) {
180                 entry->ptr = nx842_get_pa((void *)buf);
181                 nextpage = ALIGN(buf + 1, NX842_HW_PAGE_SIZE);
182                 if (nextpage < buf + len) {
183                         /* we aren't at the end yet */
184                         if (IS_ALIGNED(buf, NX842_HW_PAGE_SIZE))
185                                 /* aligned start: take a full hardware page */
186                                 entry->len = NX842_HW_PAGE_SIZE;
187                         else
188                                 /* unaligned start: go up to the next page boundary */
189                                 entry->len = nextpage - buf;
190                 } else {
191                         /* at the end */
192                         entry->len = len;
193                 }
194
195                 len -= entry->len;
196                 buf += entry->len;
197                 sl->entry_nr++;
198                 entry++;
199         }
200
201         return 0;
202 }
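/*
 * Note: when one of these scatterlists is used, the compress/decompress
 * paths below pass its real address with a *negated* length (op.inlen or
 * op.outlen); a direct DDE is passed with a positive length instead.
 */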
203
204 /*
205  * Working memory for software decompression
206  */
207 struct sw842_fifo {
208         union {
209                 char f8[256][8];
210                 char f4[512][4];
211         };
212         char f2[256][2];
213         unsigned char f84_full;
214         unsigned char f2_full;
215         unsigned char f8_count;
216         unsigned char f2_count;
217         unsigned int f4_count;
218 };
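/*
 * The f8/f4/f2 arrays act as history buffers: decompressed output is copied
 * into them in 8-byte chunks (see sw842_copy_to_fifo() below) so that later
 * ptr8/ptr4/ptr2 templates can re-emit earlier output by index rather than
 * carrying the literal bytes again.
 */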
219
220 /*
221  * Working memory for crypto API
222  */
223 struct nx842_workmem {
224         char bounce[PAGE_SIZE]; /* bounce buffer for decompression input */
225         union {
226                 /* hardware working memory */
227                 struct {
228                         /* scatterlist */
229                         char slin[SIZE_4K];
230                         char slout[SIZE_4K];
231                         /* coprocessor status/parameter block */
232                         struct nx_csbcpb csbcpb;
233                 };
234                 /* software working memory */
235                 struct sw842_fifo swfifo; /* software decompression fifo */
236         };
237 };
238
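/*
 * Callers provide working memory of at least nx842_get_workmem_size() bytes;
 * the extra NX842_HW_PAGE_SIZE of slack lets nx842_compress()/_decompress()
 * round the pointer up to a hardware page boundary (see the ALIGN() of wmem
 * there).  nx842_get_workmem_size_aligned() is the size required when the
 * caller already supplies a 4K-aligned buffer.
 */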
239 int nx842_get_workmem_size(void)
240 {
241         return sizeof(struct nx842_workmem) + NX842_HW_PAGE_SIZE;
242 }
243 EXPORT_SYMBOL_GPL(nx842_get_workmem_size);
244
245 int nx842_get_workmem_size_aligned(void)
246 {
247         return sizeof(struct nx842_workmem);
248 }
249 EXPORT_SYMBOL_GPL(nx842_get_workmem_size_aligned);
250
251 static int nx842_validate_result(struct device *dev,
252         struct cop_status_block *csb)
253 {
254         /* The csb must be valid after returning from vio_h_cop_sync */
255         if (!NX842_CSBCPB_VALID_CHK(csb->valid)) {
256                 dev_err(dev, "%s: csbcpb not valid upon completion.\n",
257                                 __func__);
258                 dev_dbg(dev, "valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n",
259                                 csb->valid,
260                                 csb->crb_seq_number,
261                                 csb->completion_code,
262                                 csb->completion_extension);
263                 dev_dbg(dev, "processed_bytes:%d address:0x%016lx\n",
264                                 csb->processed_byte_count,
265                                 (unsigned long)csb->address);
266                 return -EIO;
267         }
268
269         /* Check return values from the hardware in the CSB */
270         switch (csb->completion_code) {
271         case 0: /* Completed without error */
272                 break;
273         case 64: /* Target bytes > Source bytes during compression */
274         case 13: /* Output buffer too small */
275                 dev_dbg(dev, "%s: Compression output larger than input\n",
276                                         __func__);
277                 return -ENOSPC;
278         case 66: /* Input data contains an illegal template field */
279         case 67: /* Template indicates data past the end of the input stream */
280                 dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n",
281                                         __func__, csb->completion_code);
282                 return -EINVAL;
283         default:
284                 dev_dbg(dev, "%s: Unspecified error (code:%d)\n",
285                                         __func__, csb->completion_code);
286                 return -EIO;
287         }
288
289         /* Hardware sanity check */
290         if (!NX842_CSBCPB_CE2(csb->completion_extension)) {
291                 dev_err(dev, "%s: No error returned by hardware, but "
292                                 "data returned is unusable, contact support.\n"
293                                 "(Additional info: csbcbp->processed bytes "
294                                 "does not specify processed bytes for the "
295                                 "target buffer.)\n", __func__);
296                 return -EIO;
297         }
298
299         return 0;
300 }
301
302 /**
303  * nx842_compress - Compress data using the 842 algorithm
304  *
305  * Compression provided by the NX842 coprocessor on IBM Power systems.
306  * The input buffer is compressed and the result is stored in the
307  * provided output buffer.
308  *
309  * Upon return from this function @outlen contains the length of the
310  * compressed data.  If there is an error then @outlen will be 0 and an
311  * error will be specified by the return code from this function.
312  *
313  * @in: Pointer to input buffer, must be page aligned
314  * @inlen: Length of input buffer, must be PAGE_SIZE
315  * @out: Pointer to output buffer
316  * @outlen: Length of output buffer
317  * @wmem: ptr to buffer for working memory, size determined by
318  *        nx842_get_workmem_size()
319  *
320  * Returns:
321  *   0          Success, output of length @outlen stored in the buffer at @out
322  *   -ENOMEM    Unable to allocate internal buffers
323  *   -ENOSPC    Output buffer is too small
324  *   -EMSGSIZE  XXX Difficult to describe this limitation
325  *   -EIO       Internal error
326  *   -ENODEV    Hardware unavailable
327  */
328 int nx842_compress(const unsigned char *in, unsigned int inlen,
329                        unsigned char *out, unsigned int *outlen, void *wmem)
330 {
331         struct nx842_header *hdr;
332         struct nx842_devdata *local_devdata;
333         struct device *dev = NULL;
334         struct nx842_workmem *workmem;
335         struct nx842_scatterlist slin, slout;
336         struct nx_csbcpb *csbcpb;
337         int ret = 0, max_sync_size, i, bytesleft, size, hdrsize;
338         unsigned long inbuf, outbuf, padding;
339         struct vio_pfo_op op = {
340                 .done = NULL,
341                 .handle = 0,
342                 .timeout = 0,
343         };
344         unsigned long start_time = get_tb();
345
346         /*
347          * Make sure the input buffer is page (PAGE_SIZE) aligned.  This is
348          * assumed since this driver is designed for page compression only
349          * (for now).  This is very convenient since we can then use direct
350          * DDE(s) for the input and the alignment is guaranteed.
351          */
352         inbuf = (unsigned long)in;
353         if (!IS_ALIGNED(inbuf, PAGE_SIZE) || inlen != PAGE_SIZE)
354                 return -EINVAL;
355
356         rcu_read_lock();
357         local_devdata = rcu_dereference(devdata);
358         if (!local_devdata || !local_devdata->dev) {
359                 rcu_read_unlock();
360                 return -ENODEV;
361         }
362         max_sync_size = local_devdata->max_sync_size;
363         dev = local_devdata->dev;
364
365         /* Create the header */
366         hdr = (struct nx842_header *)out;
367         hdr->blocks_nr = PAGE_SIZE / max_sync_size;
368         hdrsize = nx842_header_size(hdr);
369         outbuf = (unsigned long)out + hdrsize;
370         bytesleft = *outlen - hdrsize;
371
372         /* Init scatterlist */
373         workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem,
374                 NX842_HW_PAGE_SIZE);
375         slin.entries = (struct nx842_slentry *)workmem->slin;
376         slout.entries = (struct nx842_slentry *)workmem->slout;
377
378         /* Init operation */
379         op.flags = NX842_OP_COMPRESS;
380         csbcpb = &workmem->csbcpb;
381         memset(csbcpb, 0, sizeof(*csbcpb));
382         op.csbcpb = nx842_get_pa(csbcpb);
383         op.out = nx842_get_pa(slout.entries);
384
385         for (i = 0; i < hdr->blocks_nr; i++) {
386                 /*
387                  * Aligning the output blocks to 128 bytes does waste space,
388                  * but it prevents the need for bounce buffers and memory
389                  * copies.  It also simplifies the code a lot.  In the worst
390                  * case (64k page, 4k max_sync_size), you lose up to
391                  * (128*16)/64k = ~3% of the compression factor. For 64k
392                  * max_sync_size, the loss would be at most 128/64k = ~0.2%.
393                  */
394                 padding = ALIGN(outbuf, IO_BUFFER_ALIGN) - outbuf;
395                 outbuf += padding;
396                 bytesleft -= padding;
397                 if (i == 0)
398                         /* save offset into first block in header */
399                         hdr->offset = padding + hdrsize;
400
401                 if (bytesleft <= 0) {
402                         ret = -ENOSPC;
403                         goto unlock;
404                 }
405
406                 /*
407                  * NOTE: If the default max_sync_size is changed from 4k
408                  * to 64k, remove the "likely" case below, since a
409                  * scatterlist will always be needed.
410                  */
411                 if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) {
412                         /* Create direct DDE */
413                         op.in = nx842_get_pa((void *)inbuf);
414                         op.inlen = max_sync_size;
415
416                 } else {
417                         /* Create indirect DDE (scatterlist) */
418                         nx842_build_scatterlist(inbuf, max_sync_size, &slin);
419                         op.in = nx842_get_pa(slin.entries);
420                         op.inlen = -nx842_get_scatterlist_size(&slin);
421                 }
422
423                 /*
424                  * If max_sync_size != NX842_HW_PAGE_SIZE, an indirect
425                  * DDE is required for the outbuf.
426                  * If max_sync_size == NX842_HW_PAGE_SIZE, outbuf must
427                  * also be page aligned (a 1 in 4k/128 = 32 chance) in order
428                  * to use a direct DDE.
429                  * This is unlikely, just use an indirect DDE always.
430                  */
431                 nx842_build_scatterlist(outbuf,
432                         min(bytesleft, max_sync_size), &slout);
433                 /* op.out set before loop */
434                 op.outlen = -nx842_get_scatterlist_size(&slout);
435
436                 /* Send request to pHyp */
437                 ret = vio_h_cop_sync(local_devdata->vdev, &op);
438
439                 /* Check for pHyp error */
440                 if (ret) {
441                         dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
442                                 __func__, ret, op.hcall_err);
443                         ret = -EIO;
444                         goto unlock;
445                 }
446
447                 /* Check for hardware error */
448                 ret = nx842_validate_result(dev, &csbcpb->csb);
449                 if (ret && ret != -ENOSPC)
450                         goto unlock;
451
452                 /* Handle incompressible data */
453                 if (unlikely(ret == -ENOSPC)) {
454                         if (bytesleft < max_sync_size) {
455                                 /*
456                                  * Not enough space left in the output buffer
457                                  * to store uncompressed block
458                                  */
459                                 goto unlock;
460                         } else {
461                                 /* Store incompressible block */
462                                 memcpy((void *)outbuf, (void *)inbuf,
463                                         max_sync_size);
464                                 hdr->sizes[i] = -max_sync_size;
465                                 outbuf += max_sync_size;
466                                 bytesleft -= max_sync_size;
467                                 /* Reset ret, incompressible data handled */
468                                 ret = 0;
469                         }
470                 } else {
471                         /* Normal case, compression was successful */
472                         size = csbcpb->csb.processed_byte_count;
473                         dev_dbg(dev, "%s: processed_bytes=%d\n",
474                                 __func__, size);
475                         hdr->sizes[i] = size;
476                         outbuf += size;
477                         bytesleft -= size;
478                 }
479
480                 inbuf += max_sync_size;
481         }
482
483         *outlen = (unsigned int)(outbuf - (unsigned long)out);
484
485 unlock:
486         if (ret)
487                 nx842_inc_comp_failed(local_devdata);
488         else {
489                 nx842_inc_comp_complete(local_devdata);
490                 ibm_nx842_incr_hist(local_devdata->counters->comp_times,
491                         (get_tb() - start_time) / tb_ticks_per_usec);
492         }
493         rcu_read_unlock();
494         return ret;
495 }
496 EXPORT_SYMBOL_GPL(nx842_compress);
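/*
 * Illustrative caller sketch (not part of this driver): "page", the output
 * buffer size and the GFP flags below are assumptions for the example only.
 *
 *	void *wmem = kmalloc(nx842_get_workmem_size(), GFP_KERNEL);
 *	unsigned char *dst = kmalloc(2 * PAGE_SIZE, GFP_KERNEL);
 *	unsigned int dlen = 2 * PAGE_SIZE;
 *	int rc;
 *
 *	rc = nx842_compress(page_address(page), PAGE_SIZE, dst, &dlen, wmem);
 *	if (rc == 0)
 *		pr_debug("compressed %lu bytes down to %u\n", PAGE_SIZE, dlen);
 */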
497
498 static int sw842_decompress(const unsigned char *, int, unsigned char *, int *,
499                         const void *);
500
501 /**
502  * nx842_decompress - Decompress data using the 842 algorithm
503  *
504  * Decompression provided by the NX842 coprocessor on IBM Power systems.
505  * The input buffer is decompressed and the result is stored in the
506  * provided output buffer.  The size allocated to the output buffer is
507  * provided by the caller of this function in @outlen.  Upon return from
508  * this function @outlen contains the length of the decompressed data.
509  * If there is an error then @outlen will be 0 and an error will be
510  * specified by the return code from this function.
511  *
512  * @in: Pointer to input buffer, will use bounce buffer if not 128 byte
513  *      aligned
514  * @inlen: Length of input buffer
515  * @out: Pointer to output buffer, must be page aligned
516  * @outlen: Length of output buffer, must be PAGE_SIZE
517  * @wmem: ptr to buffer for working memory, size determined by
518  *        nx842_get_workmem_size()
519  *
520  * Returns:
521  *   0          Success, output of length @outlen stored in the buffer at @out
522  *   -ENODEV    Hardware decompression device is unavailable
523  *   -ENOMEM    Unable to allocate internal buffers
524  *   -ENOSPC    Output buffer is too small
525  *   -EINVAL    Bad input data encountered when attempting decompress
526  *   -EIO       Internal error
527  */
528 int nx842_decompress(const unsigned char *in, unsigned int inlen,
529                          unsigned char *out, unsigned int *outlen, void *wmem)
530 {
531         struct nx842_header *hdr;
532         struct nx842_devdata *local_devdata;
533         struct device *dev = NULL;
534         struct nx842_workmem *workmem;
535         struct nx842_scatterlist slin, slout;
536         struct nx_csbcpb *csbcpb;
537         int ret = 0, i, size, max_sync_size;
538         unsigned long inbuf, outbuf;
539         struct vio_pfo_op op = {
540                 .done = NULL,
541                 .handle = 0,
542                 .timeout = 0,
543         };
544         unsigned long start_time = get_tb();
545
546         /* Ensure page alignment and size */
547         outbuf = (unsigned long)out;
548         if (!IS_ALIGNED(outbuf, PAGE_SIZE) || *outlen != PAGE_SIZE)
549                 return -EINVAL;
550
551         rcu_read_lock();
552         local_devdata = rcu_dereference(devdata);
553         if (local_devdata)
554                 dev = local_devdata->dev;
555
556         /* Get header */
557         hdr = (struct nx842_header *)in;
558
559         workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem,
560                 NX842_HW_PAGE_SIZE);
561
562         inbuf = (unsigned long)in + hdr->offset;
563         if (likely(!IS_ALIGNED(inbuf, IO_BUFFER_ALIGN))) {
564                 /* Copy block(s) into bounce buffer for alignment */
565                 memcpy(workmem->bounce, in + hdr->offset, inlen - hdr->offset);
566                 inbuf = (unsigned long)workmem->bounce;
567         }
568
569         /* Init scatterlist */
570         slin.entries = (struct nx842_slentry *)workmem->slin;
571         slout.entries = (struct nx842_slentry *)workmem->slout;
572
573         /* Init operation */
574         op.flags = NX842_OP_DECOMPRESS;
575         csbcpb = &workmem->csbcpb;
576         memset(csbcpb, 0, sizeof(*csbcpb));
577         op.csbcpb = nx842_get_pa(csbcpb);
578
579         /*
580          * max_sync_size may have changed since compression,
581          * so we can't read it from the device info. We need
582          * to derive it from hdr->blocks_nr.
583          */
584         max_sync_size = PAGE_SIZE / hdr->blocks_nr;
585
586         for (i = 0; i < hdr->blocks_nr; i++) {
587                 /* Skip padding */
588                 inbuf = ALIGN(inbuf, IO_BUFFER_ALIGN);
589
590                 if (hdr->sizes[i] < 0) {
591                         /* Negative sizes indicate uncompressed data blocks */
592                         size = abs(hdr->sizes[i]);
593                         memcpy((void *)outbuf, (void *)inbuf, size);
594                         outbuf += size;
595                         inbuf += size;
596                         continue;
597                 }
598
599                 if (!dev)
600                         goto sw;
601
602                 /*
603                  * The better the compression, the more likely the "likely"
604                  * case becomes.
605                  */
606                 if (likely((inbuf & NX842_HW_PAGE_MASK) ==
607                         ((inbuf + hdr->sizes[i] - 1) & NX842_HW_PAGE_MASK))) {
608                         /* Create direct DDE */
609                         op.in = nx842_get_pa((void *)inbuf);
610                         op.inlen = hdr->sizes[i];
611                 } else {
612                         /* Create indirect DDE (scatterlist) */
613                         nx842_build_scatterlist(inbuf, hdr->sizes[i], &slin);
614                         op.in = nx842_get_pa(slin.entries);
615                         op.inlen = -nx842_get_scatterlist_size(&slin);
616                 }
617
618                 /*
619                  * NOTE: If the default max_sync_size is changed from 4k
620                  * to 64k, remove the "likely" case below, since a
621                  * scatterlist will always be needed.
622                  */
623                 if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) {
624                         /* Create direct DDE */
625                         op.out = nx842_get_pa((void *)outbuf);
626                         op.outlen = max_sync_size;
627                 } else {
628                         /* Create indirect DDE (scatterlist) */
629                         nx842_build_scatterlist(outbuf, max_sync_size, &slout);
630                         op.out = nx842_get_pa(slout.entries);
631                         op.outlen = -nx842_get_scatterlist_size(&slout);
632                 }
633
634                 /* Send request to pHyp */
635                 ret = vio_h_cop_sync(local_devdata->vdev, &op);
636
637                 /* Check for pHyp error */
638                 if (ret) {
639                         dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
640                                 __func__, ret, op.hcall_err);
641                         dev = NULL;
642                         goto sw;
643                 }
644
645                 /* Check for hardware error */
646                 ret = nx842_validate_result(dev, &csbcpb->csb);
647                 if (ret) {
648                         dev = NULL;
649                         goto sw;
650                 }
651
652                 /* HW decompression success */
653                 inbuf += hdr->sizes[i];
654                 outbuf += csbcpb->csb.processed_byte_count;
655                 continue;
656
657 sw:
658                 /* software decompression */
659                 size = max_sync_size;
660                 ret = sw842_decompress(
661                         (unsigned char *)inbuf, hdr->sizes[i],
662                         (unsigned char *)outbuf, &size, wmem);
663                 if (ret)
664                         pr_debug("%s: sw842_decompress failed with %d\n",
665                                 __func__, ret);
666
667                 if (ret) {
668                         if (ret != -ENOSPC && ret != -EINVAL &&
669                                         ret != -EMSGSIZE)
670                                 ret = -EIO;
671                         goto unlock;
672                 }
673
674                 /* SW decompression success */
675                 inbuf += hdr->sizes[i];
676                 outbuf += size;
677         }
678
679         *outlen = (unsigned int)(outbuf - (unsigned long)out);
680
681 unlock:
682         if (ret)
683                 /* decompress fail */
684                 nx842_inc_decomp_failed(local_devdata);
685         else {
686                 if (!dev)
687                         /* software decompress */
688                         nx842_inc_swdecomp(local_devdata);
689                 nx842_inc_decomp_complete(local_devdata);
690                 ibm_nx842_incr_hist(local_devdata->counters->decomp_times,
691                         (get_tb() - start_time) / tb_ticks_per_usec);
692         }
693
694         rcu_read_unlock();
695         return ret;
696 }
697 EXPORT_SYMBOL_GPL(nx842_decompress);
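/*
 * Decompression counterpart to the sketch after nx842_compress() above: the
 * same working-memory buffer may be reused, @out must be a page-aligned
 * PAGE_SIZE buffer, and *outlen must be initialized to PAGE_SIZE before the
 * call (the function rejects anything else with -EINVAL).
 */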
698
699 /**
700  * nx842_OF_set_defaults -- Set default (disabled) values for devdata
701  *
702  * @devdata - struct nx842_devdata to update
703  *
704  * Returns:
705  *  0 on success
706  *  -ENOENT if @devdata ptr is NULL
707  */
708 static int nx842_OF_set_defaults(struct nx842_devdata *devdata)
709 {
710         if (devdata) {
711                 devdata->max_sync_size = 0;
712                 devdata->max_sync_sg = 0;
713                 devdata->max_sg_len = 0;
714                 devdata->status = UNAVAILABLE;
715                 return 0;
716         } else
717                 return -ENOENT;
718 }
719
720 /**
721  * nx842_OF_upd_status -- Update the device info from OF status prop
722  *
723  * The status property indicates if the accelerator is enabled.  If the
724  * device is in the OF tree it indicates that the hardware is present.
725  * The status field indicates if the device is enabled when the status
726  * is 'okay'.  Otherwise the device driver will be disabled.
727  *
728  * @devdata - struct nx842_devdata to update
729  * @prop - struct property pointer containing the status for the update
730  *
731  * Returns:
732  *  0 - Device is available
733  *  -EINVAL - Device is not available
734  */
735 static int nx842_OF_upd_status(struct nx842_devdata *devdata,
736                                         struct property *prop) {
737         int ret = 0;
738         const char *status = (const char *)prop->value;
739
740         if (!strncmp(status, "okay", (size_t)prop->length)) {
741                 devdata->status = AVAILABLE;
742         } else {
743                 dev_info(devdata->dev, "%s: status '%s' is not 'okay'\n",
744                                 __func__, status);
745                 devdata->status = UNAVAILABLE;
746         }
747
748         return ret;
749 }
750
751 /**
752  * nx842_OF_upd_maxsglen -- Update the device info from OF maxsglen prop
753  *
754  * Definition of the 'ibm,max-sg-len' OF property:
755  *  This field indicates the maximum byte length of a scatter list
756  *  for the platform facility. It is a single cell encoded as with encode-int.
757  *
758  * Example:
759  *  # od -x ibm,max-sg-len
760  *  0000000 0000 0ff0
761  *
762  *  In this example, the maximum byte length of a scatter list is
763  *  0x0ff0 (4,080).
764  *
765  * @devdata - struct nx842_devdata to update
766  * @prop - struct property pointer containing the max-sg-len for the update
767  *
768  * Returns:
769  *  0 on success
770  *  -EINVAL on failure
771  */
772 static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata,
773                                         struct property *prop) {
774         int ret = 0;
775         const int *maxsglen = prop->value;
776
777         if (prop->length != sizeof(*maxsglen)) {
778                 dev_err(devdata->dev, "%s: unexpected format for ibm,max-sg-len property\n", __func__);
779                 dev_dbg(devdata->dev, "%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__,
780                                 prop->length, sizeof(*maxsglen));
781                 ret = -EINVAL;
782         } else {
783                 devdata->max_sg_len = (unsigned int)min(*maxsglen,
784                                 (int)NX842_HW_PAGE_SIZE);
785         }
786
787         return ret;
788 }
789
790 /**
791  * nx842_OF_upd_maxsyncop -- Update the device info from OF maxsyncop prop
792  *
793  * Definition of the 'ibm,max-sync-cop' OF property:
794  *  Two series of cells.  The first series of cells represents the maximums
795  *  that can be synchronously compressed. The second series of cells
796  *  represents the maximums that can be synchronously decompressed.
797  *  1. The first cell in each series contains the count of the number of
798  *     data length, scatter list elements pairs that follow - each being
799  *     of the form
800  *    a. One cell data byte length
801  *    b. One cell total number of scatter list elements
802  *
803  * Example:
804  *  # od -x ibm,max-sync-cop
805  *  0000000 0000 0001 0000 1000 0000 01fe 0000 0001
806  *  0000020 0000 1000 0000 01fe
807  *
808  *  In this example, compression supports 0x1000 (4,096) data byte length
809  *  and 0x1fe (510) total scatter list elements.  Decompression supports
810  *  0x1000 (4,096) data byte length and 0x1fe (510) total scatter list
811  *  elements.
812  *
813  * @devdata - struct nx842_devdata to update
814  * @prop - struct property pointer containing the maxsyncop for the update
815  *
816  * Returns:
817  *  0 on success
818  *  -EINVAL on failure
819  */
820 static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata,
821                                         struct property *prop) {
822         int ret = 0;
823         const struct maxsynccop_t {
824                 int comp_elements;
825                 int comp_data_limit;
826                 int comp_sg_limit;
827                 int decomp_elements;
828                 int decomp_data_limit;
829                 int decomp_sg_limit;
830         } *maxsynccop;
831
832         if (prop->length != sizeof(*maxsynccop)) {
833                 dev_err(devdata->dev, "%s: unexpected format for ibm,max-sync-cop property\n", __func__);
834                 dev_dbg(devdata->dev, "%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->length,
835                                 sizeof(*maxsynccop));
836                 ret = -EINVAL;
837                 goto out;
838         }
839
840         maxsynccop = (const struct maxsynccop_t *)prop->value;
841
842         /* Use one limit rather than separate limits for compression and
843          * decompression. Set a maximum for this so as not to exceed the
844          * size that the header can support and round the value down to
845          * the hardware page size (4K) */
846         devdata->max_sync_size =
847                         (unsigned int)min(maxsynccop->comp_data_limit,
848                                         maxsynccop->decomp_data_limit);
849
850         devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size,
851                                         SIZE_64K);
852
853         if (devdata->max_sync_size < SIZE_4K) {
854                 dev_err(devdata->dev, "%s: hardware max data size (%u) is "
855                                 "less than the driver minimum, unable to use "
856                                 "the hardware device\n",
857                                 __func__, devdata->max_sync_size);
858                 ret = -EINVAL;
859                 goto out;
860         }
861
862         devdata->max_sync_sg = (unsigned int)min(maxsynccop->comp_sg_limit,
863                                                 maxsynccop->decomp_sg_limit);
864         if (devdata->max_sync_sg < 1) {
865                 dev_err(devdata->dev, "%s: hardware max sg size (%u) is "
866                                 "less than the driver minimum, unable to use "
867                                 "the hardware device\n",
868                                 __func__, devdata->max_sync_sg);
869                 ret = -EINVAL;
870                 goto out;
871         }
872
873 out:
874         return ret;
875 }
876
877 /**
878  *
879  * nx842_OF_upd -- Handle OF properties updates for the device.
880  *
881  * Set all properties from the OF tree.  Optionally, a new property
882  * can be provided by the @new_prop pointer to overwrite an existing value.
883  * The device will remain disabled until all values are valid, this function
884  * will return an error for updates unless all values are valid.
885  *
886  * @new_prop: If not NULL, this property is being updated.  If NULL, update
887  *  all properties from the current values in the OF tree.
888  *
889  * Returns:
890  *  0 - Success
891  *  -ENOMEM - Could not allocate memory for new devdata structure
892  *  -EINVAL - property value not found, new_prop is not a recognized
893  *      property for the device or property value is not valid.
894  *  -ENODEV - Device is not available
895  */
896 static int nx842_OF_upd(struct property *new_prop)
897 {
898         struct nx842_devdata *old_devdata = NULL;
899         struct nx842_devdata *new_devdata = NULL;
900         struct device_node *of_node = NULL;
901         struct property *status = NULL;
902         struct property *maxsglen = NULL;
903         struct property *maxsyncop = NULL;
904         int ret = 0;
905         unsigned long flags;
906
907         spin_lock_irqsave(&devdata_mutex, flags);
908         old_devdata = rcu_dereference_check(devdata,
909                         lockdep_is_held(&devdata_mutex));
910         if (old_devdata)
911                 of_node = old_devdata->dev->of_node;
912
913         if (!old_devdata || !of_node) {
914                 pr_err("%s: device is not available\n", __func__);
915                 spin_unlock_irqrestore(&devdata_mutex, flags);
916                 return -ENODEV;
917         }
918
919         new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
920         if (!new_devdata) {
921                 dev_err(old_devdata->dev, "%s: Could not allocate memory for device data\n", __func__);
922                 ret = -ENOMEM;
923                 goto error_out;
924         }
925
926         memcpy(new_devdata, old_devdata, sizeof(*old_devdata));
927         new_devdata->counters = old_devdata->counters;
928
929         /* Set ptrs for existing properties */
930         status = of_find_property(of_node, "status", NULL);
931         maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL);
932         maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL);
933         if (!status || !maxsglen || !maxsyncop) {
934                 dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__);
935                 ret = -EINVAL;
936                 goto error_out;
937         }
938
939         /* Set ptr to new property if provided */
940         if (new_prop) {
941                 /* Single property */
942                 if (!strncmp(new_prop->name, "status", new_prop->length)) {
943                         status = new_prop;
944
945                 } else if (!strncmp(new_prop->name, "ibm,max-sg-len",
946                                         new_prop->length)) {
947                         maxsglen = new_prop;
948
949                 } else if (!strncmp(new_prop->name, "ibm,max-sync-cop",
950                                         new_prop->length)) {
951                         maxsyncop = new_prop;
952
953                 } else {
954                         /*
955                          * Skip the update, the property being updated
956                          * has no impact.
957                          */
958                         goto out;
959                 }
960         }
961
962         /* Perform property updates */
963         ret = nx842_OF_upd_status(new_devdata, status);
964         if (ret)
965                 goto error_out;
966
967         ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen);
968         if (ret)
969                 goto error_out;
970
971         ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop);
972         if (ret)
973                 goto error_out;
974
975 out:
976         dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n",
977                         __func__, new_devdata->max_sync_size,
978                         old_devdata->max_sync_size);
979         dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n",
980                         __func__, new_devdata->max_sync_sg,
981                         old_devdata->max_sync_sg);
982         dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n",
983                         __func__, new_devdata->max_sg_len,
984                         old_devdata->max_sg_len);
985
986         rcu_assign_pointer(devdata, new_devdata);
987         spin_unlock_irqrestore(&devdata_mutex, flags);
988         synchronize_rcu();
989         dev_set_drvdata(new_devdata->dev, new_devdata);
990         kfree(old_devdata);
991         return 0;
992
993 error_out:
994         if (new_devdata) {
995                 dev_info(old_devdata->dev, "%s: device disabled\n", __func__);
996                 nx842_OF_set_defaults(new_devdata);
997                 rcu_assign_pointer(devdata, new_devdata);
998                 spin_unlock_irqrestore(&devdata_mutex, flags);
999                 synchronize_rcu();
1000                 dev_set_drvdata(new_devdata->dev, new_devdata);
1001                 kfree(old_devdata);
1002         } else {
1003                 dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__);
1004                 spin_unlock_irqrestore(&devdata_mutex, flags);
1005         }
1006
1007         if (!ret)
1008                 ret = -EINVAL;
1009         return ret;
1010 }
1011
1012 /**
1013  * nx842_OF_notifier - Process updates to OF properties for the device
1014  *
1015  * @np: notifier block
1016  * @action: notifier action
1017  * @update: struct pSeries_reconfig_prop_update pointer if action is
1018  *      PSERIES_UPDATE_PROPERTY
1019  *
1020  * Returns:
1021  *      NOTIFY_OK on success
1022  *      NOTIFY_BAD encoded with error number on failure, use
1023  *              notifier_to_errno() to decode this value
1024  */
1025 static int nx842_OF_notifier(struct notifier_block *np, unsigned long action,
1026                              void *update)
1027 {
1028         struct of_prop_reconfig *upd = update;
1029         struct nx842_devdata *local_devdata;
1030         struct device_node *node = NULL;
1031
1032         rcu_read_lock();
1033         local_devdata = rcu_dereference(devdata);
1034         if (local_devdata)
1035                 node = local_devdata->dev->of_node;
1036
1037         if (local_devdata &&
1038                         action == OF_RECONFIG_UPDATE_PROPERTY &&
1039                         !strcmp(upd->dn->name, node->name)) {
1040                 rcu_read_unlock();
1041                 nx842_OF_upd(upd->prop);
1042         } else
1043                 rcu_read_unlock();
1044
1045         return NOTIFY_OK;
1046 }
1047
1048 static struct notifier_block nx842_of_nb = {
1049         .notifier_call = nx842_OF_notifier,
1050 };
1051
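/*
 * Generate a read-only sysfs show routine per performance counter; each one
 * prints the current atomic64 value while holding rcu_read_lock() so the
 * devdata pointer remains valid for the read.
 */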
1052 #define nx842_counter_read(_name)                                       \
1053 static ssize_t nx842_##_name##_show(struct device *dev,         \
1054                 struct device_attribute *attr,                          \
1055                 char *buf) {                                            \
1056         struct nx842_devdata *local_devdata;                    \
1057         int p = 0;                                                      \
1058         rcu_read_lock();                                                \
1059         local_devdata = rcu_dereference(devdata);                       \
1060         if (local_devdata)                                              \
1061                 p = snprintf(buf, PAGE_SIZE, "%ld\n",                   \
1062                        atomic64_read(&local_devdata->counters->_name)); \
1063         rcu_read_unlock();                                              \
1064         return p;                                                       \
1065 }
1066
1067 #define NX842DEV_COUNTER_ATTR_RO(_name)                                 \
1068         nx842_counter_read(_name);                                      \
1069         static struct device_attribute dev_attr_##_name = __ATTR(_name, \
1070                                                 0444,                   \
1071                                                 nx842_##_name##_show,\
1072                                                 NULL);
1073
1074 NX842DEV_COUNTER_ATTR_RO(comp_complete);
1075 NX842DEV_COUNTER_ATTR_RO(comp_failed);
1076 NX842DEV_COUNTER_ATTR_RO(decomp_complete);
1077 NX842DEV_COUNTER_ATTR_RO(decomp_failed);
1078 NX842DEV_COUNTER_ATTR_RO(swdecomp);
1079
1080 static ssize_t nx842_timehist_show(struct device *,
1081                 struct device_attribute *, char *);
1082
1083 static struct device_attribute dev_attr_comp_times = __ATTR(comp_times, 0444,
1084                 nx842_timehist_show, NULL);
1085 static struct device_attribute dev_attr_decomp_times = __ATTR(decomp_times,
1086                 0444, nx842_timehist_show, NULL);
1087
1088 static ssize_t nx842_timehist_show(struct device *dev,
1089                 struct device_attribute *attr, char *buf) {
1090         char *p = buf;
1091         struct nx842_devdata *local_devdata;
1092         atomic64_t *times;
1093         int bytes_remain = PAGE_SIZE;
1094         int bytes;
1095         int i;
1096
1097         rcu_read_lock();
1098         local_devdata = rcu_dereference(devdata);
1099         if (!local_devdata) {
1100                 rcu_read_unlock();
1101                 return 0;
1102         }
1103
1104         if (attr == &dev_attr_comp_times)
1105                 times = local_devdata->counters->comp_times;
1106         else if (attr == &dev_attr_decomp_times)
1107                 times = local_devdata->counters->decomp_times;
1108         else {
1109                 rcu_read_unlock();
1110                 return 0;
1111         }
1112
1113         for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) {
1114                 bytes = snprintf(p, bytes_remain, "%u-%uus:\t%ld\n",
1115                                i ? (2<<(i-1)) : 0, (2<<i)-1,
1116                                atomic64_read(&times[i]));
1117                 bytes_remain -= bytes;
1118                 p += bytes;
1119         }
1120         /* The last bucket holds everything over
1121          * 2<<(NX842_HIST_SLOTS - 2) us */
1122         bytes = snprintf(p, bytes_remain, "%uus - :\t%ld\n",
1123                         2<<(NX842_HIST_SLOTS - 2),
1124                         atomic64_read(&times[(NX842_HIST_SLOTS - 1)]));
1125         p += bytes;
1126
1127         rcu_read_unlock();
1128         return p - buf;
1129 }
1130
1131 static struct attribute *nx842_sysfs_entries[] = {
1132         &dev_attr_comp_complete.attr,
1133         &dev_attr_comp_failed.attr,
1134         &dev_attr_decomp_complete.attr,
1135         &dev_attr_decomp_failed.attr,
1136         &dev_attr_swdecomp.attr,
1137         &dev_attr_comp_times.attr,
1138         &dev_attr_decomp_times.attr,
1139         NULL,
1140 };
1141
1142 static struct attribute_group nx842_attribute_group = {
1143         .name = NULL,           /* put in device directory */
1144         .attrs = nx842_sysfs_entries,
1145 };
1146
1147 static int __init nx842_probe(struct vio_dev *viodev,
1148                                   const struct vio_device_id *id)
1149 {
1150         struct nx842_devdata *old_devdata, *new_devdata = NULL;
1151         unsigned long flags;
1152         int ret = 0;
1153
1154         spin_lock_irqsave(&devdata_mutex, flags);
1155         old_devdata = rcu_dereference_check(devdata,
1156                         lockdep_is_held(&devdata_mutex));
1157
1158         if (old_devdata && old_devdata->vdev != NULL) {
1159                 dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__);
1160                 ret = -1;
1161                 goto error_unlock;
1162         }
1163
1164         dev_set_drvdata(&viodev->dev, NULL);
1165
1166         new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
1167         if (!new_devdata) {
1168                 dev_err(&viodev->dev, "%s: Could not allocate memory for device data\n", __func__);
1169                 ret = -ENOMEM;
1170                 goto error_unlock;
1171         }
1172
1173         new_devdata->counters = kzalloc(sizeof(*new_devdata->counters),
1174                         GFP_NOFS);
1175         if (!new_devdata->counters) {
1176                 dev_err(&viodev->dev, "%s: Could not allocate memory for performance counters\n", __func__);
1177                 ret = -ENOMEM;
1178                 goto error_unlock;
1179         }
1180
1181         new_devdata->vdev = viodev;
1182         new_devdata->dev = &viodev->dev;
1183         nx842_OF_set_defaults(new_devdata);
1184
1185         rcu_assign_pointer(devdata, new_devdata);
1186         spin_unlock_irqrestore(&devdata_mutex, flags);
1187         synchronize_rcu();
1188         kfree(old_devdata);
1189
1190         of_reconfig_notifier_register(&nx842_of_nb);
1191
1192         ret = nx842_OF_upd(NULL);
1193         if (ret && ret != -ENODEV) {
1194                 dev_err(&viodev->dev, "could not parse device tree. %d\n", ret);
1195                 ret = -1;
1196                 goto error;
1197         }
1198
1199         rcu_read_lock();
1200         if (dev_set_drvdata(&viodev->dev, rcu_dereference(devdata))) {
1201                 rcu_read_unlock();
1202                 dev_err(&viodev->dev, "failed to set driver data for device\n");
1203                 ret = -1;
1204                 goto error;
1205         }
1206         rcu_read_unlock();
1207
1208         if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) {
1209                 dev_err(&viodev->dev, "could not create sysfs device attributes\n");
1210                 ret = -1;
1211                 goto error;
1212         }
1213
1214         return 0;
1215
1216 error_unlock:
1217         spin_unlock_irqrestore(&devdata_mutex, flags);
1218         if (new_devdata)
1219                 kfree(new_devdata->counters);
1220         kfree(new_devdata);
1221 error:
1222         return ret;
1223 }
1224
1225 static int __exit nx842_remove(struct vio_dev *viodev)
1226 {
1227         struct nx842_devdata *old_devdata;
1228         unsigned long flags;
1229
1230         pr_info("Removing IBM Power 842 compression device\n");
1231         sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group);
1232
1233         spin_lock_irqsave(&devdata_mutex, flags);
1234         old_devdata = rcu_dereference_check(devdata,
1235                         lockdep_is_held(&devdata_mutex));
1236         of_reconfig_notifier_unregister(&nx842_of_nb);
1237         rcu_assign_pointer(devdata, NULL);
1238         spin_unlock_irqrestore(&devdata_mutex, flags);
1239         synchronize_rcu();
1240         dev_set_drvdata(&viodev->dev, NULL);
1241         if (old_devdata)
1242                 kfree(old_devdata->counters);
1243         kfree(old_devdata);
1244         return 0;
1245 }
1246
1247 static struct vio_device_id nx842_driver_ids[] = {
1248         {"ibm,compression-v1", "ibm,compression"},
1249         {"", ""},
1250 };
1251
1252 static struct vio_driver nx842_driver = {
1253         .name = MODULE_NAME,
1254         .probe = nx842_probe,
1255         .remove = nx842_remove,
1256         .get_desired_dma = nx842_get_desired_dma,
1257         .id_table = nx842_driver_ids,
1258 };
1259
1260 static int __init nx842_init(void)
1261 {
1262         struct nx842_devdata *new_devdata;
1263         pr_info("Registering IBM Power 842 compression driver\n");
1264
1265         RCU_INIT_POINTER(devdata, NULL);
1266         new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL);
1267         if (!new_devdata) {
1268                 pr_err("Could not allocate memory for device data\n");
1269                 return -ENOMEM;
1270         }
1271         new_devdata->status = UNAVAILABLE;
1272         RCU_INIT_POINTER(devdata, new_devdata);
1273
1274         return vio_register_driver(&nx842_driver);
1275 }
1276
1277 module_init(nx842_init);
1278
1279 static void __exit nx842_exit(void)
1280 {
1281         struct nx842_devdata *old_devdata;
1282         unsigned long flags;
1283
1284         pr_info("Exiting IBM Power 842 compression driver\n");
1285         spin_lock_irqsave(&devdata_mutex, flags);
1286         old_devdata = rcu_dereference_check(devdata,
1287                         lockdep_is_held(&devdata_mutex));
1288         rcu_assign_pointer(devdata, NULL);
1289         spin_unlock_irqrestore(&devdata_mutex, flags);
1290         synchronize_rcu();
1291         if (old_devdata)
1292                 dev_set_drvdata(old_devdata->dev, NULL);
1293         kfree(old_devdata);
1294         vio_unregister_driver(&nx842_driver);
1295 }
1296
1297 module_exit(nx842_exit);
1298
1299 /*********************************
1300  * 842 software decompressor
1301  *********************************/
1302 typedef int (*sw842_template_op)(const char **, int *, unsigned char **,
1303                                                 struct sw842_fifo *);
1304
1305 static int sw842_data8(const char **, int *, unsigned char **,
1306                                                 struct sw842_fifo *);
1307 static int sw842_data4(const char **, int *, unsigned char **,
1308                                                 struct sw842_fifo *);
1309 static int sw842_data2(const char **, int *, unsigned char **,
1310                                                 struct sw842_fifo *);
1311 static int sw842_ptr8(const char **, int *, unsigned char **,
1312                                                 struct sw842_fifo *);
1313 static int sw842_ptr4(const char **, int *, unsigned char **,
1314                                                 struct sw842_fifo *);
1315 static int sw842_ptr2(const char **, int *, unsigned char **,
1316                                                 struct sw842_fifo *);
1317
1318 /* special templates */
1319 #define SW842_TMPL_REPEAT 0x1B
1320 #define SW842_TMPL_ZEROS 0x1C
1321 #define SW842_TMPL_EOF 0x1E
1322
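/*
 * Decompression dispatch table indexed by the 5-bit template value read from
 * the stream.  Each row lists up to four ops (2/4/8-byte literals or fifo
 * back-references) that together reproduce one 8-byte output chunk; a NULL
 * entry terminates a shorter row.  The special repeat/zeros/EOF templates
 * defined above fall outside this table's 0-25 range.
 */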
1323 static sw842_template_op sw842_tmpl_ops[26][4] = {
1324         { sw842_data8, NULL}, /* 0 (00000) */
1325         { sw842_data4, sw842_data2, sw842_ptr2,  NULL},
1326         { sw842_data4, sw842_ptr2,  sw842_data2, NULL},
1327         { sw842_data4, sw842_ptr2,  sw842_ptr2,  NULL},
1328         { sw842_data4, sw842_ptr4,  NULL},
1329         { sw842_data2, sw842_ptr2,  sw842_data4, NULL},
1330         { sw842_data2, sw842_ptr2,  sw842_data2, sw842_ptr2},
1331         { sw842_data2, sw842_ptr2,  sw842_ptr2,  sw842_data2},
1332         { sw842_data2, sw842_ptr2,  sw842_ptr2,  sw842_ptr2},
1333         { sw842_data2, sw842_ptr2,  sw842_ptr4,  NULL},
1334         { sw842_ptr2,  sw842_data2, sw842_data4, NULL}, /* 10 (01010) */
1335         { sw842_ptr2,  sw842_data4, sw842_ptr2,  NULL},
1336         { sw842_ptr2,  sw842_data2, sw842_ptr2,  sw842_data2},
1337         { sw842_ptr2,  sw842_data2, sw842_ptr2,  sw842_ptr2},
1338         { sw842_ptr2,  sw842_data2, sw842_ptr4,  NULL},
1339         { sw842_ptr2,  sw842_ptr2,  sw842_data4, NULL},
1340         { sw842_ptr2,  sw842_ptr2,  sw842_data2, sw842_ptr2},
1341         { sw842_ptr2,  sw842_ptr2,  sw842_ptr2,  sw842_data2},
1342         { sw842_ptr2,  sw842_ptr2,  sw842_ptr2,  sw842_ptr2},
1343         { sw842_ptr2,  sw842_ptr2,  sw842_ptr4,  NULL},
1344         { sw842_ptr4,  sw842_data4, NULL}, /* 20 (10100) */
1345         { sw842_ptr4,  sw842_data2, sw842_ptr2,  NULL},
1346         { sw842_ptr4,  sw842_ptr2,  sw842_data2, NULL},
1347         { sw842_ptr4,  sw842_ptr2,  sw842_ptr2,  NULL},
1348         { sw842_ptr4,  sw842_ptr4,  NULL},
1349         { sw842_ptr8,  NULL}
1350 };
1351
1352 /* Software decompress helpers */
1353
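/*
 * The compressed stream is consumed at bit granularity: sw842_get_byte()
 * loads 16 bits at *buf, discards the 'bit' bits already consumed and
 * returns the next 8 bits; each helper then advances *buf and *bit by the
 * number of bits it actually used.
 */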
1354 static uint8_t sw842_get_byte(const char *buf, int bit)
1355 {
1356         uint8_t tmpl;
1357         uint16_t tmp;
1358         tmp = htons(*(uint16_t *)(buf));
1359         tmp = (uint16_t)(tmp << bit);
1360         tmp = ntohs(tmp);
1361         memcpy(&tmpl, &tmp, 1);
1362         return tmpl;
1363 }
1364
1365 static uint8_t sw842_get_template(const char **buf, int *bit)
1366 {
1367         uint8_t byte;
1368         byte = sw842_get_byte(*buf, *bit);
1369         byte = byte >> 3;
1370         byte &= 0x1F;
1371         *buf += (*bit + 5) / 8;
1372         *bit = (*bit + 5) % 8;
1373         return byte;
1374 }
1375
1376 /* repeat_count is 6 bits wide (one more bit than the 5-bit template) */
static uint8_t sw842_get_repeat_count(const char **buf, int *bit)
{
        uint8_t byte;
        byte = sw842_get_byte(*buf, *bit);
        byte = byte >> 2;
        byte &= 0x3F;
        *buf += (*bit + 6) / 8;
        *bit = (*bit + 6) % 8;
        return byte;
}

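/*
 * The ptrN getters read the back-reference index that follows a ptrN
 * template op: an 8-bit index into the 2-byte FIFO, a 9-bit index into
 * the 4-byte FIFO, and an 8-bit index into the 8-byte FIFO (which is why
 * sw842_get_ptr8() simply reuses sw842_get_ptr2()).
 */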
static uint8_t sw842_get_ptr2(const char **buf, int *bit)
{
        uint8_t ptr;
        ptr = sw842_get_byte(*buf, *bit);
        (*buf)++;
        return ptr;
}

static uint16_t sw842_get_ptr4(const char **buf, int *bit,
                struct sw842_fifo *fifo)
{
        uint16_t ptr;
        ptr = htons(*(uint16_t *)(*buf));
        ptr = (uint16_t)(ptr << *bit);
        ptr = ptr >> 7;
        ptr &= 0x01FF;
        *buf += (*bit + 9) / 8;
        *bit = (*bit + 9) % 8;
        return ptr;
}

static uint8_t sw842_get_ptr8(const char **buf, int *bit,
                struct sw842_fifo *fifo)
{
        return sw842_get_ptr2(buf, bit);
}

/* Software decompress template ops */

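/*
 * Each template op consumes its operand from the input bitstream and emits
 * bytes at *outbuf: the dataN ops copy N literal bytes, the ptrN ops copy an
 * N-byte dictionary entry.  A ptrN op returns nonzero when its index refers
 * to a FIFO slot that has not been filled yet, which the caller treats as
 * corrupt input.
 */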
static int sw842_data8(const char **inbuf, int *inbit,
                unsigned char **outbuf, struct sw842_fifo *fifo)
{
        int ret;

        ret = sw842_data4(inbuf, inbit, outbuf, fifo);
        if (ret)
                return ret;
        ret = sw842_data4(inbuf, inbit, outbuf, fifo);
        return ret;
}

static int sw842_data4(const char **inbuf, int *inbit,
                unsigned char **outbuf, struct sw842_fifo *fifo)
{
        int ret;

        ret = sw842_data2(inbuf, inbit, outbuf, fifo);
        if (ret)
                return ret;
        ret = sw842_data2(inbuf, inbit, outbuf, fifo);
        return ret;
}

static int sw842_data2(const char **inbuf, int *inbit,
                unsigned char **outbuf, struct sw842_fifo *fifo)
{
        **outbuf = sw842_get_byte(*inbuf, *inbit);
        (*inbuf)++;
        (*outbuf)++;
        **outbuf = sw842_get_byte(*inbuf, *inbit);
        (*inbuf)++;
        (*outbuf)++;
        return 0;
}

static int sw842_ptr8(const char **inbuf, int *inbit,
                unsigned char **outbuf, struct sw842_fifo *fifo)
{
        uint8_t ptr;
        ptr = sw842_get_ptr8(inbuf, inbit, fifo);
        if (!fifo->f84_full && (ptr >= fifo->f8_count))
                return 1;
        memcpy(*outbuf, fifo->f8[ptr], 8);
        *outbuf += 8;
        return 0;
}

static int sw842_ptr4(const char **inbuf, int *inbit,
                unsigned char **outbuf, struct sw842_fifo *fifo)
{
        uint16_t ptr;
        ptr = sw842_get_ptr4(inbuf, inbit, fifo);
        if (!fifo->f84_full && (ptr >= fifo->f4_count))
                return 1;
        memcpy(*outbuf, fifo->f4[ptr], 4);
        *outbuf += 4;
        return 0;
}

static int sw842_ptr2(const char **inbuf, int *inbit,
                unsigned char **outbuf, struct sw842_fifo *fifo)
{
        uint8_t ptr;
        ptr = sw842_get_ptr2(inbuf, inbit);
        if (!fifo->f2_full && (ptr >= fifo->f2_count))
                return 1;
        memcpy(*outbuf, fifo->f2[ptr], 2);
        *outbuf += 2;
        return 0;
}

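/*
 * Record the 8 bytes just written to the output in the FIFO dictionaries so
 * that later ptrN templates can reference them.  Only the 4-byte count is
 * advanced here; the 4-byte entries are expected to share storage with the
 * 8-byte entries (note the combined f84_full flag).  Once a FIFO has filled
 * up, its "full" flag is set and any index into it is considered valid.
 */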
static void sw842_copy_to_fifo(const char *buf, struct sw842_fifo *fifo)
{
        unsigned char initial_f2count = fifo->f2_count;

        memcpy(fifo->f8[fifo->f8_count], buf, 8);
        fifo->f4_count += 2;
        fifo->f8_count += 1;

        if (!fifo->f84_full && fifo->f4_count >= 512) {
                fifo->f84_full = 1;
                fifo->f4_count /= 512;
        }

        memcpy(fifo->f2[fifo->f2_count++], buf, 2);
        memcpy(fifo->f2[fifo->f2_count++], buf + 2, 2);
        memcpy(fifo->f2[fifo->f2_count++], buf + 4, 2);
        memcpy(fifo->f2[fifo->f2_count++], buf + 6, 2);
        if (fifo->f2_count < initial_f2count)
                fifo->f2_full = 1;
}

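/*
 * Software decompressor for the 842 format.  The input is walked as a
 * bitstream: each group starts with a 5-bit template.  SW842_TMPL_ZEROS
 * emits 8 zero bytes, SW842_TMPL_REPEAT repeats the previous 8-byte group
 * (6-bit count, stored minus one), SW842_TMPL_EOF ends the stream, and any
 * other template runs its row of sw842_tmpl_ops[] to emit 8 bytes from
 * literals and/or dictionary references.  A group whose template is 0x00,
 * for instance, is followed by 64 bits of literal data that are copied
 * straight to the output.  On success *destlen is set to the number of
 * bytes written; on error it is set to 0 and -EINVAL (bad input) or
 * -ENOSPC (output buffer too small) is returned.
 */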
static int sw842_decompress(const unsigned char *src, int srclen,
                        unsigned char *dst, int *destlen,
                        const void *wrkmem)
{
        uint8_t tmpl;
        const char *inbuf;
        int inbit = 0;
        unsigned char *outbuf, *outbuf_end, *origbuf, *prevbuf;
        const char *inbuf_end;
        sw842_template_op op;
        int opindex;
        int i, repeat_count;
        struct sw842_fifo *fifo;
        int ret = 0;

        fifo = &((struct nx842_workmem *)(wrkmem))->swfifo;
        memset(fifo, 0, sizeof(*fifo));

        origbuf = NULL;
        inbuf = src;
        inbuf_end = src + srclen;
        outbuf = dst;
        outbuf_end = dst + *destlen;

        while ((tmpl = sw842_get_template(&inbuf, &inbit)) != SW842_TMPL_EOF) {
                if (inbuf >= inbuf_end) {
                        ret = -EINVAL;
                        goto out;
                }

                opindex = 0;
                prevbuf = origbuf;
                origbuf = outbuf;
                switch (tmpl) {
                case SW842_TMPL_REPEAT:
                        if (prevbuf == NULL) {
                                ret = -EINVAL;
                                goto out;
                        }

                        repeat_count = sw842_get_repeat_count(&inbuf,
                                                                &inbit) + 1;

                        /* Did reading the repeat count advance past the end of the input? */
                        if (inbuf > inbuf_end) {
                                ret = -EINVAL;
                                goto out;
                        }

                        for (i = 0; i < repeat_count; i++) {
                                /* Would this overflow the output buffer? */
                                if ((outbuf + 8) > outbuf_end) {
                                        ret = -ENOSPC;
                                        goto out;
                                }

                                memcpy(outbuf, prevbuf, 8);
                                sw842_copy_to_fifo(outbuf, fifo);
                                outbuf += 8;
                        }
                        break;

                case SW842_TMPL_ZEROS:
                        /* Would this overflow the output buffer? */
                        if ((outbuf + 8) > outbuf_end) {
                                ret = -ENOSPC;
                                goto out;
                        }

                        memset(outbuf, 0, 8);
                        sw842_copy_to_fifo(outbuf, fifo);
                        outbuf += 8;
                        break;

                default:
                        if (tmpl > 25) {
                                ret = -EINVAL;
                                goto out;
                        }

                        /* Would this read past the end of the input buffer? */
                        if ((inbuf + 2) > inbuf_end) {
                                ret = -EINVAL;
                                goto out;
                        }

                        /* Would this overflow the output buffer? */
                        if ((outbuf + 8) > outbuf_end) {
                                ret = -ENOSPC;
                                goto out;
                        }

                        while (opindex < 4 &&
                                (op = sw842_tmpl_ops[tmpl][opindex++])
                                        != NULL) {
                                ret = (*op)(&inbuf, &inbit, &outbuf, fifo);
                                if (ret) {
                                        ret = -EINVAL;
                                        goto out;
                                }
                                sw842_copy_to_fifo(origbuf, fifo);
                        }
                }
        }

out:
        if (!ret)
                *destlen = (unsigned int)(outbuf - dst);
        else
                *destlen = 0;

        return ret;
}