Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi...
[sfrench/cifs-2.6.git] / kernel / power / swap.c
1 /*
2  * linux/kernel/power/swap.c
3  *
4  * This file provides functions for reading the suspend image from
5  * and writing it to a swap partition.
6  *
7  * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
8  * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
9  * Copyright (C) 2010 Bojan Smojver <bojan@rexursive.com>
10  *
11  * This file is released under the GPLv2.
12  *
13  */
14
15 #include <linux/module.h>
16 #include <linux/file.h>
17 #include <linux/delay.h>
18 #include <linux/bitops.h>
19 #include <linux/genhd.h>
20 #include <linux/device.h>
21 #include <linux/bio.h>
22 #include <linux/blkdev.h>
23 #include <linux/swap.h>
24 #include <linux/swapops.h>
25 #include <linux/pm.h>
26 #include <linux/slab.h>
27 #include <linux/lzo.h>
28 #include <linux/vmalloc.h>
29 #include <linux/cpumask.h>
30 #include <linux/atomic.h>
31 #include <linux/kthread.h>
32 #include <linux/crc32.h>
33
34 #include "power.h"
35
/*
 * Signature copied into the sig field of the swap header by mark_swapfiles()
 * once an image has been written (9 characters plus the terminating NUL,
 * i.e. exactly the 10 bytes of that field).
 */
#define HIBERNATE_SIG	"S1SUSPEND"
37
38 /*
39  *      The swap map is a data structure used for keeping track of each page
40  *      written to a swap partition.  It consists of many swap_map_page
41  *      structures that contain each an array of MAP_PAGE_ENTRIES swap entries.
42  *      These structures are stored on the swap and linked together with the
43  *      help of the .next_swap member.
44  *
45  *      The swap map is created during suspend.  The swap map pages are
46  *      allocated and populated one at a time, so we only need one memory
47  *      page to set up the entire structure.
48  *
49  *      During resume we pick up all swap_map_page structures into a list.
50  */
51
52 #define MAP_PAGE_ENTRIES        (PAGE_SIZE / sizeof(sector_t) - 1)
53
54 struct swap_map_page {
55         sector_t entries[MAP_PAGE_ENTRIES];
56         sector_t next_swap;
57 };
58
/*
 * In-memory singly linked list of swap map pages, built by
 * get_swap_reader() while walking the on-swap chain.
 */
struct swap_map_page_list {
	struct swap_map_page *map;		/* copy of one map page */
	struct swap_map_page_list *next;	/* following list element */
};
63
64 /**
65  *      The swap_map_handle structure is used for handling swap in
66  *      a file-alike way
67  */
68
69 struct swap_map_handle {
70         struct swap_map_page *cur;
71         struct swap_map_page_list *maps;
72         sector_t cur_swap;
73         sector_t first_sector;
74         unsigned int k;
75         unsigned long nr_free_pages, written;
76         u32 crc32;
77 };
78
79 struct swsusp_header {
80         char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int) -
81                       sizeof(u32)];
82         u32     crc32;
83         sector_t image;
84         unsigned int flags;     /* Flags to pass to the "boot" kernel */
85         char    orig_sig[10];
86         char    sig[10];
87 } __attribute__((packed));
88
89 static struct swsusp_header *swsusp_header;
90
91 /**
92  *      The following functions are used for tracing the allocated
93  *      swap pages, so that they can be freed in case of an error.
94  */
95
96 struct swsusp_extent {
97         struct rb_node node;
98         unsigned long start;
99         unsigned long end;
100 };
101
102 static struct rb_root swsusp_extents = RB_ROOT;
103
104 static int swsusp_extents_insert(unsigned long swap_offset)
105 {
106         struct rb_node **new = &(swsusp_extents.rb_node);
107         struct rb_node *parent = NULL;
108         struct swsusp_extent *ext;
109
110         /* Figure out where to put the new node */
111         while (*new) {
112                 ext = container_of(*new, struct swsusp_extent, node);
113                 parent = *new;
114                 if (swap_offset < ext->start) {
115                         /* Try to merge */
116                         if (swap_offset == ext->start - 1) {
117                                 ext->start--;
118                                 return 0;
119                         }
120                         new = &((*new)->rb_left);
121                 } else if (swap_offset > ext->end) {
122                         /* Try to merge */
123                         if (swap_offset == ext->end + 1) {
124                                 ext->end++;
125                                 return 0;
126                         }
127                         new = &((*new)->rb_right);
128                 } else {
129                         /* It already is in the tree */
130                         return -EINVAL;
131                 }
132         }
133         /* Add the new node and rebalance the tree. */
134         ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL);
135         if (!ext)
136                 return -ENOMEM;
137
138         ext->start = swap_offset;
139         ext->end = swap_offset;
140         rb_link_node(&ext->node, parent, new);
141         rb_insert_color(&ext->node, &swsusp_extents);
142         return 0;
143 }
144
145 /**
146  *      alloc_swapdev_block - allocate a swap page and register that it has
147  *      been allocated, so that it can be freed in case of an error.
148  */
149
150 sector_t alloc_swapdev_block(int swap)
151 {
152         unsigned long offset;
153
154         offset = swp_offset(get_swap_page_of_type(swap));
155         if (offset) {
156                 if (swsusp_extents_insert(offset))
157                         swap_free(swp_entry(swap, offset));
158                 else
159                         return swapdev_block(swap, offset);
160         }
161         return 0;
162 }
163
164 /**
165  *      free_all_swap_pages - free swap pages allocated for saving image data.
166  *      It also frees the extents used to register which swap entries had been
167  *      allocated.
168  */
169
170 void free_all_swap_pages(int swap)
171 {
172         struct rb_node *node;
173
174         while ((node = swsusp_extents.rb_node)) {
175                 struct swsusp_extent *ext;
176                 unsigned long offset;
177
178                 ext = container_of(node, struct swsusp_extent, node);
179                 rb_erase(node, &swsusp_extents);
180                 for (offset = ext->start; offset <= ext->end; offset++)
181                         swap_free(swp_entry(swap, offset));
182
183                 kfree(ext);
184         }
185 }
186
187 int swsusp_swap_in_use(void)
188 {
189         return (swsusp_extents.rb_node != NULL);
190 }
191
192 /*
193  * General things
194  */
195
/* Swap type used for the image; assigned by swsusp_swap_check(). */
static unsigned short root_swap = 0xffff;

/* Block device of the resume partition; filled in by swsusp_swap_check(). */
struct block_device *hib_resume_bdev;
198
199 /*
200  * Saving part
201  */
202
203 static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
204 {
205         int error;
206
207         hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL);
208         if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
209             !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
210                 memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
211                 memcpy(swsusp_header->sig, HIBERNATE_SIG, 10);
212                 swsusp_header->image = handle->first_sector;
213                 swsusp_header->flags = flags;
214                 if (flags & SF_CRC32_MODE)
215                         swsusp_header->crc32 = handle->crc32;
216                 error = hib_bio_write_page(swsusp_resume_block,
217                                         swsusp_header, NULL);
218         } else {
219                 printk(KERN_ERR "PM: Swap header not found!\n");
220                 error = -ENODEV;
221         }
222         return error;
223 }
224
225 /**
226  *      swsusp_swap_check - check if the resume device is a swap device
227  *      and get its index (if so)
228  *
229  *      This is called before saving image
230  */
231 static int swsusp_swap_check(void)
232 {
233         int res;
234
235         res = swap_type_of(swsusp_resume_device, swsusp_resume_block,
236                         &hib_resume_bdev);
237         if (res < 0)
238                 return res;
239
240         root_swap = res;
241         res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL);
242         if (res)
243                 return res;
244
245         res = set_blocksize(hib_resume_bdev, PAGE_SIZE);
246         if (res < 0)
247                 blkdev_put(hib_resume_bdev, FMODE_WRITE);
248
249         return res;
250 }
251
252 /**
253  *      write_page - Write one page to given swap location.
254  *      @buf:           Address we're writing.
255  *      @offset:        Offset of the swap page we're writing to.
256  *      @bio_chain:     Link the next write BIO here
257  */
258
259 static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
260 {
261         void *src;
262         int ret;
263
264         if (!offset)
265                 return -ENOSPC;
266
267         if (bio_chain) {
268                 src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
269                 if (src) {
270                         copy_page(src, buf);
271                 } else {
272                         ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */
273                         if (ret)
274                                 return ret;
275                         src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
276                         if (src) {
277                                 copy_page(src, buf);
278                         } else {
279                                 WARN_ON_ONCE(1);
280                                 bio_chain = NULL;       /* Go synchronous */
281                                 src = buf;
282                         }
283                 }
284         } else {
285                 src = buf;
286         }
287         return hib_bio_write_page(offset, src, bio_chain);
288 }
289
290 static void release_swap_writer(struct swap_map_handle *handle)
291 {
292         if (handle->cur)
293                 free_page((unsigned long)handle->cur);
294         handle->cur = NULL;
295 }
296
297 static int get_swap_writer(struct swap_map_handle *handle)
298 {
299         int ret;
300
301         ret = swsusp_swap_check();
302         if (ret) {
303                 if (ret != -ENOSPC)
304                         printk(KERN_ERR "PM: Cannot find swap device, try "
305                                         "swapon -a.\n");
306                 return ret;
307         }
308         handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
309         if (!handle->cur) {
310                 ret = -ENOMEM;
311                 goto err_close;
312         }
313         handle->cur_swap = alloc_swapdev_block(root_swap);
314         if (!handle->cur_swap) {
315                 ret = -ENOSPC;
316                 goto err_rel;
317         }
318         handle->k = 0;
319         handle->nr_free_pages = nr_free_pages() >> 1;
320         handle->written = 0;
321         handle->first_sector = handle->cur_swap;
322         return 0;
323 err_rel:
324         release_swap_writer(handle);
325 err_close:
326         swsusp_close(FMODE_WRITE);
327         return ret;
328 }
329
330 static int swap_write_page(struct swap_map_handle *handle, void *buf,
331                                 struct bio **bio_chain)
332 {
333         int error = 0;
334         sector_t offset;
335
336         if (!handle->cur)
337                 return -EINVAL;
338         offset = alloc_swapdev_block(root_swap);
339         error = write_page(buf, offset, bio_chain);
340         if (error)
341                 return error;
342         handle->cur->entries[handle->k++] = offset;
343         if (handle->k >= MAP_PAGE_ENTRIES) {
344                 offset = alloc_swapdev_block(root_swap);
345                 if (!offset)
346                         return -ENOSPC;
347                 handle->cur->next_swap = offset;
348                 error = write_page(handle->cur, handle->cur_swap, bio_chain);
349                 if (error)
350                         goto out;
351                 clear_page(handle->cur);
352                 handle->cur_swap = offset;
353                 handle->k = 0;
354         }
355         if (bio_chain && ++handle->written > handle->nr_free_pages) {
356                 error = hib_wait_on_bio_chain(bio_chain);
357                 if (error)
358                         goto out;
359                 handle->written = 0;
360         }
361  out:
362         return error;
363 }
364
365 static int flush_swap_writer(struct swap_map_handle *handle)
366 {
367         if (handle->cur && handle->cur_swap)
368                 return write_page(handle->cur, handle->cur_swap, NULL);
369         else
370                 return -EINVAL;
371 }
372
373 static int swap_writer_finish(struct swap_map_handle *handle,
374                 unsigned int flags, int error)
375 {
376         if (!error) {
377                 flush_swap_writer(handle);
378                 printk(KERN_INFO "PM: S");
379                 error = mark_swapfiles(handle, flags);
380                 printk("|\n");
381         }
382
383         if (error)
384                 free_all_swap_pages(root_swap);
385         release_swap_writer(handle);
386         swsusp_close(FMODE_WRITE);
387
388         return error;
389 }
390
/*
 * Each compressed chunk is prefixed with its compressed length so that the
 * reader knows how much data to consume.
 */
#define LZO_HEADER	sizeof(size_t)

/* Size of one uncompressed chunk, in pages and in bytes. */
#define LZO_UNC_PAGES	32
#define LZO_UNC_SIZE	(LZO_UNC_PAGES * PAGE_SIZE)

/*
 * Worst-case size of one compressed chunk including its length header,
 * rounded up to whole pages (in pages and in bytes).
 */
#define LZO_CMP_PAGES	DIV_ROUND_UP(lzo1x_worst_compress(LZO_UNC_SIZE) + \
				     LZO_HEADER, PAGE_SIZE)
#define LZO_CMP_SIZE	(LZO_CMP_PAGES * PAGE_SIZE)

/* Upper bound on the number of compression/decompression threads. */
#define LZO_THREADS	3

/* Upper bound on the number of pages used for read buffering. */
#define LZO_READ_PAGES	(MAP_PAGE_ENTRIES * 8)
408
409
410 /**
411  *      save_image - save the suspend image data
412  */
413
414 static int save_image(struct swap_map_handle *handle,
415                       struct snapshot_handle *snapshot,
416                       unsigned int nr_to_write)
417 {
418         unsigned int m;
419         int ret;
420         int nr_pages;
421         int err2;
422         struct bio *bio;
423         struct timeval start;
424         struct timeval stop;
425
426         printk(KERN_INFO "PM: Saving image data pages (%u pages) ...     ",
427                 nr_to_write);
428         m = nr_to_write / 100;
429         if (!m)
430                 m = 1;
431         nr_pages = 0;
432         bio = NULL;
433         do_gettimeofday(&start);
434         while (1) {
435                 ret = snapshot_read_next(snapshot);
436                 if (ret <= 0)
437                         break;
438                 ret = swap_write_page(handle, data_of(*snapshot), &bio);
439                 if (ret)
440                         break;
441                 if (!(nr_pages % m))
442                         printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
443                 nr_pages++;
444         }
445         err2 = hib_wait_on_bio_chain(&bio);
446         do_gettimeofday(&stop);
447         if (!ret)
448                 ret = err2;
449         if (!ret)
450                 printk(KERN_CONT "\b\b\b\bdone\n");
451         else
452                 printk(KERN_CONT "\n");
453         swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
454         return ret;
455 }
456
457 /**
458  * Structure used for CRC32.
459  */
460 struct crc_data {
461         struct task_struct *thr;                  /* thread */
462         atomic_t ready;                           /* ready to start flag */
463         atomic_t stop;                            /* ready to stop flag */
464         unsigned run_threads;                     /* nr current threads */
465         wait_queue_head_t go;                     /* start crc update */
466         wait_queue_head_t done;                   /* crc update done */
467         u32 *crc32;                               /* points to handle's crc32 */
468         size_t *unc_len[LZO_THREADS];             /* uncompressed lengths */
469         unsigned char *unc[LZO_THREADS];          /* uncompressed data */
470 };
471
472 /**
473  * CRC32 update function that runs in its own thread.
474  */
475 static int crc32_threadfn(void *data)
476 {
477         struct crc_data *d = data;
478         unsigned i;
479
480         while (1) {
481                 wait_event(d->go, atomic_read(&d->ready) ||
482                                   kthread_should_stop());
483                 if (kthread_should_stop()) {
484                         d->thr = NULL;
485                         atomic_set(&d->stop, 1);
486                         wake_up(&d->done);
487                         break;
488                 }
489                 atomic_set(&d->ready, 0);
490
491                 for (i = 0; i < d->run_threads; i++)
492                         *d->crc32 = crc32_le(*d->crc32,
493                                              d->unc[i], *d->unc_len[i]);
494                 atomic_set(&d->stop, 1);
495                 wake_up(&d->done);
496         }
497         return 0;
498 }
499 /**
500  * Structure used for LZO data compression.
501  */
502 struct cmp_data {
503         struct task_struct *thr;                  /* thread */
504         atomic_t ready;                           /* ready to start flag */
505         atomic_t stop;                            /* ready to stop flag */
506         int ret;                                  /* return code */
507         wait_queue_head_t go;                     /* start compression */
508         wait_queue_head_t done;                   /* compression done */
509         size_t unc_len;                           /* uncompressed length */
510         size_t cmp_len;                           /* compressed length */
511         unsigned char unc[LZO_UNC_SIZE];          /* uncompressed buffer */
512         unsigned char cmp[LZO_CMP_SIZE];          /* compressed buffer */
513         unsigned char wrk[LZO1X_1_MEM_COMPRESS];  /* compression workspace */
514 };
515
516 /**
517  * Compression function that runs in its own thread.
518  */
519 static int lzo_compress_threadfn(void *data)
520 {
521         struct cmp_data *d = data;
522
523         while (1) {
524                 wait_event(d->go, atomic_read(&d->ready) ||
525                                   kthread_should_stop());
526                 if (kthread_should_stop()) {
527                         d->thr = NULL;
528                         d->ret = -1;
529                         atomic_set(&d->stop, 1);
530                         wake_up(&d->done);
531                         break;
532                 }
533                 atomic_set(&d->ready, 0);
534
535                 d->ret = lzo1x_1_compress(d->unc, d->unc_len,
536                                           d->cmp + LZO_HEADER, &d->cmp_len,
537                                           d->wrk);
538                 atomic_set(&d->stop, 1);
539                 wake_up(&d->done);
540         }
541         return 0;
542 }
543
544 /**
545  * save_image_lzo - Save the suspend image data compressed with LZO.
546  * @handle: Swap mam handle to use for saving the image.
547  * @snapshot: Image to read data from.
548  * @nr_to_write: Number of pages to save.
549  */
550 static int save_image_lzo(struct swap_map_handle *handle,
551                           struct snapshot_handle *snapshot,
552                           unsigned int nr_to_write)
553 {
554         unsigned int m;
555         int ret = 0;
556         int nr_pages;
557         int err2;
558         struct bio *bio;
559         struct timeval start;
560         struct timeval stop;
561         size_t off;
562         unsigned thr, run_threads, nr_threads;
563         unsigned char *page = NULL;
564         struct cmp_data *data = NULL;
565         struct crc_data *crc = NULL;
566
567         /*
568          * We'll limit the number of threads for compression to limit memory
569          * footprint.
570          */
571         nr_threads = num_online_cpus() - 1;
572         nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
573
574         page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
575         if (!page) {
576                 printk(KERN_ERR "PM: Failed to allocate LZO page\n");
577                 ret = -ENOMEM;
578                 goto out_clean;
579         }
580
581         data = vmalloc(sizeof(*data) * nr_threads);
582         if (!data) {
583                 printk(KERN_ERR "PM: Failed to allocate LZO data\n");
584                 ret = -ENOMEM;
585                 goto out_clean;
586         }
587         for (thr = 0; thr < nr_threads; thr++)
588                 memset(&data[thr], 0, offsetof(struct cmp_data, go));
589
590         crc = kmalloc(sizeof(*crc), GFP_KERNEL);
591         if (!crc) {
592                 printk(KERN_ERR "PM: Failed to allocate crc\n");
593                 ret = -ENOMEM;
594                 goto out_clean;
595         }
596         memset(crc, 0, offsetof(struct crc_data, go));
597
598         /*
599          * Start the compression threads.
600          */
601         for (thr = 0; thr < nr_threads; thr++) {
602                 init_waitqueue_head(&data[thr].go);
603                 init_waitqueue_head(&data[thr].done);
604
605                 data[thr].thr = kthread_run(lzo_compress_threadfn,
606                                             &data[thr],
607                                             "image_compress/%u", thr);
608                 if (IS_ERR(data[thr].thr)) {
609                         data[thr].thr = NULL;
610                         printk(KERN_ERR
611                                "PM: Cannot start compression threads\n");
612                         ret = -ENOMEM;
613                         goto out_clean;
614                 }
615         }
616
617         /*
618          * Adjust number of free pages after all allocations have been done.
619          * We don't want to run out of pages when writing.
620          */
621         handle->nr_free_pages = nr_free_pages() >> 1;
622
623         /*
624          * Start the CRC32 thread.
625          */
626         init_waitqueue_head(&crc->go);
627         init_waitqueue_head(&crc->done);
628
629         handle->crc32 = 0;
630         crc->crc32 = &handle->crc32;
631         for (thr = 0; thr < nr_threads; thr++) {
632                 crc->unc[thr] = data[thr].unc;
633                 crc->unc_len[thr] = &data[thr].unc_len;
634         }
635
636         crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32");
637         if (IS_ERR(crc->thr)) {
638                 crc->thr = NULL;
639                 printk(KERN_ERR "PM: Cannot start CRC32 thread\n");
640                 ret = -ENOMEM;
641                 goto out_clean;
642         }
643
644         printk(KERN_INFO
645                 "PM: Using %u thread(s) for compression.\n"
646                 "PM: Compressing and saving image data (%u pages) ...     ",
647                 nr_threads, nr_to_write);
648         m = nr_to_write / 100;
649         if (!m)
650                 m = 1;
651         nr_pages = 0;
652         bio = NULL;
653         do_gettimeofday(&start);
654         for (;;) {
655                 for (thr = 0; thr < nr_threads; thr++) {
656                         for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
657                                 ret = snapshot_read_next(snapshot);
658                                 if (ret < 0)
659                                         goto out_finish;
660
661                                 if (!ret)
662                                         break;
663
664                                 memcpy(data[thr].unc + off,
665                                        data_of(*snapshot), PAGE_SIZE);
666
667                                 if (!(nr_pages % m))
668                                         printk(KERN_CONT "\b\b\b\b%3d%%",
669                                                nr_pages / m);
670                                 nr_pages++;
671                         }
672                         if (!off)
673                                 break;
674
675                         data[thr].unc_len = off;
676
677                         atomic_set(&data[thr].ready, 1);
678                         wake_up(&data[thr].go);
679                 }
680
681                 if (!thr)
682                         break;
683
684                 crc->run_threads = thr;
685                 atomic_set(&crc->ready, 1);
686                 wake_up(&crc->go);
687
688                 for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
689                         wait_event(data[thr].done,
690                                    atomic_read(&data[thr].stop));
691                         atomic_set(&data[thr].stop, 0);
692
693                         ret = data[thr].ret;
694
695                         if (ret < 0) {
696                                 printk(KERN_ERR "PM: LZO compression failed\n");
697                                 goto out_finish;
698                         }
699
700                         if (unlikely(!data[thr].cmp_len ||
701                                      data[thr].cmp_len >
702                                      lzo1x_worst_compress(data[thr].unc_len))) {
703                                 printk(KERN_ERR
704                                        "PM: Invalid LZO compressed length\n");
705                                 ret = -1;
706                                 goto out_finish;
707                         }
708
709                         *(size_t *)data[thr].cmp = data[thr].cmp_len;
710
711                         /*
712                          * Given we are writing one page at a time to disk, we
713                          * copy that much from the buffer, although the last
714                          * bit will likely be smaller than full page. This is
715                          * OK - we saved the length of the compressed data, so
716                          * any garbage at the end will be discarded when we
717                          * read it.
718                          */
719                         for (off = 0;
720                              off < LZO_HEADER + data[thr].cmp_len;
721                              off += PAGE_SIZE) {
722                                 memcpy(page, data[thr].cmp + off, PAGE_SIZE);
723
724                                 ret = swap_write_page(handle, page, &bio);
725                                 if (ret)
726                                         goto out_finish;
727                         }
728                 }
729
730                 wait_event(crc->done, atomic_read(&crc->stop));
731                 atomic_set(&crc->stop, 0);
732         }
733
734 out_finish:
735         err2 = hib_wait_on_bio_chain(&bio);
736         do_gettimeofday(&stop);
737         if (!ret)
738                 ret = err2;
739         if (!ret) {
740                 printk(KERN_CONT "\b\b\b\bdone\n");
741         } else {
742                 printk(KERN_CONT "\n");
743         }
744         swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
745 out_clean:
746         if (crc) {
747                 if (crc->thr)
748                         kthread_stop(crc->thr);
749                 kfree(crc);
750         }
751         if (data) {
752                 for (thr = 0; thr < nr_threads; thr++)
753                         if (data[thr].thr)
754                                 kthread_stop(data[thr].thr);
755                 vfree(data);
756         }
757         if (page) free_page((unsigned long)page);
758
759         return ret;
760 }
761
762 /**
763  *      enough_swap - Make sure we have enough swap to save the image.
764  *
765  *      Returns TRUE or FALSE after checking the total amount of swap
766  *      space avaiable from the resume partition.
767  */
768
769 static int enough_swap(unsigned int nr_pages, unsigned int flags)
770 {
771         unsigned int free_swap = count_swap_pages(root_swap, 1);
772         unsigned int required;
773
774         pr_debug("PM: Free swap pages: %u\n", free_swap);
775
776         required = PAGES_FOR_IO + nr_pages;
777         return free_swap > required;
778 }
779
780 /**
781  *      swsusp_write - Write entire image and metadata.
782  *      @flags: flags to pass to the "boot" kernel in the image header
783  *
784  *      It is important _NOT_ to umount filesystems at this point. We want
785  *      them synced (in case something goes wrong) but we DO not want to mark
786  *      filesystem clean: it is not. (And it does not matter, if we resume
787  *      correctly, we'll mark system clean, anyway.)
788  */
789
790 int swsusp_write(unsigned int flags)
791 {
792         struct swap_map_handle handle;
793         struct snapshot_handle snapshot;
794         struct swsusp_info *header;
795         unsigned long pages;
796         int error;
797
798         pages = snapshot_get_image_size();
799         error = get_swap_writer(&handle);
800         if (error) {
801                 printk(KERN_ERR "PM: Cannot get swap writer\n");
802                 return error;
803         }
804         if (flags & SF_NOCOMPRESS_MODE) {
805                 if (!enough_swap(pages, flags)) {
806                         printk(KERN_ERR "PM: Not enough free swap\n");
807                         error = -ENOSPC;
808                         goto out_finish;
809                 }
810         }
811         memset(&snapshot, 0, sizeof(struct snapshot_handle));
812         error = snapshot_read_next(&snapshot);
813         if (error < PAGE_SIZE) {
814                 if (error >= 0)
815                         error = -EFAULT;
816
817                 goto out_finish;
818         }
819         header = (struct swsusp_info *)data_of(snapshot);
820         error = swap_write_page(&handle, header, NULL);
821         if (!error) {
822                 error = (flags & SF_NOCOMPRESS_MODE) ?
823                         save_image(&handle, &snapshot, pages - 1) :
824                         save_image_lzo(&handle, &snapshot, pages - 1);
825         }
826 out_finish:
827         error = swap_writer_finish(&handle, flags, error);
828         return error;
829 }
830
831 /**
832  *      The following functions allow us to read data using a swap map
833  *      in a file-alike way
834  */
835
836 static void release_swap_reader(struct swap_map_handle *handle)
837 {
838         struct swap_map_page_list *tmp;
839
840         while (handle->maps) {
841                 if (handle->maps->map)
842                         free_page((unsigned long)handle->maps->map);
843                 tmp = handle->maps;
844                 handle->maps = handle->maps->next;
845                 kfree(tmp);
846         }
847         handle->cur = NULL;
848 }
849
850 static int get_swap_reader(struct swap_map_handle *handle,
851                 unsigned int *flags_p)
852 {
853         int error;
854         struct swap_map_page_list *tmp, *last;
855         sector_t offset;
856
857         *flags_p = swsusp_header->flags;
858
859         if (!swsusp_header->image) /* how can this happen? */
860                 return -EINVAL;
861
862         handle->cur = NULL;
863         last = handle->maps = NULL;
864         offset = swsusp_header->image;
865         while (offset) {
866                 tmp = kmalloc(sizeof(*handle->maps), GFP_KERNEL);
867                 if (!tmp) {
868                         release_swap_reader(handle);
869                         return -ENOMEM;
870                 }
871                 memset(tmp, 0, sizeof(*tmp));
872                 if (!handle->maps)
873                         handle->maps = tmp;
874                 if (last)
875                         last->next = tmp;
876                 last = tmp;
877
878                 tmp->map = (struct swap_map_page *)
879                            __get_free_page(__GFP_WAIT | __GFP_HIGH);
880                 if (!tmp->map) {
881                         release_swap_reader(handle);
882                         return -ENOMEM;
883                 }
884
885                 error = hib_bio_read_page(offset, tmp->map, NULL);
886                 if (error) {
887                         release_swap_reader(handle);
888                         return error;
889                 }
890                 offset = tmp->map->next_swap;
891         }
892         handle->k = 0;
893         handle->cur = handle->maps->map;
894         return 0;
895 }
896
897 static int swap_read_page(struct swap_map_handle *handle, void *buf,
898                                 struct bio **bio_chain)
899 {
900         sector_t offset;
901         int error;
902         struct swap_map_page_list *tmp;
903
904         if (!handle->cur)
905                 return -EINVAL;
906         offset = handle->cur->entries[handle->k];
907         if (!offset)
908                 return -EFAULT;
909         error = hib_bio_read_page(offset, buf, bio_chain);
910         if (error)
911                 return error;
912         if (++handle->k >= MAP_PAGE_ENTRIES) {
913                 handle->k = 0;
914                 free_page((unsigned long)handle->maps->map);
915                 tmp = handle->maps;
916                 handle->maps = handle->maps->next;
917                 kfree(tmp);
918                 if (!handle->maps)
919                         release_swap_reader(handle);
920                 else
921                         handle->cur = handle->maps->map;
922         }
923         return error;
924 }
925
/*
 * swap_reader_finish - tear down a swap map reader
 * @handle: swap map handle to release
 *
 * Releases whatever map pages @handle still holds.  Always returns 0.
 */
static int swap_reader_finish(struct swap_map_handle *handle)
{
	release_swap_reader(handle);
	return 0;
}
932
933 /**
934  *      load_image - load the image using the swap map handle
935  *      @handle and the snapshot handle @snapshot
936  *      (assume there are @nr_pages pages to load)
937  */
938
939 static int load_image(struct swap_map_handle *handle,
940                       struct snapshot_handle *snapshot,
941                       unsigned int nr_to_read)
942 {
943         unsigned int m;
944         int ret = 0;
945         struct timeval start;
946         struct timeval stop;
947         struct bio *bio;
948         int err2;
949         unsigned nr_pages;
950
951         printk(KERN_INFO "PM: Loading image data pages (%u pages) ...     ",
952                 nr_to_read);
953         m = nr_to_read / 100;
954         if (!m)
955                 m = 1;
956         nr_pages = 0;
957         bio = NULL;
958         do_gettimeofday(&start);
959         for ( ; ; ) {
960                 ret = snapshot_write_next(snapshot);
961                 if (ret <= 0)
962                         break;
963                 ret = swap_read_page(handle, data_of(*snapshot), &bio);
964                 if (ret)
965                         break;
966                 if (snapshot->sync_read)
967                         ret = hib_wait_on_bio_chain(&bio);
968                 if (ret)
969                         break;
970                 if (!(nr_pages % m))
971                         printk("\b\b\b\b%3d%%", nr_pages / m);
972                 nr_pages++;
973         }
974         err2 = hib_wait_on_bio_chain(&bio);
975         do_gettimeofday(&stop);
976         if (!ret)
977                 ret = err2;
978         if (!ret) {
979                 printk("\b\b\b\bdone\n");
980                 snapshot_write_finalize(snapshot);
981                 if (!snapshot_image_loaded(snapshot))
982                         ret = -ENODATA;
983         } else
984                 printk("\n");
985         swsusp_show_speed(&start, &stop, nr_to_read, "Read");
986         return ret;
987 }
988
989 /**
990  * Structure used for LZO data decompression.
991  */
992 struct dec_data {
993         struct task_struct *thr;                  /* thread */
994         atomic_t ready;                           /* ready to start flag */
995         atomic_t stop;                            /* ready to stop flag */
996         int ret;                                  /* return code */
997         wait_queue_head_t go;                     /* start decompression */
998         wait_queue_head_t done;                   /* decompression done */
999         size_t unc_len;                           /* uncompressed length */
1000         size_t cmp_len;                           /* compressed length */
1001         unsigned char unc[LZO_UNC_SIZE];          /* uncompressed buffer */
1002         unsigned char cmp[LZO_CMP_SIZE];          /* compressed buffer */
1003 };
1004
1005 /**
1006  * Deompression function that runs in its own thread.
1007  */
1008 static int lzo_decompress_threadfn(void *data)
1009 {
1010         struct dec_data *d = data;
1011
1012         while (1) {
1013                 wait_event(d->go, atomic_read(&d->ready) ||
1014                                   kthread_should_stop());
1015                 if (kthread_should_stop()) {
1016                         d->thr = NULL;
1017                         d->ret = -1;
1018                         atomic_set(&d->stop, 1);
1019                         wake_up(&d->done);
1020                         break;
1021                 }
1022                 atomic_set(&d->ready, 0);
1023
1024                 d->unc_len = LZO_UNC_SIZE;
1025                 d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len,
1026                                                d->unc, &d->unc_len);
1027                 atomic_set(&d->stop, 1);
1028                 wake_up(&d->done);
1029         }
1030         return 0;
1031 }
1032
1033 /**
1034  * load_image_lzo - Load compressed image data and decompress them with LZO.
1035  * @handle: Swap map handle to use for loading data.
1036  * @snapshot: Image to copy uncompressed data into.
1037  * @nr_to_read: Number of pages to load.
1038  */
1039 static int load_image_lzo(struct swap_map_handle *handle,
1040                           struct snapshot_handle *snapshot,
1041                           unsigned int nr_to_read)
1042 {
1043         unsigned int m;
1044         int ret = 0;
1045         int eof = 0;
1046         struct bio *bio;
1047         struct timeval start;
1048         struct timeval stop;
1049         unsigned nr_pages;
1050         size_t off;
1051         unsigned i, thr, run_threads, nr_threads;
1052         unsigned ring = 0, pg = 0, ring_size = 0,
1053                  have = 0, want, need, asked = 0;
1054         unsigned long read_pages;
1055         unsigned char **page = NULL;
1056         struct dec_data *data = NULL;
1057         struct crc_data *crc = NULL;
1058
1059         /*
1060          * We'll limit the number of threads for decompression to limit memory
1061          * footprint.
1062          */
1063         nr_threads = num_online_cpus() - 1;
1064         nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
1065
1066         page = vmalloc(sizeof(*page) * LZO_READ_PAGES);
1067         if (!page) {
1068                 printk(KERN_ERR "PM: Failed to allocate LZO page\n");
1069                 ret = -ENOMEM;
1070                 goto out_clean;
1071         }
1072
1073         data = vmalloc(sizeof(*data) * nr_threads);
1074         if (!data) {
1075                 printk(KERN_ERR "PM: Failed to allocate LZO data\n");
1076                 ret = -ENOMEM;
1077                 goto out_clean;
1078         }
1079         for (thr = 0; thr < nr_threads; thr++)
1080                 memset(&data[thr], 0, offsetof(struct dec_data, go));
1081
1082         crc = kmalloc(sizeof(*crc), GFP_KERNEL);
1083         if (!crc) {
1084                 printk(KERN_ERR "PM: Failed to allocate crc\n");
1085                 ret = -ENOMEM;
1086                 goto out_clean;
1087         }
1088         memset(crc, 0, offsetof(struct crc_data, go));
1089
1090         /*
1091          * Start the decompression threads.
1092          */
1093         for (thr = 0; thr < nr_threads; thr++) {
1094                 init_waitqueue_head(&data[thr].go);
1095                 init_waitqueue_head(&data[thr].done);
1096
1097                 data[thr].thr = kthread_run(lzo_decompress_threadfn,
1098                                             &data[thr],
1099                                             "image_decompress/%u", thr);
1100                 if (IS_ERR(data[thr].thr)) {
1101                         data[thr].thr = NULL;
1102                         printk(KERN_ERR
1103                                "PM: Cannot start decompression threads\n");
1104                         ret = -ENOMEM;
1105                         goto out_clean;
1106                 }
1107         }
1108
1109         /*
1110          * Start the CRC32 thread.
1111          */
1112         init_waitqueue_head(&crc->go);
1113         init_waitqueue_head(&crc->done);
1114
1115         handle->crc32 = 0;
1116         crc->crc32 = &handle->crc32;
1117         for (thr = 0; thr < nr_threads; thr++) {
1118                 crc->unc[thr] = data[thr].unc;
1119                 crc->unc_len[thr] = &data[thr].unc_len;
1120         }
1121
1122         crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32");
1123         if (IS_ERR(crc->thr)) {
1124                 crc->thr = NULL;
1125                 printk(KERN_ERR "PM: Cannot start CRC32 thread\n");
1126                 ret = -ENOMEM;
1127                 goto out_clean;
1128         }
1129
1130         /*
1131          * Adjust number of pages for read buffering, in case we are short.
1132          */
1133         read_pages = (nr_free_pages() - snapshot_get_image_size()) >> 1;
1134         read_pages = clamp_val(read_pages, LZO_CMP_PAGES, LZO_READ_PAGES);
1135
1136         for (i = 0; i < read_pages; i++) {
1137                 page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ?
1138                                                   __GFP_WAIT | __GFP_HIGH :
1139                                                   __GFP_WAIT);
1140                 if (!page[i]) {
1141                         if (i < LZO_CMP_PAGES) {
1142                                 ring_size = i;
1143                                 printk(KERN_ERR
1144                                        "PM: Failed to allocate LZO pages\n");
1145                                 ret = -ENOMEM;
1146                                 goto out_clean;
1147                         } else {
1148                                 break;
1149                         }
1150                 }
1151         }
1152         want = ring_size = i;
1153
1154         printk(KERN_INFO
1155                 "PM: Using %u thread(s) for decompression.\n"
1156                 "PM: Loading and decompressing image data (%u pages) ...     ",
1157                 nr_threads, nr_to_read);
1158         m = nr_to_read / 100;
1159         if (!m)
1160                 m = 1;
1161         nr_pages = 0;
1162         bio = NULL;
1163         do_gettimeofday(&start);
1164
1165         ret = snapshot_write_next(snapshot);
1166         if (ret <= 0)
1167                 goto out_finish;
1168
1169         for(;;) {
1170                 for (i = 0; !eof && i < want; i++) {
1171                         ret = swap_read_page(handle, page[ring], &bio);
1172                         if (ret) {
1173                                 /*
1174                                  * On real read error, finish. On end of data,
1175                                  * set EOF flag and just exit the read loop.
1176                                  */
1177                                 if (handle->cur &&
1178                                     handle->cur->entries[handle->k]) {
1179                                         goto out_finish;
1180                                 } else {
1181                                         eof = 1;
1182                                         break;
1183                                 }
1184                         }
1185                         if (++ring >= ring_size)
1186                                 ring = 0;
1187                 }
1188                 asked += i;
1189                 want -= i;
1190
1191                 /*
1192                  * We are out of data, wait for some more.
1193                  */
1194                 if (!have) {
1195                         if (!asked)
1196                                 break;
1197
1198                         ret = hib_wait_on_bio_chain(&bio);
1199                         if (ret)
1200                                 goto out_finish;
1201                         have += asked;
1202                         asked = 0;
1203                         if (eof)
1204                                 eof = 2;
1205                 }
1206
1207                 if (crc->run_threads) {
1208                         wait_event(crc->done, atomic_read(&crc->stop));
1209                         atomic_set(&crc->stop, 0);
1210                         crc->run_threads = 0;
1211                 }
1212
1213                 for (thr = 0; have && thr < nr_threads; thr++) {
1214                         data[thr].cmp_len = *(size_t *)page[pg];
1215                         if (unlikely(!data[thr].cmp_len ||
1216                                      data[thr].cmp_len >
1217                                      lzo1x_worst_compress(LZO_UNC_SIZE))) {
1218                                 printk(KERN_ERR
1219                                        "PM: Invalid LZO compressed length\n");
1220                                 ret = -1;
1221                                 goto out_finish;
1222                         }
1223
1224                         need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER,
1225                                             PAGE_SIZE);
1226                         if (need > have) {
1227                                 if (eof > 1) {
1228                                         ret = -1;
1229                                         goto out_finish;
1230                                 }
1231                                 break;
1232                         }
1233
1234                         for (off = 0;
1235                              off < LZO_HEADER + data[thr].cmp_len;
1236                              off += PAGE_SIZE) {
1237                                 memcpy(data[thr].cmp + off,
1238                                        page[pg], PAGE_SIZE);
1239                                 have--;
1240                                 want++;
1241                                 if (++pg >= ring_size)
1242                                         pg = 0;
1243                         }
1244
1245                         atomic_set(&data[thr].ready, 1);
1246                         wake_up(&data[thr].go);
1247                 }
1248
1249                 /*
1250                  * Wait for more data while we are decompressing.
1251                  */
1252                 if (have < LZO_CMP_PAGES && asked) {
1253                         ret = hib_wait_on_bio_chain(&bio);
1254                         if (ret)
1255                                 goto out_finish;
1256                         have += asked;
1257                         asked = 0;
1258                         if (eof)
1259                                 eof = 2;
1260                 }
1261
1262                 for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
1263                         wait_event(data[thr].done,
1264                                    atomic_read(&data[thr].stop));
1265                         atomic_set(&data[thr].stop, 0);
1266
1267                         ret = data[thr].ret;
1268
1269                         if (ret < 0) {
1270                                 printk(KERN_ERR
1271                                        "PM: LZO decompression failed\n");
1272                                 goto out_finish;
1273                         }
1274
1275                         if (unlikely(!data[thr].unc_len ||
1276                                      data[thr].unc_len > LZO_UNC_SIZE ||
1277                                      data[thr].unc_len & (PAGE_SIZE - 1))) {
1278                                 printk(KERN_ERR
1279                                        "PM: Invalid LZO uncompressed length\n");
1280                                 ret = -1;
1281                                 goto out_finish;
1282                         }
1283
1284                         for (off = 0;
1285                              off < data[thr].unc_len; off += PAGE_SIZE) {
1286                                 memcpy(data_of(*snapshot),
1287                                        data[thr].unc + off, PAGE_SIZE);
1288
1289                                 if (!(nr_pages % m))
1290                                         printk("\b\b\b\b%3d%%", nr_pages / m);
1291                                 nr_pages++;
1292
1293                                 ret = snapshot_write_next(snapshot);
1294                                 if (ret <= 0) {
1295                                         crc->run_threads = thr + 1;
1296                                         atomic_set(&crc->ready, 1);
1297                                         wake_up(&crc->go);
1298                                         goto out_finish;
1299                                 }
1300                         }
1301                 }
1302
1303                 crc->run_threads = thr;
1304                 atomic_set(&crc->ready, 1);
1305                 wake_up(&crc->go);
1306         }
1307
1308 out_finish:
1309         if (crc->run_threads) {
1310                 wait_event(crc->done, atomic_read(&crc->stop));
1311                 atomic_set(&crc->stop, 0);
1312         }
1313         do_gettimeofday(&stop);
1314         if (!ret) {
1315                 printk("\b\b\b\bdone\n");
1316                 snapshot_write_finalize(snapshot);
1317                 if (!snapshot_image_loaded(snapshot))
1318                         ret = -ENODATA;
1319                 if (!ret) {
1320                         if (swsusp_header->flags & SF_CRC32_MODE) {
1321                                 if(handle->crc32 != swsusp_header->crc32) {
1322                                         printk(KERN_ERR
1323                                                "PM: Invalid image CRC32!\n");
1324                                         ret = -ENODATA;
1325                                 }
1326                         }
1327                 }
1328         } else
1329                 printk("\n");
1330         swsusp_show_speed(&start, &stop, nr_to_read, "Read");
1331 out_clean:
1332         for (i = 0; i < ring_size; i++)
1333                 free_page((unsigned long)page[i]);
1334         if (crc) {
1335                 if (crc->thr)
1336                         kthread_stop(crc->thr);
1337                 kfree(crc);
1338         }
1339         if (data) {
1340                 for (thr = 0; thr < nr_threads; thr++)
1341                         if (data[thr].thr)
1342                                 kthread_stop(data[thr].thr);
1343                 vfree(data);
1344         }
1345         if (page) vfree(page);
1346
1347         return ret;
1348 }
1349
1350 /**
1351  *      swsusp_read - read the hibernation image.
1352  *      @flags_p: flags passed by the "frozen" kernel in the image header should
1353  *                be written into this memory location
1354  */
1355
1356 int swsusp_read(unsigned int *flags_p)
1357 {
1358         int error;
1359         struct swap_map_handle handle;
1360         struct snapshot_handle snapshot;
1361         struct swsusp_info *header;
1362
1363         memset(&snapshot, 0, sizeof(struct snapshot_handle));
1364         error = snapshot_write_next(&snapshot);
1365         if (error < PAGE_SIZE)
1366                 return error < 0 ? error : -EFAULT;
1367         header = (struct swsusp_info *)data_of(snapshot);
1368         error = get_swap_reader(&handle, flags_p);
1369         if (error)
1370                 goto end;
1371         if (!error)
1372                 error = swap_read_page(&handle, header, NULL);
1373         if (!error) {
1374                 error = (*flags_p & SF_NOCOMPRESS_MODE) ?
1375                         load_image(&handle, &snapshot, header->pages - 1) :
1376                         load_image_lzo(&handle, &snapshot, header->pages - 1);
1377         }
1378         swap_reader_finish(&handle);
1379 end:
1380         if (!error)
1381                 pr_debug("PM: Image successfully loaded\n");
1382         else
1383                 pr_debug("PM: Error %d resuming\n", error);
1384         return error;
1385 }
1386
1387 /**
1388  *      swsusp_check - Check for swsusp signature in the resume device
1389  */
1390
1391 int swsusp_check(void)
1392 {
1393         int error;
1394
1395         hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device,
1396                                             FMODE_READ, NULL);
1397         if (!IS_ERR(hib_resume_bdev)) {
1398                 set_blocksize(hib_resume_bdev, PAGE_SIZE);
1399                 clear_page(swsusp_header);
1400                 error = hib_bio_read_page(swsusp_resume_block,
1401                                         swsusp_header, NULL);
1402                 if (error)
1403                         goto put;
1404
1405                 if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) {
1406                         memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
1407                         /* Reset swap signature now */
1408                         error = hib_bio_write_page(swsusp_resume_block,
1409                                                 swsusp_header, NULL);
1410                 } else {
1411                         error = -EINVAL;
1412                 }
1413
1414 put:
1415                 if (error)
1416                         blkdev_put(hib_resume_bdev, FMODE_READ);
1417                 else
1418                         pr_debug("PM: Image signature found, resuming\n");
1419         } else {
1420                 error = PTR_ERR(hib_resume_bdev);
1421         }
1422
1423         if (error)
1424                 pr_debug("PM: Image not found (code %d)\n", error);
1425
1426         return error;
1427 }
1428
1429 /**
1430  *      swsusp_close - close swap device.
1431  */
1432
1433 void swsusp_close(fmode_t mode)
1434 {
1435         if (IS_ERR(hib_resume_bdev)) {
1436                 pr_debug("PM: Image device not initialised\n");
1437                 return;
1438         }
1439
1440         blkdev_put(hib_resume_bdev, mode);
1441 }
1442
1443 static int swsusp_header_init(void)
1444 {
1445         swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL);
1446         if (!swsusp_header)
1447                 panic("Could not allocate memory for swsusp_header\n");
1448         return 0;
1449 }
1450
1451 core_initcall(swsusp_header_init);