Merge branch 'for-jeff' of git://electric-eye.fr.zoreil.com/home/romieu/linux-2.6
[sfrench/cifs-2.6.git] / kernel / power / swsusp.c
1 /*
2  * linux/kernel/power/swsusp.c
3  *
4  * This file provides code to write suspend image to swap and read it back.
5  *
6  * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
7  * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
8  *
9  * This file is released under the GPLv2.
10  *
11  * I'd like to thank the following people for their work:
12  *
13  * Pavel Machek <pavel@ucw.cz>:
14  * Modifications, defectiveness pointing, being with me at the very beginning,
15  * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
16  *
17  * Steve Doddi <dirk@loth.demon.co.uk>:
18  * Support the possibility of hardware state restoring.
19  *
20  * Raph <grey.havens@earthling.net>:
21  * Support for preserving states of network devices and virtual console
22  * (including X and svgatextmode)
23  *
24  * Kurt Garloff <garloff@suse.de>:
25  * Straightened the critical function in order to prevent compilers from
26  * playing tricks with local variables.
27  *
28  * Andreas Mohr <a.mohr@mailto.de>
29  *
30  * Alex Badea <vampire@go.ro>:
31  * Fixed runaway init
32  *
33  * Rafael J. Wysocki <rjw@sisk.pl>
34  * Added the swap map data structure and reworked the handling of swap
35  *
36  * More state savers are welcome. Especially for the scsi layer...
37  *
38  * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
39  */
40
41 #include <linux/module.h>
42 #include <linux/mm.h>
43 #include <linux/suspend.h>
44 #include <linux/smp_lock.h>
45 #include <linux/file.h>
46 #include <linux/utsname.h>
47 #include <linux/version.h>
48 #include <linux/delay.h>
49 #include <linux/bitops.h>
50 #include <linux/spinlock.h>
51 #include <linux/genhd.h>
52 #include <linux/kernel.h>
53 #include <linux/major.h>
54 #include <linux/swap.h>
55 #include <linux/pm.h>
56 #include <linux/device.h>
57 #include <linux/buffer_head.h>
58 #include <linux/swapops.h>
59 #include <linux/bootmem.h>
60 #include <linux/syscalls.h>
61 #include <linux/highmem.h>
62 #include <linux/bio.h>
63
64 #include <asm/uaccess.h>
65 #include <asm/mmu_context.h>
66 #include <asm/pgtable.h>
67 #include <asm/tlbflush.h>
68 #include <asm/io.h>
69
70 #include "power.h"
71
72 /*
73  * Preferred image size in bytes (tunable via /sys/power/image_size).
74  * When it is set to N, swsusp will do its best to ensure the image
75  * size will not exceed N bytes, but if that is impossible, it will
76  * try to create the smallest image possible.
77  */
78 unsigned long image_size = 500 * 1024 * 1024;
79
/*
 * Highmem helpers.  With CONFIG_HIGHMEM they are only declared here (the
 * definitions are not in this file); without it they collapse to stubs
 * that report success and zero highmem pages.
 */
#ifndef CONFIG_HIGHMEM
static unsigned int count_highmem_pages(void) { return 0; }
static int save_highmem(void) { return 0; }
static int restore_highmem(void) { return 0; }
#else
unsigned int count_highmem_pages(void);
int save_highmem(void);
int restore_highmem(void);
#endif
89
90 extern char resume_file[];
91
92 #define SWSUSP_SIG      "S1SUSPEND"
93
94 static struct swsusp_header {
95         char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)];
96         swp_entry_t image;
97         char    orig_sig[10];
98         char    sig[10];
99 } __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;
100
101 static struct swsusp_info swsusp_info;
102
103 /*
104  * Saving part...
105  */
106
107 static unsigned short root_swap = 0xffff;
108
109 static int mark_swapfiles(swp_entry_t start)
110 {
111         int error;
112
113         rw_swap_page_sync(READ,
114                           swp_entry(root_swap, 0),
115                           virt_to_page((unsigned long)&swsusp_header));
116         if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
117             !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
118                 memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
119                 memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
120                 swsusp_header.image = start;
121                 error = rw_swap_page_sync(WRITE,
122                                           swp_entry(root_swap, 0),
123                                           virt_to_page((unsigned long)
124                                                        &swsusp_header));
125         } else {
126                 pr_debug("swsusp: Partition is not swap space.\n");
127                 error = -ENODEV;
128         }
129         return error;
130 }
131
132 /*
133  * Check whether the swap device is the specified resume
134  * device, irrespective of whether they are specified by
135  * identical names.
136  *
137  * (Thus, device inode aliasing is allowed.  You can say /dev/hda4
138  * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs]
139  * and they'll be considered the same device.  This is *necessary* for
140  * devfs, since the resume code can only recognize the form /dev/hda4,
141  * but the suspend code would see the long name.)
142  */
143 static inline int is_resume_device(const struct swap_info_struct *swap_info)
144 {
145         struct file *file = swap_info->swap_file;
146         struct inode *inode = file->f_dentry->d_inode;
147
148         return S_ISBLK(inode->i_mode) &&
149                 swsusp_resume_device == MKDEV(imajor(inode), iminor(inode));
150 }
151
152 static int swsusp_swap_check(void) /* This is called before saving image */
153 {
154         int i;
155
156         if (!swsusp_resume_device)
157                 return -ENODEV;
158         spin_lock(&swap_lock);
159         for (i = 0; i < MAX_SWAPFILES; i++) {
160                 if (!(swap_info[i].flags & SWP_WRITEOK))
161                         continue;
162                 if (is_resume_device(swap_info + i)) {
163                         spin_unlock(&swap_lock);
164                         root_swap = i;
165                         return 0;
166                 }
167         }
168         spin_unlock(&swap_lock);
169         return -ENODEV;
170 }
171
172 /**
173  *      write_page - Write one page to a fresh swap location.
174  *      @addr:  Address we're writing.
175  *      @loc:   Place to store the entry we used.
176  *
177  *      Allocate a new swap entry and 'sync' it. Note we discard -EIO
178  *      errors. That is an artifact left over from swsusp. It did not
179  *      check the return of rw_swap_page_sync() at all, since most pages
180  *      written back to swap would return -EIO.
181  *      This is a partial improvement, since we will at least return other
182  *      errors, though we need to eventually fix the damn code.
183  */
184 static int write_page(unsigned long addr, swp_entry_t *loc)
185 {
186         swp_entry_t entry;
187         int error = -ENOSPC;
188
189         entry = get_swap_page_of_type(root_swap);
190         if (swp_offset(entry)) {
191                 error = rw_swap_page_sync(WRITE, entry, virt_to_page(addr));
192                 if (!error || error == -EIO)
193                         *loc = entry;
194         }
195         return error;
196 }
197
198 /**
199  *      Swap map-handling functions
200  *
201  *      The swap map is a data structure used for keeping track of each page
202  *      written to the swap.  It consists of many swap_map_page structures
203  *      that contain each an array of MAP_PAGE_SIZE swap entries.
204  *      These structures are linked together with the help of either the
205  *      .next (in memory) or the .next_swap (in swap) member.
206  *
207  *      The swap map is created during suspend.  At that time we need to keep
208  *      it in memory, because we have to free all of the allocated swap
209  *      entries if an error occurs.  The memory needed is preallocated
210  *      so that we know in advance if there's enough of it.
211  *
212  *      The first swap_map_page structure is filled with the swap entries that
213  *      correspond to the first MAP_PAGE_SIZE data pages written to swap and
214  *      so on.  After all of the data pages have been written, the order
215  *      of the swap_map_page structures in the map is reversed so that they
216  *      can be read from swap in the original order.  This causes the data
217  *      pages to be loaded in exactly the same order in which they have been
218  *      saved.
219  *
220  *      During resume we only need to use one swap_map_page structure
221  *      at a time, which means that we only need to use two memory pages for
222  *      reading the image - one for reading the swap_map_page structures
223  *      and the second for reading the data pages from swap.
224  */
225
226 #define MAP_PAGE_SIZE   ((PAGE_SIZE - sizeof(swp_entry_t) - sizeof(void *)) \
227                         / sizeof(swp_entry_t))
228
229 struct swap_map_page {
230         swp_entry_t             entries[MAP_PAGE_SIZE];
231         swp_entry_t             next_swap;
232         struct swap_map_page    *next;
233 };
234
235 static inline void free_swap_map(struct swap_map_page *swap_map)
236 {
237         struct swap_map_page *swp;
238
239         while (swap_map) {
240                 swp = swap_map->next;
241                 free_page((unsigned long)swap_map);
242                 swap_map = swp;
243         }
244 }
245
246 static struct swap_map_page *alloc_swap_map(unsigned int nr_pages)
247 {
248         struct swap_map_page *swap_map, *swp;
249         unsigned n = 0;
250
251         if (!nr_pages)
252                 return NULL;
253
254         pr_debug("alloc_swap_map(): nr_pages = %d\n", nr_pages);
255         swap_map = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
256         swp = swap_map;
257         for (n = MAP_PAGE_SIZE; n < nr_pages; n += MAP_PAGE_SIZE) {
258                 swp->next = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
259                 swp = swp->next;
260                 if (!swp) {
261                         free_swap_map(swap_map);
262                         return NULL;
263                 }
264         }
265         return swap_map;
266 }
267
268 /**
269  *      reverse_swap_map - reverse the order of pages in the swap map
270  *      @swap_map
271  */
272
273 static inline struct swap_map_page *reverse_swap_map(struct swap_map_page *swap_map)
274 {
275         struct swap_map_page *prev, *next;
276
277         prev = NULL;
278         while (swap_map) {
279                 next = swap_map->next;
280                 swap_map->next = prev;
281                 prev = swap_map;
282                 swap_map = next;
283         }
284         return prev;
285 }
286
287 /**
288  *      free_swap_map_entries - free the swap entries allocated to store
289  *      the swap map @swap_map (this is only called in case of an error)
290  */
291 static inline void free_swap_map_entries(struct swap_map_page *swap_map)
292 {
293         while (swap_map) {
294                 if (swap_map->next_swap.val)
295                         swap_free(swap_map->next_swap);
296                 swap_map = swap_map->next;
297         }
298 }
299
300 /**
301  *      save_swap_map - save the swap map used for tracing the data pages
302  *      stored in the swap
303  */
304
305 static int save_swap_map(struct swap_map_page *swap_map, swp_entry_t *start)
306 {
307         swp_entry_t entry = (swp_entry_t){0};
308         int error;
309
310         while (swap_map) {
311                 swap_map->next_swap = entry;
312                 if ((error = write_page((unsigned long)swap_map, &entry)))
313                         return error;
314                 swap_map = swap_map->next;
315         }
316         *start = entry;
317         return 0;
318 }
319
320 /**
321  *      free_image_entries - free the swap entries allocated to store
322  *      the image data pages (this is only called in case of an error)
323  */
324
325 static inline void free_image_entries(struct swap_map_page *swp)
326 {
327         unsigned k;
328
329         while (swp) {
330                 for (k = 0; k < MAP_PAGE_SIZE; k++)
331                         if (swp->entries[k].val)
332                                 swap_free(swp->entries[k]);
333                 swp = swp->next;
334         }
335 }
336
/**
 *      The swap_map_handle structure is used for handling the swap map in
 *      a file-alike way
 *
 *      @cur:   swap map page currently being filled or consumed
 *      @k:     index of the next slot to use in @cur->entries[]
 */
struct swap_map_handle {
        struct swap_map_page    *cur;
        unsigned int            k;
};
346
347 static inline void init_swap_map_handle(struct swap_map_handle *handle,
348                                         struct swap_map_page *map)
349 {
350         handle->cur = map;
351         handle->k = 0;
352 }
353
354 static inline int swap_map_write_page(struct swap_map_handle *handle,
355                                       unsigned long addr)
356 {
357         int error;
358
359         error = write_page(addr, handle->cur->entries + handle->k);
360         if (error)
361                 return error;
362         if (++handle->k >= MAP_PAGE_SIZE) {
363                 handle->cur = handle->cur->next;
364                 handle->k = 0;
365         }
366         return 0;
367 }
368
369 /**
370  *      save_image_data - save the data pages pointed to by the PBEs
371  *      from the list @pblist using the swap map handle @handle
372  *      (assume there are @nr_pages data pages to save)
373  */
374
375 static int save_image_data(struct pbe *pblist,
376                            struct swap_map_handle *handle,
377                            unsigned int nr_pages)
378 {
379         unsigned int m;
380         struct pbe *p;
381         int error = 0;
382
383         printk("Saving image data pages (%u pages) ...     ", nr_pages);
384         m = nr_pages / 100;
385         if (!m)
386                 m = 1;
387         nr_pages = 0;
388         for_each_pbe (p, pblist) {
389                 error = swap_map_write_page(handle, p->address);
390                 if (error)
391                         break;
392                 if (!(nr_pages % m))
393                         printk("\b\b\b\b%3d%%", nr_pages / m);
394                 nr_pages++;
395         }
396         if (!error)
397                 printk("\b\b\b\bdone\n");
398         return error;
399 }
400
401 static void dump_info(void)
402 {
403         pr_debug(" swsusp: Version: %u\n",swsusp_info.version_code);
404         pr_debug(" swsusp: Num Pages: %ld\n",swsusp_info.num_physpages);
405         pr_debug(" swsusp: UTS Sys: %s\n",swsusp_info.uts.sysname);
406         pr_debug(" swsusp: UTS Node: %s\n",swsusp_info.uts.nodename);
407         pr_debug(" swsusp: UTS Release: %s\n",swsusp_info.uts.release);
408         pr_debug(" swsusp: UTS Version: %s\n",swsusp_info.uts.version);
409         pr_debug(" swsusp: UTS Machine: %s\n",swsusp_info.uts.machine);
410         pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info.uts.domainname);
411         pr_debug(" swsusp: CPUs: %d\n",swsusp_info.cpus);
412         pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info.image_pages);
413         pr_debug(" swsusp: Total: %ld Pages\n", swsusp_info.pages);
414 }
415
416 static void init_header(unsigned int nr_pages)
417 {
418         memset(&swsusp_info, 0, sizeof(swsusp_info));
419         swsusp_info.version_code = LINUX_VERSION_CODE;
420         swsusp_info.num_physpages = num_physpages;
421         memcpy(&swsusp_info.uts, &system_utsname, sizeof(system_utsname));
422
423         swsusp_info.cpus = num_online_cpus();
424         swsusp_info.image_pages = nr_pages;
425         swsusp_info.pages = nr_pages +
426                 ((nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
427 }
428
429 /**
430  *      pack_orig_addresses - the .orig_address fields of the PBEs from the
431  *      list starting at @pbe are stored in the array @buf[] (1 page)
432  */
433
434 static inline struct pbe *pack_orig_addresses(unsigned long *buf,
435                                               struct pbe *pbe)
436 {
437         int j;
438
439         for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
440                 buf[j] = pbe->orig_address;
441                 pbe = pbe->next;
442         }
443         if (!pbe)
444                 for (; j < PAGE_SIZE / sizeof(long); j++)
445                         buf[j] = 0;
446         return pbe;
447 }
448
449 /**
450  *      save_image_metadata - save the .orig_address fields of the PBEs
451  *      from the list @pblist using the swap map handle @handle
452  */
453
454 static int save_image_metadata(struct pbe *pblist,
455                                struct swap_map_handle *handle)
456 {
457         unsigned long *buf;
458         unsigned int n = 0;
459         struct pbe *p;
460         int error = 0;
461
462         printk("Saving image metadata ... ");
463         buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC);
464         if (!buf)
465                 return -ENOMEM;
466         p = pblist;
467         while (p) {
468                 p = pack_orig_addresses(buf, p);
469                 error = swap_map_write_page(handle, (unsigned long)buf);
470                 if (error)
471                         break;
472                 n++;
473         }
474         free_page((unsigned long)buf);
475         if (!error)
476                 printk("done (%u pages saved)\n", n);
477         return error;
478 }
479
480 /**
481  *      enough_swap - Make sure we have enough swap to save the image.
482  *
483  *      Returns TRUE or FALSE after checking the total amount of swap
484  *      space avaiable from the resume partition.
485  */
486
487 static int enough_swap(unsigned int nr_pages)
488 {
489         unsigned int free_swap = swap_info[root_swap].pages -
490                 swap_info[root_swap].inuse_pages;
491
492         pr_debug("swsusp: free swap pages: %u\n", free_swap);
493         return free_swap > (nr_pages + PAGES_FOR_IO +
494                 (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
495 }
496
497 /**
498  *      swsusp_write - Write entire image and metadata.
499  *
500  *      It is important _NOT_ to umount filesystems at this point. We want
501  *      them synced (in case something goes wrong) but we DO not want to mark
502  *      filesystem clean: it is not. (And it does not matter, if we resume
503  *      correctly, we'll mark system clean, anyway.)
504  */
505
506 int swsusp_write(struct pbe *pblist, unsigned int nr_pages)
507 {
508         struct swap_map_page *swap_map;
509         struct swap_map_handle handle;
510         swp_entry_t start;
511         int error;
512
513         if ((error = swsusp_swap_check())) {
514                 printk(KERN_ERR "swsusp: Cannot find swap device, try swapon -a.\n");
515                 return error;
516         }
517         if (!enough_swap(nr_pages)) {
518                 printk(KERN_ERR "swsusp: Not enough free swap\n");
519                 return -ENOSPC;
520         }
521
522         init_header(nr_pages);
523         swap_map = alloc_swap_map(swsusp_info.pages);
524         if (!swap_map)
525                 return -ENOMEM;
526         init_swap_map_handle(&handle, swap_map);
527
528         error = swap_map_write_page(&handle, (unsigned long)&swsusp_info);
529         if (!error)
530                 error = save_image_metadata(pblist, &handle);
531         if (!error)
532                 error = save_image_data(pblist, &handle, nr_pages);
533         if (error)
534                 goto Free_image_entries;
535
536         swap_map = reverse_swap_map(swap_map);
537         error = save_swap_map(swap_map, &start);
538         if (error)
539                 goto Free_map_entries;
540
541         dump_info();
542         printk( "S" );
543         error = mark_swapfiles(start);
544         printk( "|\n" );
545         if (error)
546                 goto Free_map_entries;
547
548 Free_swap_map:
549         free_swap_map(swap_map);
550         return error;
551
552 Free_map_entries:
553         free_swap_map_entries(swap_map);
554 Free_image_entries:
555         free_image_entries(swap_map);
556         goto Free_swap_map;
557 }
558
559 /**
560  *      swsusp_shrink_memory -  Try to free as much memory as needed
561  *
562  *      ... but do not OOM-kill anyone
563  *
564  *      Notice: all userland should be stopped before it is called, or
565  *      livelock is possible.
566  */
567
568 #define SHRINK_BITE     10000
569
570 int swsusp_shrink_memory(void)
571 {
572         long size, tmp;
573         struct zone *zone;
574         unsigned long pages = 0;
575         unsigned int i = 0;
576         char *p = "-\\|/";
577
578         printk("Shrinking memory...  ");
579         do {
580                 size = 2 * count_highmem_pages();
581                 size += size / 50 + count_data_pages();
582                 size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE +
583                         PAGES_FOR_IO;
584                 tmp = size;
585                 for_each_zone (zone)
586                         if (!is_highmem(zone))
587                                 tmp -= zone->free_pages;
588                 if (tmp > 0) {
589                         tmp = shrink_all_memory(SHRINK_BITE);
590                         if (!tmp)
591                                 return -ENOMEM;
592                         pages += tmp;
593                 } else if (size > image_size / PAGE_SIZE) {
594                         tmp = shrink_all_memory(SHRINK_BITE);
595                         pages += tmp;
596                 }
597                 printk("\b%c", p[i++%4]);
598         } while (tmp > 0);
599         printk("\bdone (%lu pages freed)\n", pages);
600
601         return 0;
602 }
603
604 int swsusp_suspend(void)
605 {
606         int error;
607
608         if ((error = arch_prepare_suspend()))
609                 return error;
610         local_irq_disable();
611         /* At this point, device_suspend() has been called, but *not*
612          * device_power_down(). We *must* device_power_down() now.
613          * Otherwise, drivers for some devices (e.g. interrupt controllers)
614          * become desynchronized with the actual state of the hardware
615          * at resume time, and evil weirdness ensues.
616          */
617         if ((error = device_power_down(PMSG_FREEZE))) {
618                 printk(KERN_ERR "Some devices failed to power down, aborting suspend\n");
619                 goto Enable_irqs;
620         }
621
622         if ((error = save_highmem())) {
623                 printk(KERN_ERR "swsusp: Not enough free pages for highmem\n");
624                 goto Restore_highmem;
625         }
626
627         save_processor_state();
628         if ((error = swsusp_arch_suspend()))
629                 printk(KERN_ERR "Error %d suspending\n", error);
630         /* Restore control flow magically appears here */
631         restore_processor_state();
632 Restore_highmem:
633         restore_highmem();
634         device_power_up();
635 Enable_irqs:
636         local_irq_enable();
637         return error;
638 }
639
640 int swsusp_resume(void)
641 {
642         int error;
643         local_irq_disable();
644         if (device_power_down(PMSG_FREEZE))
645                 printk(KERN_ERR "Some devices failed to power down, very bad\n");
646         /* We'll ignore saved state, but this gets preempt count (etc) right */
647         save_processor_state();
648         error = swsusp_arch_resume();
649         /* Code below is only ever reached in case of failure. Otherwise
650          * execution continues at place where swsusp_arch_suspend was called
651          */
652         BUG_ON(!error);
653         /* The only reason why swsusp_arch_resume() can fail is memory being
654          * very tight, so we have to free it as soon as we can to avoid
655          * subsequent failures
656          */
657         swsusp_free();
658         restore_processor_state();
659         restore_highmem();
660         touch_softlockup_watchdog();
661         device_power_up();
662         local_irq_enable();
663         return error;
664 }
665
666 /**
667  *      mark_unsafe_pages - mark the pages that cannot be used for storing
668  *      the image during resume, because they conflict with the pages that
669  *      had been used before suspend
670  */
671
672 static void mark_unsafe_pages(struct pbe *pblist)
673 {
674         struct zone *zone;
675         unsigned long zone_pfn;
676         struct pbe *p;
677
678         if (!pblist) /* a sanity check */
679                 return;
680
681         /* Clear page flags */
682         for_each_zone (zone) {
683                 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
684                         if (pfn_valid(zone_pfn + zone->zone_start_pfn))
685                                 ClearPageNosaveFree(pfn_to_page(zone_pfn +
686                                         zone->zone_start_pfn));
687         }
688
689         /* Mark orig addresses */
690         for_each_pbe (p, pblist)
691                 SetPageNosaveFree(virt_to_page(p->orig_address));
692
693 }
694
695 static void copy_page_backup_list(struct pbe *dst, struct pbe *src)
696 {
697         /* We assume both lists contain the same number of elements */
698         while (src) {
699                 dst->orig_address = src->orig_address;
700                 dst = dst->next;
701                 src = src->next;
702         }
703 }
704
705 /*
706  *      Using bio to read from swap.
707  *      This code requires a bit more work than just using buffer heads
708  *      but, it is the recommended way for 2.5/2.6.
709  *      The following are to signal the beginning and end of I/O. Bios
710  *      finish asynchronously, while we want them to happen synchronously.
711  *      A simple atomic_t, and a wait loop take care of this problem.
712  */
713
714 static atomic_t io_done = ATOMIC_INIT(0);
715
716 static int end_io(struct bio *bio, unsigned int num, int err)
717 {
718         if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
719                 panic("I/O error reading memory image");
720         atomic_set(&io_done, 0);
721         return 0;
722 }
723
724 static struct block_device *resume_bdev;
725
726 /**
727  *      submit - submit BIO request.
728  *      @rw:    READ or WRITE.
729  *      @off    physical offset of page.
730  *      @page:  page we're reading or writing.
731  *
732  *      Straight from the textbook - allocate and initialize the bio.
733  *      If we're writing, make sure the page is marked as dirty.
734  *      Then submit it and wait.
735  */
736
737 static int submit(int rw, pgoff_t page_off, void *page)
738 {
739         int error = 0;
740         struct bio *bio;
741
742         bio = bio_alloc(GFP_ATOMIC, 1);
743         if (!bio)
744                 return -ENOMEM;
745         bio->bi_sector = page_off * (PAGE_SIZE >> 9);
746         bio_get(bio);
747         bio->bi_bdev = resume_bdev;
748         bio->bi_end_io = end_io;
749
750         if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
751                 printk("swsusp: ERROR: adding page to bio at %ld\n",page_off);
752                 error = -EFAULT;
753                 goto Done;
754         }
755
756         if (rw == WRITE)
757                 bio_set_pages_dirty(bio);
758
759         atomic_set(&io_done, 1);
760         submit_bio(rw | (1 << BIO_RW_SYNC), bio);
761         while (atomic_read(&io_done))
762                 yield();
763
764  Done:
765         bio_put(bio);
766         return error;
767 }
768
769 static int bio_read_page(pgoff_t page_off, void *page)
770 {
771         return submit(READ, page_off, page);
772 }
773
774 static int bio_write_page(pgoff_t page_off, void *page)
775 {
776         return submit(WRITE, page_off, page);
777 }
778
779 /**
780  *      The following functions allow us to read data using a swap map
781  *      in a file-alike way
782  */
783
784 static inline void release_swap_map_reader(struct swap_map_handle *handle)
785 {
786         if (handle->cur)
787                 free_page((unsigned long)handle->cur);
788         handle->cur = NULL;
789 }
790
791 static inline int get_swap_map_reader(struct swap_map_handle *handle,
792                                       swp_entry_t start)
793 {
794         int error;
795
796         if (!swp_offset(start))
797                 return -EINVAL;
798         handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
799         if (!handle->cur)
800                 return -ENOMEM;
801         error = bio_read_page(swp_offset(start), handle->cur);
802         if (error) {
803                 release_swap_map_reader(handle);
804                 return error;
805         }
806         handle->k = 0;
807         return 0;
808 }
809
810 static inline int swap_map_read_page(struct swap_map_handle *handle, void *buf)
811 {
812         unsigned long offset;
813         int error;
814
815         if (!handle->cur)
816                 return -EINVAL;
817         offset = swp_offset(handle->cur->entries[handle->k]);
818         if (!offset)
819                 return -EINVAL;
820         error = bio_read_page(offset, buf);
821         if (error)
822                 return error;
823         if (++handle->k >= MAP_PAGE_SIZE) {
824                 handle->k = 0;
825                 offset = swp_offset(handle->cur->next_swap);
826                 if (!offset)
827                         release_swap_map_reader(handle);
828                 else
829                         error = bio_read_page(offset, handle->cur);
830         }
831         return error;
832 }
833
834 static int check_header(void)
835 {
836         char *reason = NULL;
837
838         dump_info();
839         if (swsusp_info.version_code != LINUX_VERSION_CODE)
840                 reason = "kernel version";
841         if (swsusp_info.num_physpages != num_physpages)
842                 reason = "memory size";
843         if (strcmp(swsusp_info.uts.sysname,system_utsname.sysname))
844                 reason = "system type";
845         if (strcmp(swsusp_info.uts.release,system_utsname.release))
846                 reason = "kernel release";
847         if (strcmp(swsusp_info.uts.version,system_utsname.version))
848                 reason = "version";
849         if (strcmp(swsusp_info.uts.machine,system_utsname.machine))
850                 reason = "machine";
851         if (reason) {
852                 printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason);
853                 return -EPERM;
854         }
855         return 0;
856 }
857
858 /**
859  *      load_image_data - load the image data using the swap map handle
860  *      @handle and store them using the page backup list @pblist
861  *      (assume there are @nr_pages pages to load)
862  */
863
864 static int load_image_data(struct pbe *pblist,
865                            struct swap_map_handle *handle,
866                            unsigned int nr_pages)
867 {
868         int error;
869         unsigned int m;
870         struct pbe *p;
871
872         if (!pblist)
873                 return -EINVAL;
874         printk("Loading image data pages (%u pages) ...     ", nr_pages);
875         m = nr_pages / 100;
876         if (!m)
877                 m = 1;
878         nr_pages = 0;
879         p = pblist;
880         while (p) {
881                 error = swap_map_read_page(handle, (void *)p->address);
882                 if (error)
883                         break;
884                 p = p->next;
885                 if (!(nr_pages % m))
886                         printk("\b\b\b\b%3d%%", nr_pages / m);
887                 nr_pages++;
888         }
889         if (!error)
890                 printk("\b\b\b\bdone\n");
891         return error;
892 }
893
894 /**
895  *      unpack_orig_addresses - copy the elements of @buf[] (1 page) to
896  *      the PBEs in the list starting at @pbe
897  */
898
899 static inline struct pbe *unpack_orig_addresses(unsigned long *buf,
900                                                 struct pbe *pbe)
901 {
902         int j;
903
904         for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
905                 pbe->orig_address = buf[j];
906                 pbe = pbe->next;
907         }
908         return pbe;
909 }
910
911 /**
912  *      load_image_metadata - load the image metadata using the swap map
913  *      handle @handle and put them into the PBEs in the list @pblist
914  */
915
916 static int load_image_metadata(struct pbe *pblist, struct swap_map_handle *handle)
917 {
918         struct pbe *p;
919         unsigned long *buf;
920         unsigned int n = 0;
921         int error = 0;
922
923         printk("Loading image metadata ... ");
924         buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC);
925         if (!buf)
926                 return -ENOMEM;
927         p = pblist;
928         while (p) {
929                 error = swap_map_read_page(handle, buf);
930                 if (error)
931                         break;
932                 p = unpack_orig_addresses(buf, p);
933                 n++;
934         }
935         free_page((unsigned long)buf);
936         if (!error)
937                 printk("done (%u pages loaded)\n", n);
938         return error;
939 }
940
941 int swsusp_read(struct pbe **pblist_ptr)
942 {
943         int error;
944         struct pbe *p, *pblist;
945         struct swap_map_handle handle;
946         unsigned int nr_pages;
947
948         if (IS_ERR(resume_bdev)) {
949                 pr_debug("swsusp: block device not initialised\n");
950                 return PTR_ERR(resume_bdev);
951         }
952
953         error = get_swap_map_reader(&handle, swsusp_header.image);
954         if (!error)
955                 error = swap_map_read_page(&handle, &swsusp_info);
956         if (!error)
957                 error = check_header();
958         if (error)
959                 return error;
960         nr_pages = swsusp_info.image_pages;
961         p = alloc_pagedir(nr_pages, GFP_ATOMIC, 0);
962         if (!p)
963                 return -ENOMEM;
964         error = load_image_metadata(p, &handle);
965         if (!error) {
966                 mark_unsafe_pages(p);
967                 pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1);
968                 if (pblist)
969                         copy_page_backup_list(pblist, p);
970                 free_pagedir(p);
971                 if (!pblist)
972                         error = -ENOMEM;
973
974                 /* Allocate memory for the image and read the data from swap */
975                 if (!error)
976                         error = alloc_data_pages(pblist, GFP_ATOMIC, 1);
977                 if (!error) {
978                         release_eaten_pages();
979                         error = load_image_data(pblist, &handle, nr_pages);
980                 }
981                 if (!error)
982                         *pblist_ptr = pblist;
983         }
984         release_swap_map_reader(&handle);
985
986         blkdev_put(resume_bdev);
987
988         if (!error)
989                 pr_debug("swsusp: Reading resume file was successful\n");
990         else
991                 pr_debug("swsusp: Error %d resuming\n", error);
992         return error;
993 }
994
995 /**
996  *      swsusp_check - Check for swsusp signature in the resume device
997  */
998
999 int swsusp_check(void)
1000 {
1001         int error;
1002
1003         resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
1004         if (!IS_ERR(resume_bdev)) {
1005                 set_blocksize(resume_bdev, PAGE_SIZE);
1006                 memset(&swsusp_header, 0, sizeof(swsusp_header));
1007                 if ((error = bio_read_page(0, &swsusp_header)))
1008                         return error;
1009                 if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
1010                         memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
1011                         /* Reset swap signature now */
1012                         error = bio_write_page(0, &swsusp_header);
1013                 } else {
1014                         return -EINVAL;
1015                 }
1016                 if (error)
1017                         blkdev_put(resume_bdev);
1018                 else
1019                         pr_debug("swsusp: Signature found, resuming\n");
1020         } else {
1021                 error = PTR_ERR(resume_bdev);
1022         }
1023
1024         if (error)
1025                 pr_debug("swsusp: Error %d check for resume file\n", error);
1026
1027         return error;
1028 }
1029
1030 /**
1031  *      swsusp_close - close swap device.
1032  */
1033
1034 void swsusp_close(void)
1035 {
1036         if (IS_ERR(resume_bdev)) {
1037                 pr_debug("swsusp: block device not initialised\n");
1038                 return;
1039         }
1040
1041         blkdev_put(resume_bdev);
1042 }