11a594c4ba251227c6fc25b62c307747356fccfc
[linux-2.6.git] / kernel / power / swap.c
1 /*
2  * linux/kernel/power/swap.c
3  *
4  * This file provides functions for reading the suspend image from
5  * and writing it to a swap partition.
6  *
7  * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
8  * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
9  * Copyright (C) 2010 Bojan Smojver <bojan@rexursive.com>
10  *
11  * This file is released under the GPLv2.
12  *
13  */
14
15 #include <linux/module.h>
16 #include <linux/file.h>
17 #include <linux/delay.h>
18 #include <linux/bitops.h>
19 #include <linux/genhd.h>
20 #include <linux/device.h>
21 #include <linux/buffer_head.h>
22 #include <linux/bio.h>
23 #include <linux/blkdev.h>
24 #include <linux/swap.h>
25 #include <linux/swapops.h>
26 #include <linux/pm.h>
27 #include <linux/slab.h>
28 #include <linux/lzo.h>
29 #include <linux/vmalloc.h>
30 #include <linux/cpumask.h>
31 #include <linux/atomic.h>
32 #include <linux/kthread.h>
33 #include <linux/crc32.h>
34
35 #include "power.h"
36
/* Signature copied into the swap header's .sig field when an image is saved. */
#define HIBERNATE_SIG	"S1SUSPEND"
38
/*
 *	The swap map is a data structure used for keeping track of each page
 *	written to a swap partition.  It consists of many swap_map_page
 *	structures, each of which contains an array of MAP_PAGE_ENTRIES swap
 *	entries.  These structures are stored on the swap and linked together
 *	with the help of the .next_swap member.
 *
 *	The swap map is created during suspend.  The swap map pages are
 *	allocated and populated one at a time, so we only need one memory
 *	page to set up the entire structure.
 *
 *	During resume we pick up all swap_map_page structures into a list.
 */
52
53 #define MAP_PAGE_ENTRIES        (PAGE_SIZE / sizeof(sector_t) - 1)
54
55 struct swap_map_page {
56         sector_t entries[MAP_PAGE_ENTRIES];
57         sector_t next_swap;
58 };
59
/* List node used by the reader to keep one loaded swap map page. */
struct swap_map_page_list {
	/* Page-sized buffer the map page was read into. */
	struct swap_map_page *map;
	/* Next node of the list, NULL for the last one. */
	struct swap_map_page_list *next;
};
64
65 /**
66  *      The swap_map_handle structure is used for handling swap in
67  *      a file-alike way
68  */
69
70 struct swap_map_handle {
71         struct swap_map_page *cur;
72         struct swap_map_page_list *maps;
73         sector_t cur_swap;
74         sector_t first_sector;
75         unsigned int k;
76         unsigned long nr_free_pages, written;
77         u32 crc32;
78 };
79
80 struct swsusp_header {
81         char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int) -
82                       sizeof(u32)];
83         u32     crc32;
84         sector_t image;
85         unsigned int flags;     /* Flags to pass to the "boot" kernel */
86         char    orig_sig[10];
87         char    sig[10];
88 } __attribute__((packed));
89
90 static struct swsusp_header *swsusp_header;
91
92 /**
93  *      The following functions are used for tracing the allocated
94  *      swap pages, so that they can be freed in case of an error.
95  */
96
97 struct swsusp_extent {
98         struct rb_node node;
99         unsigned long start;
100         unsigned long end;
101 };
102
103 static struct rb_root swsusp_extents = RB_ROOT;
104
105 static int swsusp_extents_insert(unsigned long swap_offset)
106 {
107         struct rb_node **new = &(swsusp_extents.rb_node);
108         struct rb_node *parent = NULL;
109         struct swsusp_extent *ext;
110
111         /* Figure out where to put the new node */
112         while (*new) {
113                 ext = container_of(*new, struct swsusp_extent, node);
114                 parent = *new;
115                 if (swap_offset < ext->start) {
116                         /* Try to merge */
117                         if (swap_offset == ext->start - 1) {
118                                 ext->start--;
119                                 return 0;
120                         }
121                         new = &((*new)->rb_left);
122                 } else if (swap_offset > ext->end) {
123                         /* Try to merge */
124                         if (swap_offset == ext->end + 1) {
125                                 ext->end++;
126                                 return 0;
127                         }
128                         new = &((*new)->rb_right);
129                 } else {
130                         /* It already is in the tree */
131                         return -EINVAL;
132                 }
133         }
134         /* Add the new node and rebalance the tree. */
135         ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL);
136         if (!ext)
137                 return -ENOMEM;
138
139         ext->start = swap_offset;
140         ext->end = swap_offset;
141         rb_link_node(&ext->node, parent, new);
142         rb_insert_color(&ext->node, &swsusp_extents);
143         return 0;
144 }
145
146 /**
147  *      alloc_swapdev_block - allocate a swap page and register that it has
148  *      been allocated, so that it can be freed in case of an error.
149  */
150
151 sector_t alloc_swapdev_block(int swap)
152 {
153         unsigned long offset;
154
155         offset = swp_offset(get_swap_page_of_type(swap));
156         if (offset) {
157                 if (swsusp_extents_insert(offset))
158                         swap_free(swp_entry(swap, offset));
159                 else
160                         return swapdev_block(swap, offset);
161         }
162         return 0;
163 }
164
165 /**
166  *      free_all_swap_pages - free swap pages allocated for saving image data.
167  *      It also frees the extents used to register which swap entries had been
168  *      allocated.
169  */
170
171 void free_all_swap_pages(int swap)
172 {
173         struct rb_node *node;
174
175         while ((node = swsusp_extents.rb_node)) {
176                 struct swsusp_extent *ext;
177                 unsigned long offset;
178
179                 ext = container_of(node, struct swsusp_extent, node);
180                 rb_erase(node, &swsusp_extents);
181                 for (offset = ext->start; offset <= ext->end; offset++)
182                         swap_free(swp_entry(swap, offset));
183
184                 kfree(ext);
185         }
186 }
187
188 int swsusp_swap_in_use(void)
189 {
190         return (swsusp_extents.rb_node != NULL);
191 }
192
/*
 * General things
 */

/* Swap type of the resume device; set by swsusp_swap_check(). */
static unsigned short root_swap = 0xffff;
/* Block device of the resume swap, filled in by swap_type_of(). */
struct block_device *hib_resume_bdev;
199
200 /*
201  * Saving part
202  */
203
204 static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
205 {
206         int error;
207
208         hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL);
209         if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
210             !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
211                 memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
212                 memcpy(swsusp_header->sig, HIBERNATE_SIG, 10);
213                 swsusp_header->image = handle->first_sector;
214                 swsusp_header->flags = flags;
215                 if (flags & SF_CRC32_MODE)
216                         swsusp_header->crc32 = handle->crc32;
217                 error = hib_bio_write_page(swsusp_resume_block,
218                                         swsusp_header, NULL);
219         } else {
220                 printk(KERN_ERR "PM: Swap header not found!\n");
221                 error = -ENODEV;
222         }
223         return error;
224 }
225
226 /**
227  *      swsusp_swap_check - check if the resume device is a swap device
228  *      and get its index (if so)
229  *
230  *      This is called before saving image
231  */
232 static int swsusp_swap_check(void)
233 {
234         int res;
235
236         res = swap_type_of(swsusp_resume_device, swsusp_resume_block,
237                         &hib_resume_bdev);
238         if (res < 0)
239                 return res;
240
241         root_swap = res;
242         res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL);
243         if (res)
244                 return res;
245
246         res = set_blocksize(hib_resume_bdev, PAGE_SIZE);
247         if (res < 0)
248                 blkdev_put(hib_resume_bdev, FMODE_WRITE);
249
250         return res;
251 }
252
253 /**
254  *      write_page - Write one page to given swap location.
255  *      @buf:           Address we're writing.
256  *      @offset:        Offset of the swap page we're writing to.
257  *      @bio_chain:     Link the next write BIO here
258  */
259
260 static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
261 {
262         void *src;
263         int ret;
264
265         if (!offset)
266                 return -ENOSPC;
267
268         if (bio_chain) {
269                 src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
270                 if (src) {
271                         copy_page(src, buf);
272                 } else {
273                         ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */
274                         if (ret)
275                                 return ret;
276                         src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
277                         if (src) {
278                                 copy_page(src, buf);
279                         } else {
280                                 WARN_ON_ONCE(1);
281                                 bio_chain = NULL;       /* Go synchronous */
282                                 src = buf;
283                         }
284                 }
285         } else {
286                 src = buf;
287         }
288         return hib_bio_write_page(offset, src, bio_chain);
289 }
290
291 static void release_swap_writer(struct swap_map_handle *handle)
292 {
293         if (handle->cur)
294                 free_page((unsigned long)handle->cur);
295         handle->cur = NULL;
296 }
297
298 static int get_swap_writer(struct swap_map_handle *handle)
299 {
300         int ret;
301
302         ret = swsusp_swap_check();
303         if (ret) {
304                 if (ret != -ENOSPC)
305                         printk(KERN_ERR "PM: Cannot find swap device, try "
306                                         "swapon -a.\n");
307                 return ret;
308         }
309         handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
310         if (!handle->cur) {
311                 ret = -ENOMEM;
312                 goto err_close;
313         }
314         handle->cur_swap = alloc_swapdev_block(root_swap);
315         if (!handle->cur_swap) {
316                 ret = -ENOSPC;
317                 goto err_rel;
318         }
319         handle->k = 0;
320         handle->nr_free_pages = nr_free_pages() >> 1;
321         handle->written = 0;
322         handle->first_sector = handle->cur_swap;
323         return 0;
324 err_rel:
325         release_swap_writer(handle);
326 err_close:
327         swsusp_close(FMODE_WRITE);
328         return ret;
329 }
330
331 static int swap_write_page(struct swap_map_handle *handle, void *buf,
332                                 struct bio **bio_chain)
333 {
334         int error = 0;
335         sector_t offset;
336
337         if (!handle->cur)
338                 return -EINVAL;
339         offset = alloc_swapdev_block(root_swap);
340         error = write_page(buf, offset, bio_chain);
341         if (error)
342                 return error;
343         handle->cur->entries[handle->k++] = offset;
344         if (handle->k >= MAP_PAGE_ENTRIES) {
345                 offset = alloc_swapdev_block(root_swap);
346                 if (!offset)
347                         return -ENOSPC;
348                 handle->cur->next_swap = offset;
349                 error = write_page(handle->cur, handle->cur_swap, bio_chain);
350                 if (error)
351                         goto out;
352                 clear_page(handle->cur);
353                 handle->cur_swap = offset;
354                 handle->k = 0;
355         }
356         if (bio_chain && ++handle->written > handle->nr_free_pages) {
357                 error = hib_wait_on_bio_chain(bio_chain);
358                 if (error)
359                         goto out;
360                 handle->written = 0;
361         }
362  out:
363         return error;
364 }
365
366 static int flush_swap_writer(struct swap_map_handle *handle)
367 {
368         if (handle->cur && handle->cur_swap)
369                 return write_page(handle->cur, handle->cur_swap, NULL);
370         else
371                 return -EINVAL;
372 }
373
374 static int swap_writer_finish(struct swap_map_handle *handle,
375                 unsigned int flags, int error)
376 {
377         if (!error) {
378                 flush_swap_writer(handle);
379                 printk(KERN_INFO "PM: S");
380                 error = mark_swapfiles(handle, flags);
381                 printk("|\n");
382         }
383
384         if (error)
385                 free_all_swap_pages(root_swap);
386         release_swap_writer(handle);
387         swsusp_close(FMODE_WRITE);
388
389         return error;
390 }
391
/* Each compressed chunk is prefixed with its compressed length. */
#define LZO_HEADER	sizeof(size_t)

/* Amount of uncompressed data handed to the compressor in one go. */
#define LZO_UNC_PAGES	32
#define LZO_UNC_SIZE	(LZO_UNC_PAGES * PAGE_SIZE)

/* Worst-case room needed for one compressed chunk, header included. */
#define LZO_CMP_PAGES	DIV_ROUND_UP(lzo1x_worst_compress(LZO_UNC_SIZE) + \
			             LZO_HEADER, PAGE_SIZE)
#define LZO_CMP_SIZE	(LZO_CMP_PAGES * PAGE_SIZE)

/* Upper bound on the number of compression/decompression threads. */
#define LZO_THREADS	3

/* Upper bound on the number of pages used for read buffering. */
#define LZO_READ_PAGES	(MAP_PAGE_ENTRIES * 8)
409
410
411 /**
412  *      save_image - save the suspend image data
413  */
414
415 static int save_image(struct swap_map_handle *handle,
416                       struct snapshot_handle *snapshot,
417                       unsigned int nr_to_write)
418 {
419         unsigned int m;
420         int ret;
421         int nr_pages;
422         int err2;
423         struct bio *bio;
424         struct timeval start;
425         struct timeval stop;
426
427         printk(KERN_INFO "PM: Saving image data pages (%u pages) ...     ",
428                 nr_to_write);
429         m = nr_to_write / 100;
430         if (!m)
431                 m = 1;
432         nr_pages = 0;
433         bio = NULL;
434         do_gettimeofday(&start);
435         while (1) {
436                 ret = snapshot_read_next(snapshot);
437                 if (ret <= 0)
438                         break;
439                 ret = swap_write_page(handle, data_of(*snapshot), &bio);
440                 if (ret)
441                         break;
442                 if (!(nr_pages % m))
443                         printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
444                 nr_pages++;
445         }
446         err2 = hib_wait_on_bio_chain(&bio);
447         do_gettimeofday(&stop);
448         if (!ret)
449                 ret = err2;
450         if (!ret)
451                 printk(KERN_CONT "\b\b\b\bdone\n");
452         else
453                 printk(KERN_CONT "\n");
454         swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
455         return ret;
456 }
457
458 /**
459  * Structure used for CRC32.
460  */
461 struct crc_data {
462         struct task_struct *thr;                  /* thread */
463         atomic_t ready;                           /* ready to start flag */
464         atomic_t stop;                            /* ready to stop flag */
465         unsigned run_threads;                     /* nr current threads */
466         wait_queue_head_t go;                     /* start crc update */
467         wait_queue_head_t done;                   /* crc update done */
468         u32 *crc32;                               /* points to handle's crc32 */
469         size_t *unc_len[LZO_THREADS];             /* uncompressed lengths */
470         unsigned char *unc[LZO_THREADS];          /* uncompressed data */
471 };
472
473 /**
474  * CRC32 update function that runs in its own thread.
475  */
476 static int crc32_threadfn(void *data)
477 {
478         struct crc_data *d = data;
479         unsigned i;
480
481         while (1) {
482                 wait_event(d->go, atomic_read(&d->ready) ||
483                                   kthread_should_stop());
484                 if (kthread_should_stop()) {
485                         d->thr = NULL;
486                         atomic_set(&d->stop, 1);
487                         wake_up(&d->done);
488                         break;
489                 }
490                 atomic_set(&d->ready, 0);
491
492                 for (i = 0; i < d->run_threads; i++)
493                         *d->crc32 = crc32_le(*d->crc32,
494                                              d->unc[i], *d->unc_len[i]);
495                 atomic_set(&d->stop, 1);
496                 wake_up(&d->done);
497         }
498         return 0;
499 }
500 /**
501  * Structure used for LZO data compression.
502  */
503 struct cmp_data {
504         struct task_struct *thr;                  /* thread */
505         atomic_t ready;                           /* ready to start flag */
506         atomic_t stop;                            /* ready to stop flag */
507         int ret;                                  /* return code */
508         wait_queue_head_t go;                     /* start compression */
509         wait_queue_head_t done;                   /* compression done */
510         size_t unc_len;                           /* uncompressed length */
511         size_t cmp_len;                           /* compressed length */
512         unsigned char unc[LZO_UNC_SIZE];          /* uncompressed buffer */
513         unsigned char cmp[LZO_CMP_SIZE];          /* compressed buffer */
514         unsigned char wrk[LZO1X_1_MEM_COMPRESS];  /* compression workspace */
515 };
516
517 /**
518  * Compression function that runs in its own thread.
519  */
520 static int lzo_compress_threadfn(void *data)
521 {
522         struct cmp_data *d = data;
523
524         while (1) {
525                 wait_event(d->go, atomic_read(&d->ready) ||
526                                   kthread_should_stop());
527                 if (kthread_should_stop()) {
528                         d->thr = NULL;
529                         d->ret = -1;
530                         atomic_set(&d->stop, 1);
531                         wake_up(&d->done);
532                         break;
533                 }
534                 atomic_set(&d->ready, 0);
535
536                 d->ret = lzo1x_1_compress(d->unc, d->unc_len,
537                                           d->cmp + LZO_HEADER, &d->cmp_len,
538                                           d->wrk);
539                 atomic_set(&d->stop, 1);
540                 wake_up(&d->done);
541         }
542         return 0;
543 }
544
545 /**
546  * save_image_lzo - Save the suspend image data compressed with LZO.
547  * @handle: Swap mam handle to use for saving the image.
548  * @snapshot: Image to read data from.
549  * @nr_to_write: Number of pages to save.
550  */
551 static int save_image_lzo(struct swap_map_handle *handle,
552                           struct snapshot_handle *snapshot,
553                           unsigned int nr_to_write)
554 {
555         unsigned int m;
556         int ret = 0;
557         int nr_pages;
558         int err2;
559         struct bio *bio;
560         struct timeval start;
561         struct timeval stop;
562         size_t off;
563         unsigned thr, run_threads, nr_threads;
564         unsigned char *page = NULL;
565         struct cmp_data *data = NULL;
566         struct crc_data *crc = NULL;
567
568         /*
569          * We'll limit the number of threads for compression to limit memory
570          * footprint.
571          */
572         nr_threads = num_online_cpus() - 1;
573         nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
574
575         page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
576         if (!page) {
577                 printk(KERN_ERR "PM: Failed to allocate LZO page\n");
578                 ret = -ENOMEM;
579                 goto out_clean;
580         }
581
582         data = vmalloc(sizeof(*data) * nr_threads);
583         if (!data) {
584                 printk(KERN_ERR "PM: Failed to allocate LZO data\n");
585                 ret = -ENOMEM;
586                 goto out_clean;
587         }
588         for (thr = 0; thr < nr_threads; thr++)
589                 memset(&data[thr], 0, offsetof(struct cmp_data, go));
590
591         crc = kmalloc(sizeof(*crc), GFP_KERNEL);
592         if (!crc) {
593                 printk(KERN_ERR "PM: Failed to allocate crc\n");
594                 ret = -ENOMEM;
595                 goto out_clean;
596         }
597         memset(crc, 0, offsetof(struct crc_data, go));
598
599         /*
600          * Start the compression threads.
601          */
602         for (thr = 0; thr < nr_threads; thr++) {
603                 init_waitqueue_head(&data[thr].go);
604                 init_waitqueue_head(&data[thr].done);
605
606                 data[thr].thr = kthread_run(lzo_compress_threadfn,
607                                             &data[thr],
608                                             "image_compress/%u", thr);
609                 if (IS_ERR(data[thr].thr)) {
610                         data[thr].thr = NULL;
611                         printk(KERN_ERR
612                                "PM: Cannot start compression threads\n");
613                         ret = -ENOMEM;
614                         goto out_clean;
615                 }
616         }
617
618         /*
619          * Adjust number of free pages after all allocations have been done.
620          * We don't want to run out of pages when writing.
621          */
622         handle->nr_free_pages = nr_free_pages() >> 1;
623
624         /*
625          * Start the CRC32 thread.
626          */
627         init_waitqueue_head(&crc->go);
628         init_waitqueue_head(&crc->done);
629
630         handle->crc32 = 0;
631         crc->crc32 = &handle->crc32;
632         for (thr = 0; thr < nr_threads; thr++) {
633                 crc->unc[thr] = data[thr].unc;
634                 crc->unc_len[thr] = &data[thr].unc_len;
635         }
636
637         crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32");
638         if (IS_ERR(crc->thr)) {
639                 crc->thr = NULL;
640                 printk(KERN_ERR "PM: Cannot start CRC32 thread\n");
641                 ret = -ENOMEM;
642                 goto out_clean;
643         }
644
645         printk(KERN_INFO
646                 "PM: Using %u thread(s) for compression.\n"
647                 "PM: Compressing and saving image data (%u pages) ...     ",
648                 nr_threads, nr_to_write);
649         m = nr_to_write / 100;
650         if (!m)
651                 m = 1;
652         nr_pages = 0;
653         bio = NULL;
654         do_gettimeofday(&start);
655         for (;;) {
656                 for (thr = 0; thr < nr_threads; thr++) {
657                         for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
658                                 ret = snapshot_read_next(snapshot);
659                                 if (ret < 0)
660                                         goto out_finish;
661
662                                 if (!ret)
663                                         break;
664
665                                 memcpy(data[thr].unc + off,
666                                        data_of(*snapshot), PAGE_SIZE);
667
668                                 if (!(nr_pages % m))
669                                         printk(KERN_CONT "\b\b\b\b%3d%%",
670                                                nr_pages / m);
671                                 nr_pages++;
672                         }
673                         if (!off)
674                                 break;
675
676                         data[thr].unc_len = off;
677
678                         atomic_set(&data[thr].ready, 1);
679                         wake_up(&data[thr].go);
680                 }
681
682                 if (!thr)
683                         break;
684
685                 crc->run_threads = thr;
686                 atomic_set(&crc->ready, 1);
687                 wake_up(&crc->go);
688
689                 for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
690                         wait_event(data[thr].done,
691                                    atomic_read(&data[thr].stop));
692                         atomic_set(&data[thr].stop, 0);
693
694                         ret = data[thr].ret;
695
696                         if (ret < 0) {
697                                 printk(KERN_ERR "PM: LZO compression failed\n");
698                                 goto out_finish;
699                         }
700
701                         if (unlikely(!data[thr].cmp_len ||
702                                      data[thr].cmp_len >
703                                      lzo1x_worst_compress(data[thr].unc_len))) {
704                                 printk(KERN_ERR
705                                        "PM: Invalid LZO compressed length\n");
706                                 ret = -1;
707                                 goto out_finish;
708                         }
709
710                         *(size_t *)data[thr].cmp = data[thr].cmp_len;
711
712                         /*
713                          * Given we are writing one page at a time to disk, we
714                          * copy that much from the buffer, although the last
715                          * bit will likely be smaller than full page. This is
716                          * OK - we saved the length of the compressed data, so
717                          * any garbage at the end will be discarded when we
718                          * read it.
719                          */
720                         for (off = 0;
721                              off < LZO_HEADER + data[thr].cmp_len;
722                              off += PAGE_SIZE) {
723                                 memcpy(page, data[thr].cmp + off, PAGE_SIZE);
724
725                                 ret = swap_write_page(handle, page, &bio);
726                                 if (ret)
727                                         goto out_finish;
728                         }
729                 }
730
731                 wait_event(crc->done, atomic_read(&crc->stop));
732                 atomic_set(&crc->stop, 0);
733         }
734
735 out_finish:
736         err2 = hib_wait_on_bio_chain(&bio);
737         do_gettimeofday(&stop);
738         if (!ret)
739                 ret = err2;
740         if (!ret) {
741                 printk(KERN_CONT "\b\b\b\bdone\n");
742         } else {
743                 printk(KERN_CONT "\n");
744         }
745         swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
746 out_clean:
747         if (crc) {
748                 if (crc->thr)
749                         kthread_stop(crc->thr);
750                 kfree(crc);
751         }
752         if (data) {
753                 for (thr = 0; thr < nr_threads; thr++)
754                         if (data[thr].thr)
755                                 kthread_stop(data[thr].thr);
756                 vfree(data);
757         }
758         if (page) free_page((unsigned long)page);
759
760         return ret;
761 }
762
763 /**
764  *      enough_swap - Make sure we have enough swap to save the image.
765  *
766  *      Returns TRUE or FALSE after checking the total amount of swap
767  *      space avaiable from the resume partition.
768  */
769
770 static int enough_swap(unsigned int nr_pages, unsigned int flags)
771 {
772         unsigned int free_swap = count_swap_pages(root_swap, 1);
773         unsigned int required;
774
775         pr_debug("PM: Free swap pages: %u\n", free_swap);
776
777         required = PAGES_FOR_IO + ((flags & SF_NOCOMPRESS_MODE) ?
778                 nr_pages : (nr_pages * LZO_CMP_PAGES) / LZO_UNC_PAGES + 1);
779         return free_swap > required;
780 }
781
782 /**
783  *      swsusp_write - Write entire image and metadata.
784  *      @flags: flags to pass to the "boot" kernel in the image header
785  *
786  *      It is important _NOT_ to umount filesystems at this point. We want
787  *      them synced (in case something goes wrong) but we DO not want to mark
788  *      filesystem clean: it is not. (And it does not matter, if we resume
789  *      correctly, we'll mark system clean, anyway.)
790  */
791
792 int swsusp_write(unsigned int flags)
793 {
794         struct swap_map_handle handle;
795         struct snapshot_handle snapshot;
796         struct swsusp_info *header;
797         unsigned long pages;
798         int error;
799
800         pages = snapshot_get_image_size();
801         error = get_swap_writer(&handle);
802         if (error) {
803                 printk(KERN_ERR "PM: Cannot get swap writer\n");
804                 return error;
805         }
806         if (!enough_swap(pages, flags)) {
807                 printk(KERN_ERR "PM: Not enough free swap\n");
808                 error = -ENOSPC;
809                 goto out_finish;
810         }
811         memset(&snapshot, 0, sizeof(struct snapshot_handle));
812         error = snapshot_read_next(&snapshot);
813         if (error < PAGE_SIZE) {
814                 if (error >= 0)
815                         error = -EFAULT;
816
817                 goto out_finish;
818         }
819         header = (struct swsusp_info *)data_of(snapshot);
820         error = swap_write_page(&handle, header, NULL);
821         if (!error) {
822                 error = (flags & SF_NOCOMPRESS_MODE) ?
823                         save_image(&handle, &snapshot, pages - 1) :
824                         save_image_lzo(&handle, &snapshot, pages - 1);
825         }
826 out_finish:
827         error = swap_writer_finish(&handle, flags, error);
828         return error;
829 }
830
831 /**
832  *      The following functions allow us to read data using a swap map
833  *      in a file-alike way
834  */
835
836 static void release_swap_reader(struct swap_map_handle *handle)
837 {
838         struct swap_map_page_list *tmp;
839
840         while (handle->maps) {
841                 if (handle->maps->map)
842                         free_page((unsigned long)handle->maps->map);
843                 tmp = handle->maps;
844                 handle->maps = handle->maps->next;
845                 kfree(tmp);
846         }
847         handle->cur = NULL;
848 }
849
850 static int get_swap_reader(struct swap_map_handle *handle,
851                 unsigned int *flags_p)
852 {
853         int error;
854         struct swap_map_page_list *tmp, *last;
855         sector_t offset;
856
857         *flags_p = swsusp_header->flags;
858
859         if (!swsusp_header->image) /* how can this happen? */
860                 return -EINVAL;
861
862         handle->cur = NULL;
863         last = handle->maps = NULL;
864         offset = swsusp_header->image;
865         while (offset) {
866                 tmp = kmalloc(sizeof(*handle->maps), GFP_KERNEL);
867                 if (!tmp) {
868                         release_swap_reader(handle);
869                         return -ENOMEM;
870                 }
871                 memset(tmp, 0, sizeof(*tmp));
872                 if (!handle->maps)
873                         handle->maps = tmp;
874                 if (last)
875                         last->next = tmp;
876                 last = tmp;
877
878                 tmp->map = (struct swap_map_page *)
879                            __get_free_page(__GFP_WAIT | __GFP_HIGH);
880                 if (!tmp->map) {
881                         release_swap_reader(handle);
882                         return -ENOMEM;
883                 }
884
885                 error = hib_bio_read_page(offset, tmp->map, NULL);
886                 if (error) {
887                         release_swap_reader(handle);
888                         return error;
889                 }
890                 offset = tmp->map->next_swap;
891         }
892         handle->k = 0;
893         handle->cur = handle->maps->map;
894         return 0;
895 }
896
897 static int swap_read_page(struct swap_map_handle *handle, void *buf,
898                                 struct bio **bio_chain)
899 {
900         sector_t offset;
901         int error;
902         struct swap_map_page_list *tmp;
903
904         if (!handle->cur)
905                 return -EINVAL;
906         offset = handle->cur->entries[handle->k];
907         if (!offset)
908                 return -EFAULT;
909         error = hib_bio_read_page(offset, buf, bio_chain);
910         if (error)
911                 return error;
912         if (++handle->k >= MAP_PAGE_ENTRIES) {
913                 handle->k = 0;
914                 free_page((unsigned long)handle->maps->map);
915                 tmp = handle->maps;
916                 handle->maps = handle->maps->next;
917                 kfree(tmp);
918                 if (!handle->maps)
919                         release_swap_reader(handle);
920                 else
921                         handle->cur = handle->maps->map;
922         }
923         return error;
924 }
925
/*
 * Finish reading through @handle by releasing whatever swap map pages the
 * reader still holds.  Always returns 0.
 */
static int swap_reader_finish(struct swap_map_handle *handle)
{
	release_swap_reader(handle);
	return 0;
}
932
933 /**
934  *      load_image - load the image using the swap map handle
935  *      @handle and the snapshot handle @snapshot
936  *      (assume there are @nr_pages pages to load)
937  */
938
939 static int load_image(struct swap_map_handle *handle,
940                       struct snapshot_handle *snapshot,
941                       unsigned int nr_to_read)
942 {
943         unsigned int m;
944         int ret = 0;
945         struct timeval start;
946         struct timeval stop;
947         struct bio *bio;
948         int err2;
949         unsigned nr_pages;
950
951         printk(KERN_INFO "PM: Loading image data pages (%u pages) ...     ",
952                 nr_to_read);
953         m = nr_to_read / 100;
954         if (!m)
955                 m = 1;
956         nr_pages = 0;
957         bio = NULL;
958         do_gettimeofday(&start);
959         for ( ; ; ) {
960                 ret = snapshot_write_next(snapshot);
961                 if (ret <= 0)
962                         break;
963                 ret = swap_read_page(handle, data_of(*snapshot), &bio);
964                 if (ret)
965                         break;
966                 if (snapshot->sync_read)
967                         ret = hib_wait_on_bio_chain(&bio);
968                 if (ret)
969                         break;
970                 if (!(nr_pages % m))
971                         printk("\b\b\b\b%3d%%", nr_pages / m);
972                 nr_pages++;
973         }
974         err2 = hib_wait_on_bio_chain(&bio);
975         do_gettimeofday(&stop);
976         if (!ret)
977                 ret = err2;
978         if (!ret) {
979                 printk("\b\b\b\bdone\n");
980                 snapshot_write_finalize(snapshot);
981                 if (!snapshot_image_loaded(snapshot))
982                         ret = -ENODATA;
983         } else
984                 printk("\n");
985         swsusp_show_speed(&start, &stop, nr_to_read, "Read");
986         return ret;
987 }
988
989 /**
990  * Structure used for LZO data decompression.
991  */
992 struct dec_data {
993         struct task_struct *thr;                  /* thread */
994         atomic_t ready;                           /* ready to start flag */
995         atomic_t stop;                            /* ready to stop flag */
996         int ret;                                  /* return code */
997         wait_queue_head_t go;                     /* start decompression */
998         wait_queue_head_t done;                   /* decompression done */
999         size_t unc_len;                           /* uncompressed length */
1000         size_t cmp_len;                           /* compressed length */
1001         unsigned char unc[LZO_UNC_SIZE];          /* uncompressed buffer */
1002         unsigned char cmp[LZO_CMP_SIZE];          /* compressed buffer */
1003 };
1004
1005 /**
1006  * Deompression function that runs in its own thread.
1007  */
1008 static int lzo_decompress_threadfn(void *data)
1009 {
1010         struct dec_data *d = data;
1011
1012         while (1) {
1013                 wait_event(d->go, atomic_read(&d->ready) ||
1014                                   kthread_should_stop());
1015                 if (kthread_should_stop()) {
1016                         d->thr = NULL;
1017                         d->ret = -1;
1018                         atomic_set(&d->stop, 1);
1019                         wake_up(&d->done);
1020                         break;
1021                 }
1022                 atomic_set(&d->ready, 0);
1023
1024                 d->unc_len = LZO_UNC_SIZE;
1025                 d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len,
1026                                                d->unc, &d->unc_len);
1027                 atomic_set(&d->stop, 1);
1028                 wake_up(&d->done);
1029         }
1030         return 0;
1031 }
1032
1033 /**
1034  * load_image_lzo - Load compressed image data and decompress them with LZO.
1035  * @handle: Swap map handle to use for loading data.
1036  * @snapshot: Image to copy uncompressed data into.
1037  * @nr_to_read: Number of pages to load.
1038  */
1039 static int load_image_lzo(struct swap_map_handle *handle,
1040                           struct snapshot_handle *snapshot,
1041                           unsigned int nr_to_read)
1042 {
1043         unsigned int m;
1044         int ret = 0;
1045         int eof = 0;
1046         struct bio *bio;
1047         struct timeval start;
1048         struct timeval stop;
1049         unsigned nr_pages;
1050         size_t off;
1051         unsigned i, thr, run_threads, nr_threads;
1052         unsigned ring = 0, pg = 0, ring_size = 0,
1053                  have = 0, want, need, asked = 0;
1054         unsigned long read_pages;
1055         unsigned char **page = NULL;
1056         struct dec_data *data = NULL;
1057         struct crc_data *crc = NULL;
1058
1059         /*
1060          * We'll limit the number of threads for decompression to limit memory
1061          * footprint.
1062          */
1063         nr_threads = num_online_cpus() - 1;
1064         nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
1065
1066         page = vmalloc(sizeof(*page) * LZO_READ_PAGES);
1067         if (!page) {
1068                 printk(KERN_ERR "PM: Failed to allocate LZO page\n");
1069                 ret = -ENOMEM;
1070                 goto out_clean;
1071         }
1072
1073         data = vmalloc(sizeof(*data) * nr_threads);
1074         if (!data) {
1075                 printk(KERN_ERR "PM: Failed to allocate LZO data\n");
1076                 ret = -ENOMEM;
1077                 goto out_clean;
1078         }
1079         for (thr = 0; thr < nr_threads; thr++)
1080                 memset(&data[thr], 0, offsetof(struct dec_data, go));
1081
1082         crc = kmalloc(sizeof(*crc), GFP_KERNEL);
1083         if (!crc) {
1084                 printk(KERN_ERR "PM: Failed to allocate crc\n");
1085                 ret = -ENOMEM;
1086                 goto out_clean;
1087         }
1088         memset(crc, 0, offsetof(struct crc_data, go));
1089
1090         /*
1091          * Start the decompression threads.
1092          */
1093         for (thr = 0; thr < nr_threads; thr++) {
1094                 init_waitqueue_head(&data[thr].go);
1095                 init_waitqueue_head(&data[thr].done);
1096
1097                 data[thr].thr = kthread_run(lzo_decompress_threadfn,
1098                                             &data[thr],
1099                                             "image_decompress/%u", thr);
1100                 if (IS_ERR(data[thr].thr)) {
1101                         data[thr].thr = NULL;
1102                         printk(KERN_ERR
1103                                "PM: Cannot start decompression threads\n");
1104                         ret = -ENOMEM;
1105                         goto out_clean;
1106                 }
1107         }
1108
1109         /*
1110          * Start the CRC32 thread.
1111          */
1112         init_waitqueue_head(&crc->go);
1113         init_waitqueue_head(&crc->done);
1114
1115         handle->crc32 = 0;
1116         crc->crc32 = &handle->crc32;
1117         for (thr = 0; thr < nr_threads; thr++) {
1118                 crc->unc[thr] = data[thr].unc;
1119                 crc->unc_len[thr] = &data[thr].unc_len;
1120         }
1121
1122         crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32");
1123         if (IS_ERR(crc->thr)) {
1124                 crc->thr = NULL;
1125                 printk(KERN_ERR "PM: Cannot start CRC32 thread\n");
1126                 ret = -ENOMEM;
1127                 goto out_clean;
1128         }
1129
1130         /*
1131          * Adjust number of pages for read buffering, in case we are short.
1132          */
1133         read_pages = (nr_free_pages() - snapshot_get_image_size()) >> 1;
1134         read_pages = clamp_val(read_pages, LZO_CMP_PAGES, LZO_READ_PAGES);
1135
1136         for (i = 0; i < read_pages; i++) {
1137                 page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ?
1138                                                   __GFP_WAIT | __GFP_HIGH :
1139                                                   __GFP_WAIT);
1140                 if (!page[i]) {
1141                         if (i < LZO_CMP_PAGES) {
1142                                 ring_size = i;
1143                                 printk(KERN_ERR
1144                                        "PM: Failed to allocate LZO pages\n");
1145                                 ret = -ENOMEM;
1146                                 goto out_clean;
1147                         } else {
1148                                 break;
1149                         }
1150                 }
1151         }
1152         want = ring_size = i;
1153
1154         printk(KERN_INFO
1155                 "PM: Using %u thread(s) for decompression.\n"
1156                 "PM: Loading and decompressing image data (%u pages) ...     ",
1157                 nr_threads, nr_to_read);
1158         m = nr_to_read / 100;
1159         if (!m)
1160                 m = 1;
1161         nr_pages = 0;
1162         bio = NULL;
1163         do_gettimeofday(&start);
1164
1165         ret = snapshot_write_next(snapshot);
1166         if (ret <= 0)
1167                 goto out_finish;
1168
1169         for(;;) {
1170                 for (i = 0; !eof && i < want; i++) {
1171                         ret = swap_read_page(handle, page[ring], &bio);
1172                         if (ret) {
1173                                 /*
1174                                  * On real read error, finish. On end of data,
1175                                  * set EOF flag and just exit the read loop.
1176                                  */
1177                                 if (handle->cur &&
1178                                     handle->cur->entries[handle->k]) {
1179                                         goto out_finish;
1180                                 } else {
1181                                         eof = 1;
1182                                         break;
1183                                 }
1184                         }
1185                         if (++ring >= ring_size)
1186                                 ring = 0;
1187                 }
1188                 asked += i;
1189                 want -= i;
1190
1191                 /*
1192                  * We are out of data, wait for some more.
1193                  */
1194                 if (!have) {
1195                         if (!asked)
1196                                 break;
1197
1198                         ret = hib_wait_on_bio_chain(&bio);
1199                         if (ret)
1200                                 goto out_finish;
1201                         have += asked;
1202                         asked = 0;
1203                         if (eof)
1204                                 eof = 2;
1205                 }
1206
1207                 if (crc->run_threads) {
1208                         wait_event(crc->done, atomic_read(&crc->stop));
1209                         atomic_set(&crc->stop, 0);
1210                         crc->run_threads = 0;
1211                 }
1212
1213                 for (thr = 0; have && thr < nr_threads; thr++) {
1214                         data[thr].cmp_len = *(size_t *)page[pg];
1215                         if (unlikely(!data[thr].cmp_len ||
1216                                      data[thr].cmp_len >
1217                                      lzo1x_worst_compress(LZO_UNC_SIZE))) {
1218                                 printk(KERN_ERR
1219                                        "PM: Invalid LZO compressed length\n");
1220                                 ret = -1;
1221                                 goto out_finish;
1222                         }
1223
1224                         need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER,
1225                                             PAGE_SIZE);
1226                         if (need > have) {
1227                                 if (eof > 1) {
1228                                         ret = -1;
1229                                         goto out_finish;
1230                                 }
1231                                 break;
1232                         }
1233
1234                         for (off = 0;
1235                              off < LZO_HEADER + data[thr].cmp_len;
1236                              off += PAGE_SIZE) {
1237                                 memcpy(data[thr].cmp + off,
1238                                        page[pg], PAGE_SIZE);
1239                                 have--;
1240                                 want++;
1241                                 if (++pg >= ring_size)
1242                                         pg = 0;
1243                         }
1244
1245                         atomic_set(&data[thr].ready, 1);
1246                         wake_up(&data[thr].go);
1247                 }
1248
1249                 /*
1250                  * Wait for more data while we are decompressing.
1251                  */
1252                 if (have < LZO_CMP_PAGES && asked) {
1253                         ret = hib_wait_on_bio_chain(&bio);
1254                         if (ret)
1255                                 goto out_finish;
1256                         have += asked;
1257                         asked = 0;
1258                         if (eof)
1259                                 eof = 2;
1260                 }
1261
1262                 for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
1263                         wait_event(data[thr].done,
1264                                    atomic_read(&data[thr].stop));
1265                         atomic_set(&data[thr].stop, 0);
1266
1267                         ret = data[thr].ret;
1268
1269                         if (ret < 0) {
1270                                 printk(KERN_ERR
1271                                        "PM: LZO decompression failed\n");
1272                                 goto out_finish;
1273                         }
1274
1275                         if (unlikely(!data[thr].unc_len ||
1276                                      data[thr].unc_len > LZO_UNC_SIZE ||
1277                                      data[thr].unc_len & (PAGE_SIZE - 1))) {
1278                                 printk(KERN_ERR
1279                                        "PM: Invalid LZO uncompressed length\n");
1280                                 ret = -1;
1281                                 goto out_finish;
1282                         }
1283
1284                         for (off = 0;
1285                              off < data[thr].unc_len; off += PAGE_SIZE) {
1286                                 memcpy(data_of(*snapshot),
1287                                        data[thr].unc + off, PAGE_SIZE);
1288
1289                                 if (!(nr_pages % m))
1290                                         printk("\b\b\b\b%3d%%", nr_pages / m);
1291                                 nr_pages++;
1292
1293                                 ret = snapshot_write_next(snapshot);
1294                                 if (ret <= 0) {
1295                                         crc->run_threads = thr + 1;
1296                                         atomic_set(&crc->ready, 1);
1297                                         wake_up(&crc->go);
1298                                         goto out_finish;
1299                                 }
1300                         }
1301                 }
1302
1303                 crc->run_threads = thr;
1304                 atomic_set(&crc->ready, 1);
1305                 wake_up(&crc->go);
1306         }
1307
1308 out_finish:
1309         if (crc->run_threads) {
1310                 wait_event(crc->done, atomic_read(&crc->stop));
1311                 atomic_set(&crc->stop, 0);
1312         }
1313         do_gettimeofday(&stop);
1314         if (!ret) {
1315                 printk("\b\b\b\bdone\n");
1316                 snapshot_write_finalize(snapshot);
1317                 if (!snapshot_image_loaded(snapshot))
1318                         ret = -ENODATA;
1319                 if (!ret) {
1320                         if (swsusp_header->flags & SF_CRC32_MODE) {
1321                                 if(handle->crc32 != swsusp_header->crc32) {
1322                                         printk(KERN_ERR
1323                                                "PM: Invalid image CRC32!\n");
1324                                         ret = -ENODATA;
1325                                 }
1326                         }
1327                 }
1328         } else
1329                 printk("\n");
1330         swsusp_show_speed(&start, &stop, nr_to_read, "Read");
1331 out_clean:
1332         for (i = 0; i < ring_size; i++)
1333                 free_page((unsigned long)page[i]);
1334         if (crc) {
1335                 if (crc->thr)
1336                         kthread_stop(crc->thr);
1337                 kfree(crc);
1338         }
1339         if (data) {
1340                 for (thr = 0; thr < nr_threads; thr++)
1341                         if (data[thr].thr)
1342                                 kthread_stop(data[thr].thr);
1343                 vfree(data);
1344         }
1345         if (page) vfree(page);
1346
1347         return ret;
1348 }
1349
1350 /**
1351  *      swsusp_read - read the hibernation image.
1352  *      @flags_p: flags passed by the "frozen" kernel in the image header should
1353  *                be written into this memory location
1354  */
1355
1356 int swsusp_read(unsigned int *flags_p)
1357 {
1358         int error;
1359         struct swap_map_handle handle;
1360         struct snapshot_handle snapshot;
1361         struct swsusp_info *header;
1362
1363         memset(&snapshot, 0, sizeof(struct snapshot_handle));
1364         error = snapshot_write_next(&snapshot);
1365         if (error < PAGE_SIZE)
1366                 return error < 0 ? error : -EFAULT;
1367         header = (struct swsusp_info *)data_of(snapshot);
1368         error = get_swap_reader(&handle, flags_p);
1369         if (error)
1370                 goto end;
1371         if (!error)
1372                 error = swap_read_page(&handle, header, NULL);
1373         if (!error) {
1374                 error = (*flags_p & SF_NOCOMPRESS_MODE) ?
1375                         load_image(&handle, &snapshot, header->pages - 1) :
1376                         load_image_lzo(&handle, &snapshot, header->pages - 1);
1377         }
1378         swap_reader_finish(&handle);
1379 end:
1380         if (!error)
1381                 pr_debug("PM: Image successfully loaded\n");
1382         else
1383                 pr_debug("PM: Error %d resuming\n", error);
1384         return error;
1385 }
1386
1387 /**
1388  *      swsusp_check - Check for swsusp signature in the resume device
1389  */
1390
1391 int swsusp_check(void)
1392 {
1393         int error;
1394
1395         hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device,
1396                                             FMODE_READ, NULL);
1397         if (!IS_ERR(hib_resume_bdev)) {
1398                 set_blocksize(hib_resume_bdev, PAGE_SIZE);
1399                 clear_page(swsusp_header);
1400                 error = hib_bio_read_page(swsusp_resume_block,
1401                                         swsusp_header, NULL);
1402                 if (error)
1403                         goto put;
1404
1405                 if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) {
1406                         memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
1407                         /* Reset swap signature now */
1408                         error = hib_bio_write_page(swsusp_resume_block,
1409                                                 swsusp_header, NULL);
1410                 } else {
1411                         error = -EINVAL;
1412                 }
1413
1414 put:
1415                 if (error)
1416                         blkdev_put(hib_resume_bdev, FMODE_READ);
1417                 else
1418                         pr_debug("PM: Image signature found, resuming\n");
1419         } else {
1420                 error = PTR_ERR(hib_resume_bdev);
1421         }
1422
1423         if (error)
1424                 pr_debug("PM: Image not found (code %d)\n", error);
1425
1426         return error;
1427 }
1428
1429 /**
1430  *      swsusp_close - close swap device.
1431  */
1432
1433 void swsusp_close(fmode_t mode)
1434 {
1435         if (IS_ERR(hib_resume_bdev)) {
1436                 pr_debug("PM: Image device not initialised\n");
1437                 return;
1438         }
1439
1440         blkdev_put(hib_resume_bdev, mode);
1441 }
1442
1443 static int swsusp_header_init(void)
1444 {
1445         swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL);
1446         if (!swsusp_header)
1447                 panic("Could not allocate memory for swsusp_header\n");
1448         return 0;
1449 }
1450
1451 core_initcall(swsusp_header_init);