b1a491a5fe78a4bf899263e17c6a22c60015b870
[linux-2.6.git] / fs / logfs / super.c
/*
 * fs/logfs/super.c
 *
 * As should be obvious for Linux kernel code, license is GPLv2
 *
 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
 *
 * Generally contains mount/umount code and also serves as a dump area for
 * any functions that neither fit elsewhere nor justify a file of their
 * own.
 */
12 #include "logfs.h"
13 #include <linux/bio.h>
14 #include <linux/slab.h>
15 #include <linux/blkdev.h>
16 #include <linux/module.h>
17 #include <linux/mtd/mtd.h>
18 #include <linux/statfs.h>
19 #include <linux/buffer_head.h>
20
/*
 * A single page allocated in logfs_init() that emergency_read_begin() falls
 * back to when it gets no page from the page cache.  emergency_mutex is held
 * for as long as one reader is using the page.
 */
static struct page *emergency_page;
static DEFINE_MUTEX(emergency_mutex);
23
24 struct page *emergency_read_begin(struct address_space *mapping, pgoff_t index)
25 {
26         filler_t *filler = (filler_t *)mapping->a_ops->readpage;
27         struct page *page;
28         int err;
29
30         page = read_cache_page(mapping, index, filler, NULL);
31         if (page)
32                 return page;
33
34         /* No more pages available, switch to emergency page */
35         printk(KERN_INFO"Logfs: Using emergency page\n");
36         mutex_lock(&emergency_mutex);
37         err = filler(NULL, emergency_page);
38         if (err) {
39                 mutex_unlock(&emergency_mutex);
40                 printk(KERN_EMERG"Logfs: Error reading emergency page\n");
41                 return ERR_PTR(err);
42         }
43         return emergency_page;
44 }
45
46 void emergency_read_end(struct page *page)
47 {
48         if (page == emergency_page)
49                 mutex_unlock(&emergency_mutex);
50         else
51                 page_cache_release(page);
52 }
53
54 static void dump_segfile(struct super_block *sb)
55 {
56         struct logfs_super *super = logfs_super(sb);
57         struct logfs_segment_entry se;
58         u32 segno;
59
60         for (segno = 0; segno < super->s_no_segs; segno++) {
61                 logfs_get_segment_entry(sb, segno, &se);
62                 printk("%3x: %6x %8x", segno, be32_to_cpu(se.ec_level),
63                                 be32_to_cpu(se.valid));
64                 if (++segno < super->s_no_segs) {
65                         logfs_get_segment_entry(sb, segno, &se);
66                         printk(" %6x %8x", be32_to_cpu(se.ec_level),
67                                         be32_to_cpu(se.valid));
68                 }
69                 if (++segno < super->s_no_segs) {
70                         logfs_get_segment_entry(sb, segno, &se);
71                         printk(" %6x %8x", be32_to_cpu(se.ec_level),
72                                         be32_to_cpu(se.valid));
73                 }
74                 if (++segno < super->s_no_segs) {
75                         logfs_get_segment_entry(sb, segno, &se);
76                         printk(" %6x %8x", be32_to_cpu(se.ec_level),
77                                         be32_to_cpu(se.valid));
78                 }
79                 printk("\n");
80         }
81 }
82
/*
 * logfs_crash_dump - dump debug information
 * @sb: superblock of the filesystem to dump
 *
 * The LogFS superblock only occupies part of a segment, and the intent is to
 * write as much debug information as can be gathered into that spare space.
 * As implemented here, the function only prints the segment file through
 * dump_segfile().
 */
void logfs_crash_dump(struct super_block *sb)
{
	dump_segfile(sb);
}
93
94 /*
95  * FIXME: There should be a reserve for root, similar to ext2.
96  */
97 int logfs_statfs(struct dentry *dentry, struct kstatfs *stats)
98 {
99         struct super_block *sb = dentry->d_sb;
100         struct logfs_super *super = logfs_super(sb);
101
102         stats->f_type           = LOGFS_MAGIC_U32;
103         stats->f_bsize          = sb->s_blocksize;
104         stats->f_blocks         = super->s_size >> LOGFS_BLOCK_BITS >> 3;
105         stats->f_bfree          = super->s_free_bytes >> sb->s_blocksize_bits;
106         stats->f_bavail         = super->s_free_bytes >> sb->s_blocksize_bits;
107         stats->f_files          = 0;
108         stats->f_ffree          = 0;
109         stats->f_namelen        = LOGFS_MAX_NAMELEN;
110         return 0;
111 }
112
113 static int logfs_sb_set(struct super_block *sb, void *_super)
114 {
115         struct logfs_super *super = _super;
116
117         sb->s_fs_info = super;
118         sb->s_mtd = super->s_mtd;
119         sb->s_bdev = super->s_bdev;
120 #ifdef CONFIG_BLOCK
121         if (sb->s_bdev)
122                 sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info;
123 #endif
124 #ifdef CONFIG_MTD
125         if (sb->s_mtd)
126                 sb->s_bdi = sb->s_mtd->backing_dev_info;
127 #endif
128         return 0;
129 }
130
131 static int logfs_sb_test(struct super_block *sb, void *_super)
132 {
133         struct logfs_super *super = _super;
134         struct mtd_info *mtd = super->s_mtd;
135
136         if (mtd && sb->s_mtd == mtd)
137                 return 1;
138         if (super->s_bdev && sb->s_bdev == super->s_bdev)
139                 return 1;
140         return 0;
141 }
142
143 static void set_segment_header(struct logfs_segment_header *sh, u8 type,
144                 u8 level, u32 segno, u32 ec)
145 {
146         sh->pad = 0;
147         sh->type = type;
148         sh->level = level;
149         sh->segno = cpu_to_be32(segno);
150         sh->ec = cpu_to_be32(ec);
151         sh->gec = cpu_to_be64(segno);
152         sh->crc = logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4);
153 }
154
155 static void logfs_write_ds(struct super_block *sb, struct logfs_disk_super *ds,
156                 u32 segno, u32 ec)
157 {
158         struct logfs_super *super = logfs_super(sb);
159         struct logfs_segment_header *sh = &ds->ds_sh;
160         int i;
161
162         memset(ds, 0, sizeof(*ds));
163         set_segment_header(sh, SEG_SUPER, 0, segno, ec);
164
165         ds->ds_ifile_levels     = super->s_ifile_levels;
166         ds->ds_iblock_levels    = super->s_iblock_levels;
167         ds->ds_data_levels      = super->s_data_levels; /* XXX: Remove */
168         ds->ds_segment_shift    = super->s_segshift;
169         ds->ds_block_shift      = sb->s_blocksize_bits;
170         ds->ds_write_shift      = super->s_writeshift;
171         ds->ds_filesystem_size  = cpu_to_be64(super->s_size);
172         ds->ds_segment_size     = cpu_to_be32(super->s_segsize);
173         ds->ds_bad_seg_reserve  = cpu_to_be32(super->s_bad_seg_reserve);
174         ds->ds_feature_incompat = cpu_to_be64(super->s_feature_incompat);
175         ds->ds_feature_ro_compat= cpu_to_be64(super->s_feature_ro_compat);
176         ds->ds_feature_compat   = cpu_to_be64(super->s_feature_compat);
177         ds->ds_feature_flags    = cpu_to_be64(super->s_feature_flags);
178         ds->ds_root_reserve     = cpu_to_be64(super->s_root_reserve);
179         ds->ds_speed_reserve    = cpu_to_be64(super->s_speed_reserve);
180         journal_for_each(i)
181                 ds->ds_journal_seg[i] = cpu_to_be32(super->s_journal_seg[i]);
182         ds->ds_magic            = cpu_to_be64(LOGFS_MAGIC);
183         ds->ds_crc = logfs_crc32(ds, sizeof(*ds),
184                         LOGFS_SEGMENT_HEADERSIZE + 12);
185 }
186
187 static int write_one_sb(struct super_block *sb,
188                 struct page *(*find_sb)(struct super_block *sb, u64 *ofs))
189 {
190         struct logfs_super *super = logfs_super(sb);
191         struct logfs_disk_super *ds;
192         struct logfs_segment_entry se;
193         struct page *page;
194         u64 ofs;
195         u32 ec, segno;
196         int err;
197
198         page = find_sb(sb, &ofs);
199         if (!page)
200                 return -EIO;
201         ds = page_address(page);
202         segno = seg_no(sb, ofs);
203         logfs_get_segment_entry(sb, segno, &se);
204         ec = be32_to_cpu(se.ec_level) >> 4;
205         ec++;
206         logfs_set_segment_erased(sb, segno, ec, 0);
207         logfs_write_ds(sb, ds, segno, ec);
208         err = super->s_devops->write_sb(sb, page);
209         page_cache_release(page);
210         return err;
211 }
212
213 int logfs_write_sb(struct super_block *sb)
214 {
215         struct logfs_super *super = logfs_super(sb);
216         int err;
217
218         /* First superblock */
219         err = write_one_sb(sb, super->s_devops->find_first_sb);
220         if (err)
221                 return err;
222
223         /* Last superblock */
224         err = write_one_sb(sb, super->s_devops->find_last_sb);
225         if (err)
226                 return err;
227         return 0;
228 }
229
230 static int ds_cmp(const void *ds0, const void *ds1)
231 {
232         size_t len = sizeof(struct logfs_disk_super);
233
234         /* We know the segment headers differ, so ignore them */
235         len -= LOGFS_SEGMENT_HEADERSIZE;
236         ds0 += LOGFS_SEGMENT_HEADERSIZE;
237         ds1 += LOGFS_SEGMENT_HEADERSIZE;
238         return memcmp(ds0, ds1, len);
239 }
240
241 static int logfs_recover_sb(struct super_block *sb)
242 {
243         struct logfs_super *super = logfs_super(sb);
244         struct logfs_disk_super _ds0, *ds0 = &_ds0;
245         struct logfs_disk_super _ds1, *ds1 = &_ds1;
246         int err, valid0, valid1;
247
248         /* read first superblock */
249         err = wbuf_read(sb, super->s_sb_ofs[0], sizeof(*ds0), ds0);
250         if (err)
251                 return err;
252         /* read last superblock */
253         err = wbuf_read(sb, super->s_sb_ofs[1], sizeof(*ds1), ds1);
254         if (err)
255                 return err;
256         valid0 = logfs_check_ds(ds0) == 0;
257         valid1 = logfs_check_ds(ds1) == 0;
258
259         if (!valid0 && valid1) {
260                 printk(KERN_INFO"First superblock is invalid - fixing.\n");
261                 return write_one_sb(sb, super->s_devops->find_first_sb);
262         }
263         if (valid0 && !valid1) {
264                 printk(KERN_INFO"Last superblock is invalid - fixing.\n");
265                 return write_one_sb(sb, super->s_devops->find_last_sb);
266         }
267         if (valid0 && valid1 && ds_cmp(ds0, ds1)) {
268                 printk(KERN_INFO"Superblocks don't match - fixing.\n");
269                 return logfs_write_sb(sb);
270         }
271         /* If neither is valid now, something's wrong.  Didn't we properly
272          * check them before?!? */
273         BUG_ON(!valid0 && !valid1);
274         return 0;
275 }
276
/*
 * logfs_make_writeable - finish the setup needed before data may be written
 * @sb: superblock
 *
 * Runs the steps in a fixed order and stops at the first one that fails.
 * Returns 0 on success or the error of the failing step.
 */
static int logfs_make_writeable(struct super_block *sb)
{
	int err;

	err = logfs_open_segfile(sb);
	if (!err)
		err = logfs_recover_sb(sb);	/* repair broken superblock copies */
	if (!err)
		err = logfs_check_areas(sb);	/* trailing unaccounted data */
	if (err)
		return err;

	/* Do one GC pass before any data gets dirtied */
	logfs_gc_pass(sb);

	/* after all initializations are done, replay the journal
	 * for rw-mounts, if necessary */
	return logfs_replay_journal(sb);
}
306
307 static int logfs_get_sb_final(struct super_block *sb)
308 {
309         struct logfs_super *super = logfs_super(sb);
310         struct inode *rootdir;
311         int err;
312
313         /* root dir */
314         rootdir = logfs_iget(sb, LOGFS_INO_ROOT);
315         if (IS_ERR(rootdir))
316                 goto fail;
317
318         sb->s_root = d_alloc_root(rootdir);
319         if (!sb->s_root) {
320                 iput(rootdir);
321                 goto fail;
322         }
323
324         /* at that point we know that ->put_super() will be called */
325         super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
326         if (!super->s_erase_page)
327                 return -ENOMEM;
328         memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE);
329
330         /* FIXME: check for read-only mounts */
331         err = logfs_make_writeable(sb);
332         if (err) {
333                 __free_page(super->s_erase_page);
334                 return err;
335         }
336
337         log_super("LogFS: Finished mounting\n");
338         return 0;
339
340 fail:
341         iput(super->s_master_inode);
342         iput(super->s_segfile_inode);
343         iput(super->s_mapping_inode);
344         return -EIO;
345 }
346
347 int logfs_check_ds(struct logfs_disk_super *ds)
348 {
349         struct logfs_segment_header *sh = &ds->ds_sh;
350
351         if (ds->ds_magic != cpu_to_be64(LOGFS_MAGIC))
352                 return -EINVAL;
353         if (sh->crc != logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4))
354                 return -EINVAL;
355         if (ds->ds_crc != logfs_crc32(ds, sizeof(*ds),
356                                 LOGFS_SEGMENT_HEADERSIZE + 12))
357                 return -EINVAL;
358         return 0;
359 }
360
361 static struct page *find_super_block(struct super_block *sb)
362 {
363         struct logfs_super *super = logfs_super(sb);
364         struct page *first, *last;
365
366         first = super->s_devops->find_first_sb(sb, &super->s_sb_ofs[0]);
367         if (!first || IS_ERR(first))
368                 return NULL;
369         last = super->s_devops->find_last_sb(sb, &super->s_sb_ofs[1]);
370         if (!last || IS_ERR(last)) {
371                 page_cache_release(first);
372                 return NULL;
373         }
374
375         if (!logfs_check_ds(page_address(first))) {
376                 page_cache_release(last);
377                 return first;
378         }
379
380         /* First one didn't work, try the second superblock */
381         if (!logfs_check_ds(page_address(last))) {
382                 page_cache_release(first);
383                 return last;
384         }
385
386         /* Neither worked, sorry folks */
387         page_cache_release(first);
388         page_cache_release(last);
389         return NULL;
390 }
391
392 static int __logfs_read_sb(struct super_block *sb)
393 {
394         struct logfs_super *super = logfs_super(sb);
395         struct page *page;
396         struct logfs_disk_super *ds;
397         int i;
398
399         page = find_super_block(sb);
400         if (!page)
401                 return -EINVAL;
402
403         ds = page_address(page);
404         super->s_size = be64_to_cpu(ds->ds_filesystem_size);
405         super->s_root_reserve = be64_to_cpu(ds->ds_root_reserve);
406         super->s_speed_reserve = be64_to_cpu(ds->ds_speed_reserve);
407         super->s_bad_seg_reserve = be32_to_cpu(ds->ds_bad_seg_reserve);
408         super->s_segsize = 1 << ds->ds_segment_shift;
409         super->s_segmask = (1 << ds->ds_segment_shift) - 1;
410         super->s_segshift = ds->ds_segment_shift;
411         sb->s_blocksize = 1 << ds->ds_block_shift;
412         sb->s_blocksize_bits = ds->ds_block_shift;
413         super->s_writesize = 1 << ds->ds_write_shift;
414         super->s_writeshift = ds->ds_write_shift;
415         super->s_no_segs = super->s_size >> super->s_segshift;
416         super->s_no_blocks = super->s_segsize >> sb->s_blocksize_bits;
417         super->s_feature_incompat = be64_to_cpu(ds->ds_feature_incompat);
418         super->s_feature_ro_compat = be64_to_cpu(ds->ds_feature_ro_compat);
419         super->s_feature_compat = be64_to_cpu(ds->ds_feature_compat);
420         super->s_feature_flags = be64_to_cpu(ds->ds_feature_flags);
421
422         journal_for_each(i)
423                 super->s_journal_seg[i] = be32_to_cpu(ds->ds_journal_seg[i]);
424
425         super->s_ifile_levels = ds->ds_ifile_levels;
426         super->s_iblock_levels = ds->ds_iblock_levels;
427         super->s_data_levels = ds->ds_data_levels;
428         super->s_total_levels = super->s_ifile_levels + super->s_iblock_levels
429                 + super->s_data_levels;
430         page_cache_release(page);
431         return 0;
432 }
433
434 static int logfs_read_sb(struct super_block *sb, int read_only)
435 {
436         struct logfs_super *super = logfs_super(sb);
437         int ret;
438
439         super->s_btree_pool = mempool_create(32, btree_alloc, btree_free, NULL);
440         if (!super->s_btree_pool)
441                 return -ENOMEM;
442
443         btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool);
444         btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool);
445         btree_init_mempool32(&super->s_shadow_tree.segment_map,
446                         super->s_btree_pool);
447
448         ret = logfs_init_mapping(sb);
449         if (ret)
450                 return ret;
451
452         ret = __logfs_read_sb(sb);
453         if (ret)
454                 return ret;
455
456         if (super->s_feature_incompat & ~LOGFS_FEATURES_INCOMPAT)
457                 return -EIO;
458         if ((super->s_feature_ro_compat & ~LOGFS_FEATURES_RO_COMPAT) &&
459                         !read_only)
460                 return -EIO;
461
462         ret = logfs_init_rw(sb);
463         if (ret)
464                 return ret;
465
466         ret = logfs_init_areas(sb);
467         if (ret)
468                 return ret;
469
470         ret = logfs_init_gc(sb);
471         if (ret)
472                 return ret;
473
474         ret = logfs_init_journal(sb);
475         if (ret)
476                 return ret;
477
478         return 0;
479 }
480
481 static void logfs_kill_sb(struct super_block *sb)
482 {
483         struct logfs_super *super = logfs_super(sb);
484
485         log_super("LogFS: Start unmounting\n");
486         /* Alias entries slow down mount, so evict as many as possible */
487         sync_filesystem(sb);
488         logfs_write_anchor(sb);
489         free_areas(sb);
490
491         /*
492          * From this point on alias entries are simply dropped - and any
493          * writes to the object store are considered bugs.
494          */
495         log_super("LogFS: Now in shutdown\n");
496         generic_shutdown_super(sb);
497         super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN;
498
499         BUG_ON(super->s_dirty_used_bytes || super->s_dirty_free_bytes);
500
501         logfs_cleanup_gc(sb);
502         logfs_cleanup_journal(sb);
503         logfs_cleanup_areas(sb);
504         logfs_cleanup_rw(sb);
505         if (super->s_erase_page)
506                 __free_page(super->s_erase_page);
507         super->s_devops->put_device(super);
508         logfs_mempool_destroy(super->s_btree_pool);
509         logfs_mempool_destroy(super->s_alias_pool);
510         kfree(super);
511         log_super("LogFS: Finished unmounting\n");
512 }
513
514 static struct dentry *logfs_get_sb_device(struct logfs_super *super,
515                 struct file_system_type *type, int flags)
516 {
517         struct super_block *sb;
518         int err = -ENOMEM;
519         static int mount_count;
520
521         log_super("LogFS: Start mount %x\n", mount_count++);
522
523         err = -EINVAL;
524         sb = sget(type, logfs_sb_test, logfs_sb_set, super);
525         if (IS_ERR(sb)) {
526                 super->s_devops->put_device(super);
527                 kfree(super);
528                 return ERR_CAST(sb);
529         }
530
531         if (sb->s_root) {
532                 /* Device is already in use */
533                 super->s_devops->put_device(super);
534                 kfree(super);
535                 return dget(sb->s_root);
536         }
537
538         /*
539          * sb->s_maxbytes is limited to 8TB.  On 32bit systems, the page cache
540          * only covers 16TB and the upper 8TB are used for indirect blocks.
541          * On 64bit system we could bump up the limit, but that would make
542          * the filesystem incompatible with 32bit systems.
543          */
544         sb->s_maxbytes  = (1ull << 43) - 1;
545         sb->s_max_links = LOGFS_LINK_MAX;
546         sb->s_op        = &logfs_super_operations;
547         sb->s_flags     = flags | MS_NOATIME;
548
549         err = logfs_read_sb(sb, sb->s_flags & MS_RDONLY);
550         if (err)
551                 goto err1;
552
553         sb->s_flags |= MS_ACTIVE;
554         err = logfs_get_sb_final(sb);
555         if (err) {
556                 deactivate_locked_super(sb);
557                 return ERR_PTR(err);
558         }
559         return dget(sb->s_root);
560
561 err1:
562         /* no ->s_root, no ->put_super() */
563         iput(super->s_master_inode);
564         iput(super->s_segfile_inode);
565         iput(super->s_mapping_inode);
566         deactivate_locked_super(sb);
567         return ERR_PTR(err);
568 }
569
570 static struct dentry *logfs_mount(struct file_system_type *type, int flags,
571                 const char *devname, void *data)
572 {
573         ulong mtdnr;
574         struct logfs_super *super;
575         int err;
576
577         super = kzalloc(sizeof(*super), GFP_KERNEL);
578         if (!super)
579                 return ERR_PTR(-ENOMEM);
580
581         mutex_init(&super->s_dirop_mutex);
582         mutex_init(&super->s_object_alias_mutex);
583         INIT_LIST_HEAD(&super->s_freeing_list);
584
585         if (!devname)
586                 err = logfs_get_sb_bdev(super, type, devname);
587         else if (strncmp(devname, "mtd", 3))
588                 err = logfs_get_sb_bdev(super, type, devname);
589         else {
590                 char *garbage;
591                 mtdnr = simple_strtoul(devname+3, &garbage, 0);
592                 if (*garbage)
593                         err = -EINVAL;
594                 else
595                         err = logfs_get_sb_mtd(super, mtdnr);
596         }
597
598         if (err) {
599                 kfree(super);
600                 return ERR_PTR(err);
601         }
602
603         return logfs_get_sb_device(super, type, flags);
604 }
605
606 static struct file_system_type logfs_fs_type = {
607         .owner          = THIS_MODULE,
608         .name           = "logfs",
609         .mount          = logfs_mount,
610         .kill_sb        = logfs_kill_sb,
611         .fs_flags       = FS_REQUIRES_DEV,
612
613 };
614
615 static int __init logfs_init(void)
616 {
617         int ret;
618
619         emergency_page = alloc_pages(GFP_KERNEL, 0);
620         if (!emergency_page)
621                 return -ENOMEM;
622
623         ret = logfs_compr_init();
624         if (ret)
625                 goto out1;
626
627         ret = logfs_init_inode_cache();
628         if (ret)
629                 goto out2;
630
631         return register_filesystem(&logfs_fs_type);
632 out2:
633         logfs_compr_exit();
634 out1:
635         __free_pages(emergency_page, 0);
636         return ret;
637 }
638
639 static void __exit logfs_exit(void)
640 {
641         unregister_filesystem(&logfs_fs_type);
642         logfs_destroy_inode_cache();
643         logfs_compr_exit();
644         __free_pages(emergency_page, 0);
645 }
646
647 module_init(logfs_init);
648 module_exit(logfs_exit);
649
650 MODULE_LICENSE("GPL v2");
651 MODULE_AUTHOR("Joern Engel <joern@logfs.org>");
652 MODULE_DESCRIPTION("scalable flash filesystem");