[LogFS] add new flash file system
[linux-2.6.git] / fs / logfs / super.c
1 /*
2  * fs/logfs/super.c
3  *
4  * As should be obvious for Linux kernel code, license is GPLv2
5  *
6  * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
7  *
8  * Generally contains mount/umount code and also serves as a dump area for
9  * any functions that don't fit elsewhere and neither justify a file of their
10  * own.
11  */
12 #include "logfs.h"
13 #include <linux/bio.h>
14 #include <linux/mtd/mtd.h>
15 #include <linux/statfs.h>
16 #include <linux/buffer_head.h>
17
/*
 * Fallback page for emergency_read_begin()/emergency_read_end().
 *
 * emergency_page is allocated once in logfs_init() and freed in logfs_exit().
 * emergency_mutex is taken in emergency_read_begin() before the page is
 * handed out and dropped again in emergency_read_end().
 */
static DEFINE_MUTEX(emergency_mutex);
static struct page *emergency_page;
20
21 struct page *emergency_read_begin(struct address_space *mapping, pgoff_t index)
22 {
23         filler_t *filler = (filler_t *)mapping->a_ops->readpage;
24         struct page *page;
25         int err;
26
27         page = read_cache_page(mapping, index, filler, NULL);
28         if (page)
29                 return page;
30
31         /* No more pages available, switch to emergency page */
32         printk(KERN_INFO"Logfs: Using emergency page\n");
33         mutex_lock(&emergency_mutex);
34         err = filler(NULL, emergency_page);
35         if (err) {
36                 mutex_unlock(&emergency_mutex);
37                 printk(KERN_EMERG"Logfs: Error reading emergency page\n");
38                 return ERR_PTR(err);
39         }
40         return emergency_page;
41 }
42
43 void emergency_read_end(struct page *page)
44 {
45         if (page == emergency_page)
46                 mutex_unlock(&emergency_mutex);
47         else
48                 page_cache_release(page);
49 }
50
51 static void dump_segfile(struct super_block *sb)
52 {
53         struct logfs_super *super = logfs_super(sb);
54         struct logfs_segment_entry se;
55         u32 segno;
56
57         for (segno = 0; segno < super->s_no_segs; segno++) {
58                 logfs_get_segment_entry(sb, segno, &se);
59                 printk("%3x: %6x %8x", segno, be32_to_cpu(se.ec_level),
60                                 be32_to_cpu(se.valid));
61                 if (++segno < super->s_no_segs) {
62                         logfs_get_segment_entry(sb, segno, &se);
63                         printk(" %6x %8x", be32_to_cpu(se.ec_level),
64                                         be32_to_cpu(se.valid));
65                 }
66                 if (++segno < super->s_no_segs) {
67                         logfs_get_segment_entry(sb, segno, &se);
68                         printk(" %6x %8x", be32_to_cpu(se.ec_level),
69                                         be32_to_cpu(se.valid));
70                 }
71                 if (++segno < super->s_no_segs) {
72                         logfs_get_segment_entry(sb, segno, &se);
73                         printk(" %6x %8x", be32_to_cpu(se.ec_level),
74                                         be32_to_cpu(se.valid));
75                 }
76                 printk("\n");
77         }
78 }
79
/*
 * logfs_crash_dump - dump debug information
 *
 * The LogFS superblock only occupies part of a segment.  The stated intent is
 * to write as much debug information as can be gathered into the spare space.
 * As implemented here, the function only calls dump_segfile(), which prints
 * the segment entries through printk(); this function does not itself issue
 * any write to the device.
 */
void logfs_crash_dump(struct super_block *sb)
{
        dump_segfile(sb);
}
90
91 /*
92  * TODO: move to lib/string.c
93  */
/**
 * memchr_inv - Find the first byte that differs from a given value.
 * @s: The memory area
 * @c: The byte value the area is expected to be filled with; only its low
 *     eight bits are used
 * @n: The size of the area.
 *
 * returns the address of the first byte other than @c, or %NULL
 * if the whole buffer contains just @c (including when @n is zero).
 */
void *memchr_inv(const void *s, int c, size_t n)
{
        const unsigned char *bytes = s;
        const unsigned char fill = (unsigned char)c;
        size_t i;

        for (i = 0; i < n; i++) {
                if (bytes[i] != fill)
                        return (void *)&bytes[i];
        }

        return NULL;
}
112
113 /*
114  * FIXME: There should be a reserve for root, similar to ext2.
115  */
116 int logfs_statfs(struct dentry *dentry, struct kstatfs *stats)
117 {
118         struct super_block *sb = dentry->d_sb;
119         struct logfs_super *super = logfs_super(sb);
120
121         stats->f_type           = LOGFS_MAGIC_U32;
122         stats->f_bsize          = sb->s_blocksize;
123         stats->f_blocks         = super->s_size >> LOGFS_BLOCK_BITS >> 3;
124         stats->f_bfree          = super->s_free_bytes >> sb->s_blocksize_bits;
125         stats->f_bavail         = super->s_free_bytes >> sb->s_blocksize_bits;
126         stats->f_files          = 0;
127         stats->f_ffree          = 0;
128         stats->f_namelen        = LOGFS_MAX_NAMELEN;
129         return 0;
130 }
131
132 static int logfs_sb_set(struct super_block *sb, void *_super)
133 {
134         struct logfs_super *super = _super;
135
136         sb->s_fs_info = super;
137         sb->s_mtd = super->s_mtd;
138         sb->s_bdev = super->s_bdev;
139         return 0;
140 }
141
142 static int logfs_sb_test(struct super_block *sb, void *_super)
143 {
144         struct logfs_super *super = _super;
145         struct mtd_info *mtd = super->s_mtd;
146
147         if (mtd && sb->s_mtd == mtd)
148                 return 1;
149         if (super->s_bdev && sb->s_bdev == super->s_bdev)
150                 return 1;
151         return 0;
152 }
153
154 static void set_segment_header(struct logfs_segment_header *sh, u8 type,
155                 u8 level, u32 segno, u32 ec)
156 {
157         sh->pad = 0;
158         sh->type = type;
159         sh->level = level;
160         sh->segno = cpu_to_be32(segno);
161         sh->ec = cpu_to_be32(ec);
162         sh->gec = cpu_to_be64(segno);
163         sh->crc = logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4);
164 }
165
166 static void logfs_write_ds(struct super_block *sb, struct logfs_disk_super *ds,
167                 u32 segno, u32 ec)
168 {
169         struct logfs_super *super = logfs_super(sb);
170         struct logfs_segment_header *sh = &ds->ds_sh;
171         int i;
172
173         memset(ds, 0, sizeof(*ds));
174         set_segment_header(sh, SEG_SUPER, 0, segno, ec);
175
176         ds->ds_ifile_levels     = super->s_ifile_levels;
177         ds->ds_iblock_levels    = super->s_iblock_levels;
178         ds->ds_data_levels      = super->s_data_levels; /* XXX: Remove */
179         ds->ds_segment_shift    = super->s_segshift;
180         ds->ds_block_shift      = sb->s_blocksize_bits;
181         ds->ds_write_shift      = super->s_writeshift;
182         ds->ds_filesystem_size  = cpu_to_be64(super->s_size);
183         ds->ds_segment_size     = cpu_to_be32(super->s_segsize);
184         ds->ds_bad_seg_reserve  = cpu_to_be32(super->s_bad_seg_reserve);
185         ds->ds_feature_incompat = cpu_to_be64(super->s_feature_incompat);
186         ds->ds_feature_ro_compat= cpu_to_be64(super->s_feature_ro_compat);
187         ds->ds_feature_compat   = cpu_to_be64(super->s_feature_compat);
188         ds->ds_feature_flags    = cpu_to_be64(super->s_feature_flags);
189         ds->ds_root_reserve     = cpu_to_be64(super->s_root_reserve);
190         ds->ds_speed_reserve    = cpu_to_be64(super->s_speed_reserve);
191         journal_for_each(i)
192                 ds->ds_journal_seg[i] = cpu_to_be32(super->s_journal_seg[i]);
193         ds->ds_magic            = cpu_to_be64(LOGFS_MAGIC);
194         ds->ds_crc = logfs_crc32(ds, sizeof(*ds),
195                         LOGFS_SEGMENT_HEADERSIZE + 12);
196 }
197
198 static int write_one_sb(struct super_block *sb,
199                 struct page *(*find_sb)(struct super_block *sb, u64 *ofs))
200 {
201         struct logfs_super *super = logfs_super(sb);
202         struct logfs_disk_super *ds;
203         struct logfs_segment_entry se;
204         struct page *page;
205         u64 ofs;
206         u32 ec, segno;
207         int err;
208
209         page = find_sb(sb, &ofs);
210         if (!page)
211                 return -EIO;
212         ds = page_address(page);
213         segno = seg_no(sb, ofs);
214         logfs_get_segment_entry(sb, segno, &se);
215         ec = be32_to_cpu(se.ec_level) >> 4;
216         ec++;
217         logfs_set_segment_erased(sb, segno, ec, 0);
218         logfs_write_ds(sb, ds, segno, ec);
219         err = super->s_devops->write_sb(sb, page);
220         page_cache_release(page);
221         return err;
222 }
223
224 int logfs_write_sb(struct super_block *sb)
225 {
226         struct logfs_super *super = logfs_super(sb);
227         int err;
228
229         /* First superblock */
230         err = write_one_sb(sb, super->s_devops->find_first_sb);
231         if (err)
232                 return err;
233
234         /* Last superblock */
235         err = write_one_sb(sb, super->s_devops->find_last_sb);
236         if (err)
237                 return err;
238         return 0;
239 }
240
241 static int ds_cmp(const void *ds0, const void *ds1)
242 {
243         size_t len = sizeof(struct logfs_disk_super);
244
245         /* We know the segment headers differ, so ignore them */
246         len -= LOGFS_SEGMENT_HEADERSIZE;
247         ds0 += LOGFS_SEGMENT_HEADERSIZE;
248         ds1 += LOGFS_SEGMENT_HEADERSIZE;
249         return memcmp(ds0, ds1, len);
250 }
251
252 static int logfs_recover_sb(struct super_block *sb)
253 {
254         struct logfs_super *super = logfs_super(sb);
255         struct logfs_disk_super _ds0, *ds0 = &_ds0;
256         struct logfs_disk_super _ds1, *ds1 = &_ds1;
257         int err, valid0, valid1;
258
259         /* read first superblock */
260         err = wbuf_read(sb, super->s_sb_ofs[0], sizeof(*ds0), ds0);
261         if (err)
262                 return err;
263         /* read last superblock */
264         err = wbuf_read(sb, super->s_sb_ofs[1], sizeof(*ds1), ds1);
265         if (err)
266                 return err;
267         valid0 = logfs_check_ds(ds0) == 0;
268         valid1 = logfs_check_ds(ds1) == 0;
269
270         if (!valid0 && valid1) {
271                 printk(KERN_INFO"First superblock is invalid - fixing.\n");
272                 return write_one_sb(sb, super->s_devops->find_first_sb);
273         }
274         if (valid0 && !valid1) {
275                 printk(KERN_INFO"Last superblock is invalid - fixing.\n");
276                 return write_one_sb(sb, super->s_devops->find_last_sb);
277         }
278         if (valid0 && valid1 && ds_cmp(ds0, ds1)) {
279                 printk(KERN_INFO"Superblocks don't match - fixing.\n");
280                 return write_one_sb(sb, super->s_devops->find_last_sb);
281         }
282         /* If neither is valid now, something's wrong.  Didn't we properly
283          * check them before?!? */
284         BUG_ON(!valid0 && !valid1);
285         return 0;
286 }
287
/*
 * logfs_make_writeable - run the steps needed before the fs may be written
 * @sb: superblock being mounted
 *
 * In order: repair broken superblock copies, check areas for trailing
 * unaccounted data, open the segment file, run one GC pass and replay the
 * journal.  Returns the first error encountered, or 0.
 */
static int logfs_make_writeable(struct super_block *sb)
{
        int err;

        /* Repair any broken superblock copies */
        err = logfs_recover_sb(sb);
        /* Check areas for trailing unaccounted data */
        if (!err)
                err = logfs_check_areas(sb);
        if (!err)
                err = logfs_open_segfile(sb);
        if (err)
                return err;

        /* Do one GC pass before any data gets dirtied */
        logfs_gc_pass(sb);

        /* after all initializations are done, replay the journal
         * for rw-mounts, if necessary */
        return logfs_replay_journal(sb);
}
317
318 static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
319 {
320         struct inode *rootdir;
321         int err;
322
323         /* root dir */
324         rootdir = logfs_iget(sb, LOGFS_INO_ROOT);
325         if (IS_ERR(rootdir))
326                 goto fail;
327
328         sb->s_root = d_alloc_root(rootdir);
329         if (!sb->s_root)
330                 goto fail;
331
332         /* FIXME: check for read-only mounts */
333         err = logfs_make_writeable(sb);
334         if (err)
335                 goto fail2;
336
337         log_super("LogFS: Finished mounting\n");
338         simple_set_mnt(mnt, sb);
339         return 0;
340
341 fail2:
342         iput(rootdir);
343 fail:
344         iput(logfs_super(sb)->s_master_inode);
345         return -EIO;
346 }
347
348 int logfs_check_ds(struct logfs_disk_super *ds)
349 {
350         struct logfs_segment_header *sh = &ds->ds_sh;
351
352         if (ds->ds_magic != cpu_to_be64(LOGFS_MAGIC))
353                 return -EINVAL;
354         if (sh->crc != logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4))
355                 return -EINVAL;
356         if (ds->ds_crc != logfs_crc32(ds, sizeof(*ds),
357                                 LOGFS_SEGMENT_HEADERSIZE + 12))
358                 return -EINVAL;
359         return 0;
360 }
361
362 static struct page *find_super_block(struct super_block *sb)
363 {
364         struct logfs_super *super = logfs_super(sb);
365         struct page *first, *last;
366
367         first = super->s_devops->find_first_sb(sb, &super->s_sb_ofs[0]);
368         if (!first || IS_ERR(first))
369                 return NULL;
370         last = super->s_devops->find_last_sb(sb, &super->s_sb_ofs[1]);
371         if (!last || IS_ERR(first)) {
372                 page_cache_release(first);
373                 return NULL;
374         }
375
376         if (!logfs_check_ds(page_address(first))) {
377                 page_cache_release(last);
378                 return first;
379         }
380
381         /* First one didn't work, try the second superblock */
382         if (!logfs_check_ds(page_address(last))) {
383                 page_cache_release(first);
384                 return last;
385         }
386
387         /* Neither worked, sorry folks */
388         page_cache_release(first);
389         page_cache_release(last);
390         return NULL;
391 }
392
393 static int __logfs_read_sb(struct super_block *sb)
394 {
395         struct logfs_super *super = logfs_super(sb);
396         struct page *page;
397         struct logfs_disk_super *ds;
398         int i;
399
400         page = find_super_block(sb);
401         if (!page)
402                 return -EIO;
403
404         ds = page_address(page);
405         super->s_size = be64_to_cpu(ds->ds_filesystem_size);
406         super->s_root_reserve = be64_to_cpu(ds->ds_root_reserve);
407         super->s_speed_reserve = be64_to_cpu(ds->ds_speed_reserve);
408         super->s_bad_seg_reserve = be32_to_cpu(ds->ds_bad_seg_reserve);
409         super->s_segsize = 1 << ds->ds_segment_shift;
410         super->s_segmask = (1 << ds->ds_segment_shift) - 1;
411         super->s_segshift = ds->ds_segment_shift;
412         sb->s_blocksize = 1 << ds->ds_block_shift;
413         sb->s_blocksize_bits = ds->ds_block_shift;
414         super->s_writesize = 1 << ds->ds_write_shift;
415         super->s_writeshift = ds->ds_write_shift;
416         super->s_no_segs = super->s_size >> super->s_segshift;
417         super->s_no_blocks = super->s_segsize >> sb->s_blocksize_bits;
418         super->s_feature_incompat = be64_to_cpu(ds->ds_feature_incompat);
419         super->s_feature_ro_compat = be64_to_cpu(ds->ds_feature_ro_compat);
420         super->s_feature_compat = be64_to_cpu(ds->ds_feature_compat);
421         super->s_feature_flags = be64_to_cpu(ds->ds_feature_flags);
422
423         journal_for_each(i)
424                 super->s_journal_seg[i] = be32_to_cpu(ds->ds_journal_seg[i]);
425
426         super->s_ifile_levels = ds->ds_ifile_levels;
427         super->s_iblock_levels = ds->ds_iblock_levels;
428         super->s_data_levels = ds->ds_data_levels;
429         super->s_total_levels = super->s_ifile_levels + super->s_iblock_levels
430                 + super->s_data_levels;
431         page_cache_release(page);
432         return 0;
433 }
434
435 static int logfs_read_sb(struct super_block *sb)
436 {
437         struct logfs_super *super = logfs_super(sb);
438         int ret;
439
440         super->s_btree_pool = mempool_create(32, btree_alloc, btree_free, NULL);
441         if (!super->s_btree_pool)
442                 return -ENOMEM;
443
444         btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool);
445         btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool);
446
447         ret = logfs_init_mapping(sb);
448         if (ret)
449                 return ret;
450
451         ret = __logfs_read_sb(sb);
452         if (ret)
453                 return ret;
454
455         mutex_init(&super->s_dirop_mutex);
456         mutex_init(&super->s_object_alias_mutex);
457         INIT_LIST_HEAD(&super->s_freeing_list);
458
459         ret = logfs_init_rw(sb);
460         if (ret)
461                 return ret;
462
463         ret = logfs_init_areas(sb);
464         if (ret)
465                 return ret;
466
467         ret = logfs_init_gc(sb);
468         if (ret)
469                 return ret;
470
471         ret = logfs_init_journal(sb);
472         if (ret)
473                 return ret;
474
475         return 0;
476 }
477
/*
 * logfs_kill_sb - tear down a logfs superblock (file_system_type.kill_sb)
 * @sb: superblock to destroy
 *
 * Syncs the filesystem and writes the anchor, flags the superblock as
 * shutting down, runs the generic VFS shutdown and then releases the logfs
 * subsystems, the device, both mempools and the private data itself.
 */
static void logfs_kill_sb(struct super_block *sb)
{
        struct logfs_super *super = logfs_super(sb);

        log_super("LogFS: Start unmounting\n");
        /* Alias entries slow down mount, so evict as many as possible */
        sync_filesystem(sb);
        logfs_write_anchor(super->s_master_inode);

        /*
         * From this point on alias entries are simply dropped - and any
         * writes to the object store are considered bugs.
         */
        super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN;
        log_super("LogFS: Now in shutdown\n");
        generic_shutdown_super(sb);

        /* No dirty byte accounting may be left over after shutdown */
        BUG_ON(super->s_dirty_used_bytes || super->s_dirty_free_bytes);

        /* Cleanup runs in the reverse of the init order in logfs_read_sb() */
        logfs_cleanup_gc(sb);
        logfs_cleanup_journal(sb);
        logfs_cleanup_areas(sb);
        logfs_cleanup_rw(sb);
        super->s_devops->put_device(sb);
        mempool_destroy(super->s_btree_pool);
        mempool_destroy(super->s_alias_pool);
        /* super must not be dereferenced past this point */
        kfree(super);
        log_super("LogFS: Finished unmounting\n");
}
507
508 int logfs_get_sb_device(struct file_system_type *type, int flags,
509                 struct mtd_info *mtd, struct block_device *bdev,
510                 const struct logfs_device_ops *devops, struct vfsmount *mnt)
511 {
512         struct logfs_super *super;
513         struct super_block *sb;
514         int err = -ENOMEM;
515         static int mount_count;
516
517         log_super("LogFS: Start mount %x\n", mount_count++);
518         super = kzalloc(sizeof(*super), GFP_KERNEL);
519         if (!super)
520                 goto err0;
521
522         super->s_mtd    = mtd;
523         super->s_bdev   = bdev;
524         err = -EINVAL;
525         sb = sget(type, logfs_sb_test, logfs_sb_set, super);
526         if (IS_ERR(sb))
527                 goto err0;
528
529         if (sb->s_root) {
530                 /* Device is already in use */
531                 err = 0;
532                 simple_set_mnt(mnt, sb);
533                 goto err0;
534         }
535
536         super->s_devops = devops;
537
538         /*
539          * sb->s_maxbytes is limited to 8TB.  On 32bit systems, the page cache
540          * only covers 16TB and the upper 8TB are used for indirect blocks.
541          * On 64bit system we could bump up the limit, but that would make
542          * the filesystem incompatible with 32bit systems.
543          */
544         sb->s_maxbytes  = (1ull << 43) - 1;
545         sb->s_op        = &logfs_super_operations;
546         sb->s_flags     = flags | MS_NOATIME;
547
548         err = logfs_read_sb(sb);
549         if (err)
550                 goto err1;
551
552         sb->s_flags |= MS_ACTIVE;
553         err = logfs_get_sb_final(sb, mnt);
554         if (err)
555                 goto err1;
556         return 0;
557
558 err1:
559         up_write(&sb->s_umount);
560         deactivate_super(sb);
561         return err;
562 err0:
563         kfree(super);
564         //devops->put_device(sb);
565         return err;
566 }
567
568 static int logfs_get_sb(struct file_system_type *type, int flags,
569                 const char *devname, void *data, struct vfsmount *mnt)
570 {
571         ulong mtdnr;
572
573         if (!devname)
574                 return logfs_get_sb_bdev(type, flags, devname, mnt);
575         if (strncmp(devname, "mtd", 3))
576                 return logfs_get_sb_bdev(type, flags, devname, mnt);
577
578         {
579                 char *garbage;
580                 mtdnr = simple_strtoul(devname+3, &garbage, 0);
581                 if (*garbage)
582                         return -EINVAL;
583         }
584
585         return logfs_get_sb_mtd(type, flags, mtdnr, mnt);
586 }
587
588 static struct file_system_type logfs_fs_type = {
589         .owner          = THIS_MODULE,
590         .name           = "logfs",
591         .get_sb         = logfs_get_sb,
592         .kill_sb        = logfs_kill_sb,
593         .fs_flags       = FS_REQUIRES_DEV,
594
595 };
596
597 static int __init logfs_init(void)
598 {
599         int ret;
600
601         emergency_page = alloc_pages(GFP_KERNEL, 0);
602         if (!emergency_page)
603                 return -ENOMEM;
604
605         ret = logfs_compr_init();
606         if (ret)
607                 goto out1;
608
609         ret = logfs_init_inode_cache();
610         if (ret)
611                 goto out2;
612
613         return register_filesystem(&logfs_fs_type);
614 out2:
615         logfs_compr_exit();
616 out1:
617         __free_pages(emergency_page, 0);
618         return ret;
619 }
620
/*
 * logfs_exit - module teardown
 *
 * Unregisters the filesystem type first, then releases the inode cache, the
 * compression state and the emergency page, i.e. the reverse of the setup
 * order in logfs_init().
 */
static void __exit logfs_exit(void)
{
        unregister_filesystem(&logfs_fs_type);
        logfs_destroy_inode_cache();
        logfs_compr_exit();
        __free_pages(emergency_page, 0);
}
628
/* Module entry and exit points */
module_init(logfs_init);
module_exit(logfs_exit);

/* Module metadata */
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Joern Engel <joern@logfs.org>");
MODULE_DESCRIPTION("scalable flash filesystem");