convert exofs to ->evict_inode()
[linux-3.10.git] / fs / exofs / super.c
1 /*
2  * Copyright (C) 2005, 2006
3  * Avishay Traeger (avishay@gmail.com)
4  * Copyright (C) 2008, 2009
5  * Boaz Harrosh <bharrosh@panasas.com>
6  *
7  * Copyrights for code taken from ext2:
8  *     Copyright (C) 1992, 1993, 1994, 1995
9  *     Remy Card (card@masi.ibp.fr)
10  *     Laboratoire MASI - Institut Blaise Pascal
11  *     Universite Pierre et Marie Curie (Paris VI)
12  *     from
13  *     linux/fs/minix/inode.c
14  *     Copyright (C) 1991, 1992  Linus Torvalds
15  *
16  * This file is part of exofs.
17  *
18  * exofs is free software; you can redistribute it and/or modify
19  * it under the terms of the GNU General Public License as published by
20  * the Free Software Foundation.  Since it is based on ext2, and the only
21  * valid version of GPL for the Linux kernel is version 2, the only valid
22  * version of GPL for exofs is version 2.
23  *
24  * exofs is distributed in the hope that it will be useful,
25  * but WITHOUT ANY WARRANTY; without even the implied warranty of
26  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
27  * GNU General Public License for more details.
28  *
29  * You should have received a copy of the GNU General Public License
30  * along with exofs; if not, write to the Free Software
31  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
32  */
33
34 #include <linux/smp_lock.h>
35 #include <linux/string.h>
36 #include <linux/parser.h>
37 #include <linux/vfs.h>
38 #include <linux/random.h>
39 #include <linux/exportfs.h>
40 #include <linux/slab.h>
41
42 #include "exofs.h"
43
44 /******************************************************************************
45  * MOUNT OPTIONS
46  *****************************************************************************/
47
/*
 * Values collected at mount time.  dev_name is filled in by exofs_get_sb();
 * pid and timeout are filled in by parse_options().
 */
struct exofs_mountopt {
        const char *dev_name;   /* path handed to osduld_path_lookup() */
        uint64_t pid;           /* partition ID from the "pid=" option */
        int timeout;            /* "to=" value times HZ, or the default */
};
56
/*
 * exofs-specific mount-time option tokens.  Opt_pid and Opt_to are the
 * values parse_options() acts on; Opt_err terminates the tokens table.
 * Opt_mkfs and Opt_format have no pattern in the tokens table in this file.
 */
enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err };
61
/*
 * Our mount-time options.  These should ideally be 64-bit unsigned, but the
 * kernel's parsing functions do not currently support that.  32-bit should be
 * sufficient for most applications now.
 *
 * The table is passed to match_token() by parse_options(); the NULL pattern
 * entry (Opt_err) ends the table.
 */
static match_table_t tokens = {
        {Opt_pid, "pid=%u"},
        {Opt_to, "to=%u"},
        {Opt_err, NULL}
};
72
73 /*
74  * The main option parsing method.  Also makes sure that all of the mandatory
75  * mount options were set.
76  */
77 static int parse_options(char *options, struct exofs_mountopt *opts)
78 {
79         char *p;
80         substring_t args[MAX_OPT_ARGS];
81         int option;
82         bool s_pid = false;
83
84         EXOFS_DBGMSG("parse_options %s\n", options);
85         /* defaults */
86         memset(opts, 0, sizeof(*opts));
87         opts->timeout = BLK_DEFAULT_SG_TIMEOUT;
88
89         while ((p = strsep(&options, ",")) != NULL) {
90                 int token;
91                 char str[32];
92
93                 if (!*p)
94                         continue;
95
96                 token = match_token(p, tokens, args);
97                 switch (token) {
98                 case Opt_pid:
99                         if (0 == match_strlcpy(str, &args[0], sizeof(str)))
100                                 return -EINVAL;
101                         opts->pid = simple_strtoull(str, NULL, 0);
102                         if (opts->pid < EXOFS_MIN_PID) {
103                                 EXOFS_ERR("Partition ID must be >= %u",
104                                           EXOFS_MIN_PID);
105                                 return -EINVAL;
106                         }
107                         s_pid = 1;
108                         break;
109                 case Opt_to:
110                         if (match_int(&args[0], &option))
111                                 return -EINVAL;
112                         if (option <= 0) {
113                                 EXOFS_ERR("Timout must be > 0");
114                                 return -EINVAL;
115                         }
116                         opts->timeout = option * HZ;
117                         break;
118                 }
119         }
120
121         if (!s_pid) {
122                 EXOFS_ERR("Need to specify the following options:\n");
123                 EXOFS_ERR("    -o pid=pid_no_to_use\n");
124                 return -EINVAL;
125         }
126
127         return 0;
128 }
129
130 /******************************************************************************
131  * INODE CACHE
132  *****************************************************************************/
133
/*
 * Slab cache for struct exofs_i_info objects.  Created by init_inodecache()
 * and torn down by destroy_inodecache().
 */
static struct kmem_cache *exofs_inode_cachep;
138
139 /*
140  * Allocate an inode in the cache
141  */
142 static struct inode *exofs_alloc_inode(struct super_block *sb)
143 {
144         struct exofs_i_info *oi;
145
146         oi = kmem_cache_alloc(exofs_inode_cachep, GFP_KERNEL);
147         if (!oi)
148                 return NULL;
149
150         oi->vfs_inode.i_version = 1;
151         return &oi->vfs_inode;
152 }
153
154 /*
155  * Remove an inode from the cache
156  */
157 static void exofs_destroy_inode(struct inode *inode)
158 {
159         kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
160 }
161
162 /*
163  * Initialize the inode
164  */
165 static void exofs_init_once(void *foo)
166 {
167         struct exofs_i_info *oi = foo;
168
169         inode_init_once(&oi->vfs_inode);
170 }
171
172 /*
173  * Create and initialize the inode cache
174  */
175 static int init_inodecache(void)
176 {
177         exofs_inode_cachep = kmem_cache_create("exofs_inode_cache",
178                                 sizeof(struct exofs_i_info), 0,
179                                 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
180                                 exofs_init_once);
181         if (exofs_inode_cachep == NULL)
182                 return -ENOMEM;
183         return 0;
184 }
185
186 /*
187  * Destroy the inode cache
188  */
189 static void destroy_inodecache(void)
190 {
191         kmem_cache_destroy(exofs_inode_cachep);
192 }
193
194 /******************************************************************************
195  * SUPERBLOCK FUNCTIONS
196  *****************************************************************************/
/*
 * Forward declarations: both tables are defined further down in this file
 * but exofs_fill_super() needs their addresses.
 */
static const struct super_operations exofs_sops;
static const struct export_operations exofs_export_ops;
199
200 /*
201  * Write the superblock to the OSD
202  */
203 int exofs_sync_fs(struct super_block *sb, int wait)
204 {
205         struct exofs_sb_info *sbi;
206         struct exofs_fscb *fscb;
207         struct exofs_io_state *ios;
208         int ret = -ENOMEM;
209
210         lock_super(sb);
211         sbi = sb->s_fs_info;
212         fscb = &sbi->s_fscb;
213
214         ret = exofs_get_io_state(&sbi->layout, &ios);
215         if (ret)
216                 goto out;
217
218         /* Note: We only write the changing part of the fscb. .i.e upto the
219          *       the fscb->s_dev_table_oid member. There is no read-modify-write
220          *       here.
221          */
222         ios->length = offsetof(struct exofs_fscb, s_dev_table_oid);
223         memset(fscb, 0, ios->length);
224         fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
225         fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
226         fscb->s_magic = cpu_to_le16(sb->s_magic);
227         fscb->s_newfs = 0;
228         fscb->s_version = EXOFS_FSCB_VER;
229
230         ios->obj.id = EXOFS_SUPER_ID;
231         ios->offset = 0;
232         ios->kern_buff = fscb;
233         ios->cred = sbi->s_cred;
234
235         ret = exofs_sbi_write(ios);
236         if (unlikely(ret)) {
237                 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
238                 goto out;
239         }
240         sb->s_dirt = 0;
241
242 out:
243         EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
244         exofs_put_io_state(ios);
245         unlock_super(sb);
246         return ret;
247 }
248
249 static void exofs_write_super(struct super_block *sb)
250 {
251         if (!(sb->s_flags & MS_RDONLY))
252                 exofs_sync_fs(sb, 1);
253         else
254                 sb->s_dirt = 0;
255 }
256
257 static void _exofs_print_device(const char *msg, const char *dev_path,
258                                 struct osd_dev *od, u64 pid)
259 {
260         const struct osd_dev_info *odi = osduld_device_info(od);
261
262         printk(KERN_NOTICE "exofs: %s %s osd_name-%s pid-0x%llx\n",
263                 msg, dev_path ?: "", odi->osdname, _LLU(pid));
264 }
265
266 void exofs_free_sbi(struct exofs_sb_info *sbi)
267 {
268         while (sbi->layout.s_numdevs) {
269                 int i = --sbi->layout.s_numdevs;
270                 struct osd_dev *od = sbi->layout.s_ods[i];
271
272                 if (od) {
273                         sbi->layout.s_ods[i] = NULL;
274                         osduld_put_device(od);
275                 }
276         }
277         kfree(sbi);
278 }
279
280 /*
281  * This function is called when the vfs is freeing the superblock.  We just
282  * need to free our own part.
283  */
284 static void exofs_put_super(struct super_block *sb)
285 {
286         int num_pend;
287         struct exofs_sb_info *sbi = sb->s_fs_info;
288
289         if (sb->s_dirt)
290                 exofs_write_super(sb);
291
292         /* make sure there are no pending commands */
293         for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0;
294              num_pend = atomic_read(&sbi->s_curr_pending)) {
295                 wait_queue_head_t wq;
296                 init_waitqueue_head(&wq);
297                 wait_event_timeout(wq,
298                                   (atomic_read(&sbi->s_curr_pending) == 0),
299                                   msecs_to_jiffies(100));
300         }
301
302         _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0],
303                             sbi->layout.s_pid);
304
305         bdi_destroy(&sbi->bdi);
306         exofs_free_sbi(sbi);
307         sb->s_fs_info = NULL;
308 }
309
310 static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
311                                     struct exofs_device_table *dt)
312 {
313         u64 stripe_length;
314
315         sbi->data_map.odm_num_comps   =
316                                 le32_to_cpu(dt->dt_data_map.cb_num_comps);
317         sbi->data_map.odm_stripe_unit =
318                                 le64_to_cpu(dt->dt_data_map.cb_stripe_unit);
319         sbi->data_map.odm_group_width =
320                                 le32_to_cpu(dt->dt_data_map.cb_group_width);
321         sbi->data_map.odm_group_depth =
322                                 le32_to_cpu(dt->dt_data_map.cb_group_depth);
323         sbi->data_map.odm_mirror_cnt  =
324                                 le32_to_cpu(dt->dt_data_map.cb_mirror_cnt);
325         sbi->data_map.odm_raid_algorithm  =
326                                 le32_to_cpu(dt->dt_data_map.cb_raid_algorithm);
327
328 /* FIXME: Only raid0 for now. if not so, do not mount */
329         if (sbi->data_map.odm_num_comps != numdevs) {
330                 EXOFS_ERR("odm_num_comps(%u) != numdevs(%u)\n",
331                           sbi->data_map.odm_num_comps, numdevs);
332                 return -EINVAL;
333         }
334         if (sbi->data_map.odm_raid_algorithm != PNFS_OSD_RAID_0) {
335                 EXOFS_ERR("Only RAID_0 for now\n");
336                 return -EINVAL;
337         }
338         if (0 != (numdevs % (sbi->data_map.odm_mirror_cnt + 1))) {
339                 EXOFS_ERR("Data Map wrong, numdevs=%d mirrors=%d\n",
340                           numdevs, sbi->data_map.odm_mirror_cnt);
341                 return -EINVAL;
342         }
343
344         if (0 != (sbi->data_map.odm_stripe_unit & ~PAGE_MASK)) {
345                 EXOFS_ERR("Stripe Unit(0x%llx)"
346                           " must be Multples of PAGE_SIZE(0x%lx)\n",
347                           _LLU(sbi->data_map.odm_stripe_unit), PAGE_SIZE);
348                 return -EINVAL;
349         }
350
351         sbi->layout.stripe_unit = sbi->data_map.odm_stripe_unit;
352         sbi->layout.mirrors_p1 = sbi->data_map.odm_mirror_cnt + 1;
353
354         if (sbi->data_map.odm_group_width) {
355                 sbi->layout.group_width = sbi->data_map.odm_group_width;
356                 sbi->layout.group_depth = sbi->data_map.odm_group_depth;
357                 if (!sbi->layout.group_depth) {
358                         EXOFS_ERR("group_depth == 0 && group_width != 0\n");
359                         return -EINVAL;
360                 }
361                 sbi->layout.group_count = sbi->data_map.odm_num_comps /
362                                                 sbi->layout.mirrors_p1 /
363                                                 sbi->data_map.odm_group_width;
364         } else {
365                 if (sbi->data_map.odm_group_depth) {
366                         printk(KERN_NOTICE "Warning: group_depth ignored "
367                                 "group_width == 0 && group_depth == %d\n",
368                                 sbi->data_map.odm_group_depth);
369                         sbi->data_map.odm_group_depth = 0;
370                 }
371                 sbi->layout.group_width = sbi->data_map.odm_num_comps /
372                                                         sbi->layout.mirrors_p1;
373                 sbi->layout.group_depth = -1;
374                 sbi->layout.group_count = 1;
375         }
376
377         stripe_length = (u64)sbi->layout.group_width * sbi->layout.stripe_unit;
378         if (stripe_length >= (1ULL << 32)) {
379                 EXOFS_ERR("Total Stripe length(0x%llx)"
380                           " >= 32bit is not supported\n", _LLU(stripe_length));
381                 return -EINVAL;
382         }
383
384         return 0;
385 }
386
387 /* @odi is valid only as long as @fscb_dev is valid */
388 static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
389                              struct osd_dev_info *odi)
390 {
391         odi->systemid_len = le32_to_cpu(dt_dev->systemid_len);
392         memcpy(odi->systemid, dt_dev->systemid, odi->systemid_len);
393
394         odi->osdname_len = le32_to_cpu(dt_dev->osdname_len);
395         odi->osdname = dt_dev->osdname;
396
397         /* FIXME support long names. Will need a _put function */
398         if (dt_dev->long_name_offset)
399                 return -EINVAL;
400
401         /* Make sure osdname is printable!
402          * mkexofs should give us space for a null-terminator else the
403          * device-table is invalid.
404          */
405         if (unlikely(odi->osdname_len >= sizeof(dt_dev->osdname)))
406                 odi->osdname_len = sizeof(dt_dev->osdname) - 1;
407         dt_dev->osdname[odi->osdname_len] = 0;
408
409         /* If it's all zeros something is bad we read past end-of-obj */
410         return !(odi->systemid_len || odi->osdname_len);
411 }
412
413 static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
414                                        unsigned table_count)
415 {
416         struct exofs_sb_info *sbi = *psbi;
417         struct osd_dev *fscb_od;
418         struct osd_obj_id obj = {.partition = sbi->layout.s_pid,
419                                  .id = EXOFS_DEVTABLE_ID};
420         struct exofs_device_table *dt;
421         unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) +
422                                              sizeof(*dt);
423         unsigned numdevs, i;
424         int ret;
425
426         dt = kmalloc(table_bytes, GFP_KERNEL);
427         if (unlikely(!dt)) {
428                 EXOFS_ERR("ERROR: allocating %x bytes for device table\n",
429                           table_bytes);
430                 return -ENOMEM;
431         }
432
433         fscb_od = sbi->layout.s_ods[0];
434         sbi->layout.s_ods[0] = NULL;
435         sbi->layout.s_numdevs = 0;
436         ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes);
437         if (unlikely(ret)) {
438                 EXOFS_ERR("ERROR: reading device table\n");
439                 goto out;
440         }
441
442         numdevs = le64_to_cpu(dt->dt_num_devices);
443         if (unlikely(!numdevs)) {
444                 ret = -EINVAL;
445                 goto out;
446         }
447         WARN_ON(table_count != numdevs);
448
449         ret = _read_and_match_data_map(sbi, numdevs, dt);
450         if (unlikely(ret))
451                 goto out;
452
453         if (likely(numdevs > 1)) {
454                 unsigned size = numdevs * sizeof(sbi->layout.s_ods[0]);
455
456                 sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL);
457                 if (unlikely(!sbi)) {
458                         ret = -ENOMEM;
459                         goto out;
460                 }
461                 memset(&sbi->layout.s_ods[1], 0,
462                        size - sizeof(sbi->layout.s_ods[0]));
463                 *psbi = sbi;
464         }
465
466         for (i = 0; i < numdevs; i++) {
467                 struct exofs_fscb fscb;
468                 struct osd_dev_info odi;
469                 struct osd_dev *od;
470
471                 if (exofs_devs_2_odi(&dt->dt_dev_table[i], &odi)) {
472                         EXOFS_ERR("ERROR: Read all-zeros device entry\n");
473                         ret = -EINVAL;
474                         goto out;
475                 }
476
477                 printk(KERN_NOTICE "Add device[%d]: osd_name-%s\n",
478                        i, odi.osdname);
479
480                 /* On all devices the device table is identical. The user can
481                  * specify any one of the participating devices on the command
482                  * line. We always keep them in device-table order.
483                  */
484                 if (fscb_od && osduld_device_same(fscb_od, &odi)) {
485                         sbi->layout.s_ods[i] = fscb_od;
486                         ++sbi->layout.s_numdevs;
487                         fscb_od = NULL;
488                         continue;
489                 }
490
491                 od = osduld_info_lookup(&odi);
492                 if (unlikely(IS_ERR(od))) {
493                         ret = PTR_ERR(od);
494                         EXOFS_ERR("ERROR: device requested is not found "
495                                   "osd_name-%s =>%d\n", odi.osdname, ret);
496                         goto out;
497                 }
498
499                 sbi->layout.s_ods[i] = od;
500                 ++sbi->layout.s_numdevs;
501
502                 /* Read the fscb of the other devices to make sure the FS
503                  * partition is there.
504                  */
505                 ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb,
506                                       sizeof(fscb));
507                 if (unlikely(ret)) {
508                         EXOFS_ERR("ERROR: Malformed participating device "
509                                   "error reading fscb osd_name-%s\n",
510                                   odi.osdname);
511                         goto out;
512                 }
513
514                 /* TODO: verify other information is correct and FS-uuid
515                  *       matches. Benny what did you say about device table
516                  *       generation and old devices?
517                  */
518         }
519
520 out:
521         kfree(dt);
522         if (unlikely(!ret && fscb_od)) {
523                 EXOFS_ERR(
524                       "ERROR: Bad device-table container device not present\n");
525                 osduld_put_device(fscb_od);
526                 ret = -EINVAL;
527         }
528
529         return ret;
530 }
531
532 /*
533  * Read the superblock from the OSD and fill in the fields
534  */
535 static int exofs_fill_super(struct super_block *sb, void *data, int silent)
536 {
537         struct inode *root;
538         struct exofs_mountopt *opts = data;
539         struct exofs_sb_info *sbi;      /*extended info                  */
540         struct osd_dev *od;             /* Master device                 */
541         struct exofs_fscb fscb;         /*on-disk superblock info        */
542         struct osd_obj_id obj;
543         unsigned table_count;
544         int ret;
545
546         sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
547         if (!sbi)
548                 return -ENOMEM;
549
550         ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
551         if (ret)
552                 goto free_bdi;
553
554         /* use mount options to fill superblock */
555         od = osduld_path_lookup(opts->dev_name);
556         if (IS_ERR(od)) {
557                 ret = PTR_ERR(od);
558                 goto free_sbi;
559         }
560
561         /* Default layout in case we do not have a device-table */
562         sbi->layout.stripe_unit = PAGE_SIZE;
563         sbi->layout.mirrors_p1 = 1;
564         sbi->layout.group_width = 1;
565         sbi->layout.group_depth = -1;
566         sbi->layout.group_count = 1;
567         sbi->layout.s_ods[0] = od;
568         sbi->layout.s_numdevs = 1;
569         sbi->layout.s_pid = opts->pid;
570         sbi->s_timeout = opts->timeout;
571
572         /* fill in some other data by hand */
573         memset(sb->s_id, 0, sizeof(sb->s_id));
574         strcpy(sb->s_id, "exofs");
575         sb->s_blocksize = EXOFS_BLKSIZE;
576         sb->s_blocksize_bits = EXOFS_BLKSHIFT;
577         sb->s_maxbytes = MAX_LFS_FILESIZE;
578         atomic_set(&sbi->s_curr_pending, 0);
579         sb->s_bdev = NULL;
580         sb->s_dev = 0;
581
582         obj.partition = sbi->layout.s_pid;
583         obj.id = EXOFS_SUPER_ID;
584         exofs_make_credential(sbi->s_cred, &obj);
585
586         ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb));
587         if (unlikely(ret))
588                 goto free_sbi;
589
590         sb->s_magic = le16_to_cpu(fscb.s_magic);
591         sbi->s_nextid = le64_to_cpu(fscb.s_nextid);
592         sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles);
593
594         /* make sure what we read from the object store is correct */
595         if (sb->s_magic != EXOFS_SUPER_MAGIC) {
596                 if (!silent)
597                         EXOFS_ERR("ERROR: Bad magic value\n");
598                 ret = -EINVAL;
599                 goto free_sbi;
600         }
601         if (le32_to_cpu(fscb.s_version) != EXOFS_FSCB_VER) {
602                 EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n",
603                           EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version));
604                 ret = -EINVAL;
605                 goto free_sbi;
606         }
607
608         /* start generation numbers from a random point */
609         get_random_bytes(&sbi->s_next_generation, sizeof(u32));
610         spin_lock_init(&sbi->s_next_gen_lock);
611
612         table_count = le64_to_cpu(fscb.s_dev_table_count);
613         if (table_count) {
614                 ret = exofs_read_lookup_dev_table(&sbi, table_count);
615                 if (unlikely(ret))
616                         goto free_sbi;
617         }
618
619         /* set up operation vectors */
620         sb->s_bdi = &sbi->bdi;
621         sb->s_fs_info = sbi;
622         sb->s_op = &exofs_sops;
623         sb->s_export_op = &exofs_export_ops;
624         root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF);
625         if (IS_ERR(root)) {
626                 EXOFS_ERR("ERROR: exofs_iget failed\n");
627                 ret = PTR_ERR(root);
628                 goto free_sbi;
629         }
630         sb->s_root = d_alloc_root(root);
631         if (!sb->s_root) {
632                 iput(root);
633                 EXOFS_ERR("ERROR: get root inode failed\n");
634                 ret = -ENOMEM;
635                 goto free_sbi;
636         }
637
638         if (!S_ISDIR(root->i_mode)) {
639                 dput(sb->s_root);
640                 sb->s_root = NULL;
641                 EXOFS_ERR("ERROR: corrupt root inode (mode = %hd)\n",
642                        root->i_mode);
643                 ret = -EINVAL;
644                 goto free_sbi;
645         }
646
647         _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0],
648                             sbi->layout.s_pid);
649         return 0;
650
651 free_sbi:
652         bdi_destroy(&sbi->bdi);
653 free_bdi:
654         EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
655                   opts->dev_name, sbi->layout.s_pid, ret);
656         exofs_free_sbi(sbi);
657         return ret;
658 }
659
660 /*
661  * Set up the superblock (calls exofs_fill_super eventually)
662  */
663 static int exofs_get_sb(struct file_system_type *type,
664                           int flags, const char *dev_name,
665                           void *data, struct vfsmount *mnt)
666 {
667         struct exofs_mountopt opts;
668         int ret;
669
670         ret = parse_options(data, &opts);
671         if (ret)
672                 return ret;
673
674         opts.dev_name = dev_name;
675         return get_sb_nodev(type, flags, &opts, exofs_fill_super, mnt);
676 }
677
678 /*
679  * Return information about the file system state in the buffer.  This is used
680  * by the 'df' command, for example.
681  */
682 static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
683 {
684         struct super_block *sb = dentry->d_sb;
685         struct exofs_sb_info *sbi = sb->s_fs_info;
686         struct exofs_io_state *ios;
687         struct osd_attr attrs[] = {
688                 ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS,
689                         OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)),
690                 ATTR_DEF(OSD_APAGE_PARTITION_INFORMATION,
691                         OSD_ATTR_PI_USED_CAPACITY, sizeof(__be64)),
692         };
693         uint64_t capacity = ULLONG_MAX;
694         uint64_t used = ULLONG_MAX;
695         uint8_t cred_a[OSD_CAP_LEN];
696         int ret;
697
698         ret = exofs_get_io_state(&sbi->layout, &ios);
699         if (ret) {
700                 EXOFS_DBGMSG("exofs_get_io_state failed.\n");
701                 return ret;
702         }
703
704         exofs_make_credential(cred_a, &ios->obj);
705         ios->cred = sbi->s_cred;
706         ios->in_attr = attrs;
707         ios->in_attr_len = ARRAY_SIZE(attrs);
708
709         ret = exofs_sbi_read(ios);
710         if (unlikely(ret))
711                 goto out;
712
713         ret = extract_attr_from_ios(ios, &attrs[0]);
714         if (likely(!ret)) {
715                 capacity = get_unaligned_be64(attrs[0].val_ptr);
716                 if (unlikely(!capacity))
717                         capacity = ULLONG_MAX;
718         } else
719                 EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n");
720
721         ret = extract_attr_from_ios(ios, &attrs[1]);
722         if (likely(!ret))
723                 used = get_unaligned_be64(attrs[1].val_ptr);
724         else
725                 EXOFS_DBGMSG("exofs_statfs: get used-space failed.\n");
726
727         /* fill in the stats buffer */
728         buf->f_type = EXOFS_SUPER_MAGIC;
729         buf->f_bsize = EXOFS_BLKSIZE;
730         buf->f_blocks = capacity >> 9;
731         buf->f_bfree = (capacity - used) >> 9;
732         buf->f_bavail = buf->f_bfree;
733         buf->f_files = sbi->s_numfiles;
734         buf->f_ffree = EXOFS_MAX_ID - sbi->s_numfiles;
735         buf->f_namelen = EXOFS_NAME_LEN;
736
737 out:
738         exofs_put_io_state(ios);
739         return ret;
740 }
741
/*
 * Superblock operations installed by exofs_fill_super().
 * exofs_write_inode and exofs_evict_inode are not defined in this file.
 */
static const struct super_operations exofs_sops = {
        .alloc_inode    = exofs_alloc_inode,
        .destroy_inode  = exofs_destroy_inode,
        .write_inode    = exofs_write_inode,
        .evict_inode    = exofs_evict_inode,
        .put_super      = exofs_put_super,
        .write_super    = exofs_write_super,
        .sync_fs        = exofs_sync_fs,
        .statfs         = exofs_statfs,
};
752
753 /******************************************************************************
754  * EXPORT OPERATIONS
755  *****************************************************************************/
756
757 struct dentry *exofs_get_parent(struct dentry *child)
758 {
759         unsigned long ino = exofs_parent_ino(child);
760
761         if (!ino)
762                 return NULL;
763
764         return d_obtain_alias(exofs_iget(child->d_inode->i_sb, ino));
765 }
766
767 static struct inode *exofs_nfs_get_inode(struct super_block *sb,
768                 u64 ino, u32 generation)
769 {
770         struct inode *inode;
771
772         inode = exofs_iget(sb, ino);
773         if (IS_ERR(inode))
774                 return ERR_CAST(inode);
775         if (generation && inode->i_generation != generation) {
776                 /* we didn't find the right inode.. */
777                 iput(inode);
778                 return ERR_PTR(-ESTALE);
779         }
780         return inode;
781 }
782
783 static struct dentry *exofs_fh_to_dentry(struct super_block *sb,
784                                 struct fid *fid, int fh_len, int fh_type)
785 {
786         return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
787                                     exofs_nfs_get_inode);
788 }
789
790 static struct dentry *exofs_fh_to_parent(struct super_block *sb,
791                                 struct fid *fid, int fh_len, int fh_type)
792 {
793         return generic_fh_to_parent(sb, fid, fh_len, fh_type,
794                                     exofs_nfs_get_inode);
795 }
796
/* Export operations installed as sb->s_export_op by exofs_fill_super(). */
static const struct export_operations exofs_export_ops = {
        .fh_to_dentry = exofs_fh_to_dentry,
        .fh_to_parent = exofs_fh_to_parent,
        .get_parent = exofs_get_parent,
};
802
803 /******************************************************************************
804  * INSMOD/RMMOD
805  *****************************************************************************/
806
/*
 * struct that describes this file system.  Registered by init_exofs() and
 * unregistered by exit_exofs().
 */
static struct file_system_type exofs_type = {
        .owner          = THIS_MODULE,
        .name           = "exofs",
        .get_sb         = exofs_get_sb,
        .kill_sb        = generic_shutdown_super,
};
816
817 static int __init init_exofs(void)
818 {
819         int err;
820
821         err = init_inodecache();
822         if (err)
823                 goto out;
824
825         err = register_filesystem(&exofs_type);
826         if (err)
827                 goto out_d;
828
829         return 0;
830 out_d:
831         destroy_inodecache();
832 out:
833         return err;
834 }
835
/*
 * Module exit: undo init_exofs() in reverse order - unregister the file
 * system first, then destroy the inode cache.
 */
static void __exit exit_exofs(void)
{
        unregister_filesystem(&exofs_type);
        destroy_inodecache();
}
841
/* Module metadata and the entry/exit hooks defined above. */
MODULE_AUTHOR("Avishay Traeger <avishay@gmail.com>");
MODULE_DESCRIPTION("exofs");
MODULE_LICENSE("GPL");

module_init(init_exofs)
module_exit(exit_exofs)