ocfs2: Call refcount tree remove process properly.
[linux-2.6.git] / fs / ocfs2 / xattr.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
9  * Lots of code in this file is copied from linux/fs/ext3/xattr.c.
10  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public
14  * License version 2 as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  */
21
22 #include <linux/capability.h>
23 #include <linux/fs.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>
28 #include <linux/uio.h>
29 #include <linux/sched.h>
30 #include <linux/splice.h>
31 #include <linux/mount.h>
32 #include <linux/writeback.h>
33 #include <linux/falloc.h>
34 #include <linux/sort.h>
35 #include <linux/init.h>
36 #include <linux/module.h>
37 #include <linux/string.h>
38 #include <linux/security.h>
39
40 #define MLOG_MASK_PREFIX ML_XATTR
41 #include <cluster/masklog.h>
42
43 #include "ocfs2.h"
44 #include "alloc.h"
45 #include "blockcheck.h"
46 #include "dlmglue.h"
47 #include "file.h"
48 #include "symlink.h"
49 #include "sysfile.h"
50 #include "inode.h"
51 #include "journal.h"
52 #include "ocfs2_fs.h"
53 #include "suballoc.h"
54 #include "uptodate.h"
55 #include "buffer_head_io.h"
56 #include "super.h"
57 #include "xattr.h"
58 #include "refcounttree.h"
59
/*
 * An xattr value root bundled with one extent record.
 * NOTE(review): presumably 'er' supplies the storage for the first record
 * of xv.xr_list, which would make the member order significant -- confirm
 * against the on-disk definitions in ocfs2_fs.h.
 */
60 struct ocfs2_xattr_def_value_root {
61         struct ocfs2_xattr_value_root   xv;
62         struct ocfs2_extent_rec         er;
63 };
64
/*
 * In-memory handle for one xattr bucket: the owning inode together with
 * the buffer_heads of the blocks that form the bucket.  Only the first
 * bu_blocks slots of bu_bhs are used by the helpers below.
 */
65 struct ocfs2_xattr_bucket {
66         /* The inode these xattrs are associated with */
67         struct inode *bu_inode;
68
69         /* The actual buffers that make up the bucket */
70         struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
71
72         /* How many blocks make up one bucket for this filesystem */
73         int bu_blocks;
74 };
75
/*
 * State threaded through the xattr allocation/removal helpers:
 *   handle  - journal handle used for buffer access and dirtying
 *   meta_ac - metadata allocation context handed to the extent-tree code
 *   data_ac - data allocation context used when adding clusters
 *   dealloc - collects clusters/blocks whose freeing is deferred
 */
76 struct ocfs2_xattr_set_ctxt {
77         handle_t *handle;
78         struct ocfs2_alloc_context *meta_ac;
79         struct ocfs2_alloc_context *data_ac;
80         struct ocfs2_cached_dealloc_ctxt dealloc;
81 };
82
/*
 * Size constants used when accounting xattr storage.
 *
 * OCFS2_XATTR_ROOT_SIZE     - bytes occupied by a value root plus one
 *                             extent record (see ocfs2_xattr_def_value_root).
 * OCFS2_XATTR_INLINE_SIZE   - values longer than this many bytes are
 *                             accounted as a value root instead of inline
 *                             data (see ocfs2_xattr_entry_real_size()).
 * OCFS2_XATTR_HEADER_GAP    - bytes subtracted as a gap in the two
 *                             free-space computations below.
 * OCFS2_XATTR_FREE_IN_IBODY - minimum inline xattr area less the xattr
 *                             header and the gap.
 * OCFS2_XATTR_FREE_IN_BLOCK - one filesystem block less the xattr block
 *                             structure, the xattr header and the gap;
 *                             'ptr' must expose ->i_sb (an inode).
 */
83 #define OCFS2_XATTR_ROOT_SIZE   (sizeof(struct ocfs2_xattr_def_value_root))
84 #define OCFS2_XATTR_INLINE_SIZE 80
85 #define OCFS2_XATTR_HEADER_GAP  4
86 #define OCFS2_XATTR_FREE_IN_IBODY       (OCFS2_MIN_XATTR_INLINE_SIZE \
87                                          - sizeof(struct ocfs2_xattr_header) \
88                                          - OCFS2_XATTR_HEADER_GAP)
89 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr)  ((ptr)->i_sb->s_blocksize \
90                                          - sizeof(struct ocfs2_xattr_block) \
91                                          - sizeof(struct ocfs2_xattr_header) \
92                                          - OCFS2_XATTR_HEADER_GAP)
93
/*
 * Template value root whose extent list advertises room for exactly one
 * record (l_count == 1, stored little-endian); every other field is zero.
 * NOTE(review): its users are outside this part of the file.
 */
94 static struct ocfs2_xattr_def_value_root def_xv = {
95         .xv.xr_list.l_count = cpu_to_le16(1),
96 };
97
/*
 * NULL-terminated list of the xattr handlers ocfs2 provides.  The POSIX
 * ACL handlers are present only when CONFIG_OCFS2_FS_POSIX_ACL is set.
 * NOTE(review): presumably installed as the superblock's handler table --
 * the registration is not in this file section.
 */
98 struct xattr_handler *ocfs2_xattr_handlers[] = {
99         &ocfs2_xattr_user_handler,
100 #ifdef CONFIG_OCFS2_FS_POSIX_ACL
101         &ocfs2_xattr_acl_access_handler,
102         &ocfs2_xattr_acl_default_handler,
103 #endif
104         &ocfs2_xattr_trusted_handler,
105         &ocfs2_xattr_security_handler,
106         NULL
107 };
108
/*
 * Lookup table from an OCFS2_XATTR_INDEX_* name index to its handler.
 * Slots without an initializer stay NULL; ocfs2_xattr_prefix() treats a
 * NULL slot as "no prefix".
 */
109 static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
110         [OCFS2_XATTR_INDEX_USER]        = &ocfs2_xattr_user_handler,
111 #ifdef CONFIG_OCFS2_FS_POSIX_ACL
112         [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
113                                         = &ocfs2_xattr_acl_access_handler,
114         [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
115                                         = &ocfs2_xattr_acl_default_handler,
116 #endif
117         [OCFS2_XATTR_INDEX_TRUSTED]     = &ocfs2_xattr_trusted_handler,
118         [OCFS2_XATTR_INDEX_SECURITY]    = &ocfs2_xattr_security_handler,
119 };
120
/*
 * Description of a single extended attribute: its name index, its name,
 * and a value buffer with the buffer's length in bytes.
 * NOTE(review): name_index is presumably one of OCFS2_XATTR_INDEX_* --
 * confirm against the callers.
 */
121 struct ocfs2_xattr_info {
122         int name_index;
123         const char *name;
124         const void *value;
125         size_t value_len;
126 };
127
/*
 * Search cursor handed to the xattr find/set helpers declared below.
 * NOTE(review): the exact meaning of header/base/end/here/not_found is
 * established by code outside this section; the names suggest the bounds
 * of the searched region, the matching entry, and the lookup status.
 */
128 struct ocfs2_xattr_search {
129         struct buffer_head *inode_bh;
130         /*
131          * xattr_bh points to the buffer head of the block that holds the
132          * extended attributes; when they live in the inode it equals inode_bh.
133          */
134         struct buffer_head *xattr_bh;
135         struct ocfs2_xattr_header *header;
136         struct ocfs2_xattr_bucket *bucket;
137         void *base;
138         void *end;
139         struct ocfs2_xattr_entry *here;
140         int not_found;
141 };
142
/*
 * Forward declarations of file-local helpers that are used before their
 * definitions.  xattr_tree_rec_func is the per-record callback type
 * accepted by ocfs2_iterate_xattr_index_block(); ocfs2_rm_xattr_cluster()
 * has a matching signature.
 */
143 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
144                                              struct ocfs2_xattr_header *xh,
145                                              int index,
146                                              int *block_off,
147                                              int *new_offset);
148
149 static int ocfs2_xattr_block_find(struct inode *inode,
150                                   int name_index,
151                                   const char *name,
152                                   struct ocfs2_xattr_search *xs);
153 static int ocfs2_xattr_index_block_find(struct inode *inode,
154                                         struct buffer_head *root_bh,
155                                         int name_index,
156                                         const char *name,
157                                         struct ocfs2_xattr_search *xs);
158
159 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
160                                         struct buffer_head *blk_bh,
161                                         char *buffer,
162                                         size_t buffer_size);
163
164 static int ocfs2_xattr_create_index_block(struct inode *inode,
165                                           struct ocfs2_xattr_search *xs,
166                                           struct ocfs2_xattr_set_ctxt *ctxt);
167
168 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
169                                              struct ocfs2_xattr_info *xi,
170                                              struct ocfs2_xattr_search *xs,
171                                              struct ocfs2_xattr_set_ctxt *ctxt);
172
173 typedef int (xattr_tree_rec_func)(struct inode *inode,
174                                   struct buffer_head *root_bh,
175                                   u64 blkno, u32 cpos, u32 len, void *para);
176 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
177                                            struct buffer_head *root_bh,
178                                            xattr_tree_rec_func *rec_func,
179                                            void *para);
180 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
181                                         struct ocfs2_xattr_bucket *bucket,
182                                         void *para);
183 static int ocfs2_rm_xattr_cluster(struct inode *inode,
184                                   struct buffer_head *root_bh,
185                                   u64 blkno,
186                                   u32 cpos,
187                                   u32 len,
188                                   void *para);
189
190 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
191                                   u64 src_blk, u64 last_blk, u64 to_blk,
192                                   unsigned int start_bucket,
193                                   u32 *first_hash);
194 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
195                                         struct ocfs2_dinode *di,
196                                         struct ocfs2_xattr_info *xi,
197                                         struct ocfs2_xattr_search *xis,
198                                         struct ocfs2_xattr_search *xbs,
199                                         struct ocfs2_refcount_tree **ref_tree,
200                                         int *meta_need,
201                                         int *credits);
202
203 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
204 {
205         return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
206 }
207
208 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
209 {
210         return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
211 }
212
213 static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
214 {
215         u16 len = sb->s_blocksize -
216                  offsetof(struct ocfs2_xattr_header, xh_entries);
217
218         return len / sizeof(struct ocfs2_xattr_entry);
219 }
220
/*
 * Accessors for a bucket whose buffer_heads are populated:
 *   bucket_blkno(b)    - block number of the bucket's first block
 *   bucket_block(b, n) - data pointer of the bucket's n-th block
 *   bucket_xh(b)       - the first block's data viewed as an xattr header
 * They dereference bu_bhs[] directly, so the slot must not be NULL.
 */
221 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
222 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
223 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
224
225 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
226 {
227         struct ocfs2_xattr_bucket *bucket;
228         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
229
230         BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
231
232         bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
233         if (bucket) {
234                 bucket->bu_inode = inode;
235                 bucket->bu_blocks = blks;
236         }
237
238         return bucket;
239 }
240
241 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
242 {
243         int i;
244
245         for (i = 0; i < bucket->bu_blocks; i++) {
246                 brelse(bucket->bu_bhs[i]);
247                 bucket->bu_bhs[i] = NULL;
248         }
249 }
250
251 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
252 {
253         if (bucket) {
254                 ocfs2_xattr_bucket_relse(bucket);
255                 bucket->bu_inode = NULL;
256                 kfree(bucket);
257         }
258 }
259
260 /*
261  * A bucket that has never been written to disk doesn't need to be
262  * read.  We just need the buffer_heads.  Don't call this for
263  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
264  * them fully.
265  */
266 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
267                                    u64 xb_blkno)
268 {
269         int i, rc = 0;
270
271         for (i = 0; i < bucket->bu_blocks; i++) {
272                 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
273                                               xb_blkno + i);
274                 if (!bucket->bu_bhs[i]) {
275                         rc = -EIO;
276                         mlog_errno(rc);
277                         break;
278                 }
279
280                 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
281                                            bucket->bu_bhs[i]))
282                         ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
283                                                       bucket->bu_bhs[i]);
284         }
285
286         if (rc)
287                 ocfs2_xattr_bucket_relse(bucket);
288         return rc;
289 }
290
291 /* Read the xattr bucket at xb_blkno */
292 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
293                                    u64 xb_blkno)
294 {
295         int rc;
296
297         rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
298                                bucket->bu_blocks, bucket->bu_bhs, 0,
299                                NULL);
300         if (!rc) {
301                 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
302                 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
303                                                  bucket->bu_bhs,
304                                                  bucket->bu_blocks,
305                                                  &bucket_xh(bucket)->xh_check);
306                 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
307                 if (rc)
308                         mlog_errno(rc);
309         }
310
311         if (rc)
312                 ocfs2_xattr_bucket_relse(bucket);
313         return rc;
314 }
315
316 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
317                                              struct ocfs2_xattr_bucket *bucket,
318                                              int type)
319 {
320         int i, rc = 0;
321
322         for (i = 0; i < bucket->bu_blocks; i++) {
323                 rc = ocfs2_journal_access(handle,
324                                           INODE_CACHE(bucket->bu_inode),
325                                           bucket->bu_bhs[i], type);
326                 if (rc) {
327                         mlog_errno(rc);
328                         break;
329                 }
330         }
331
332         return rc;
333 }
334
335 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
336                                              struct ocfs2_xattr_bucket *bucket)
337 {
338         int i;
339
340         spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
341         ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
342                                    bucket->bu_bhs, bucket->bu_blocks,
343                                    &bucket_xh(bucket)->xh_check);
344         spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
345
346         for (i = 0; i < bucket->bu_blocks; i++)
347                 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
348 }
349
350 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
351                                          struct ocfs2_xattr_bucket *src)
352 {
353         int i;
354         int blocksize = src->bu_inode->i_sb->s_blocksize;
355
356         BUG_ON(dest->bu_blocks != src->bu_blocks);
357         BUG_ON(dest->bu_inode != src->bu_inode);
358
359         for (i = 0; i < src->bu_blocks; i++) {
360                 memcpy(bucket_block(dest, i), bucket_block(src, i),
361                        blocksize);
362         }
363 }
364
365 static int ocfs2_validate_xattr_block(struct super_block *sb,
366                                       struct buffer_head *bh)
367 {
368         int rc;
369         struct ocfs2_xattr_block *xb =
370                 (struct ocfs2_xattr_block *)bh->b_data;
371
372         mlog(0, "Validating xattr block %llu\n",
373              (unsigned long long)bh->b_blocknr);
374
375         BUG_ON(!buffer_uptodate(bh));
376
377         /*
378          * If the ecc fails, we return the error but otherwise
379          * leave the filesystem running.  We know any error is
380          * local to this block.
381          */
382         rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
383         if (rc)
384                 return rc;
385
386         /*
387          * Errors after here are fatal
388          */
389
390         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
391                 ocfs2_error(sb,
392                             "Extended attribute block #%llu has bad "
393                             "signature %.*s",
394                             (unsigned long long)bh->b_blocknr, 7,
395                             xb->xb_signature);
396                 return -EINVAL;
397         }
398
399         if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
400                 ocfs2_error(sb,
401                             "Extended attribute block #%llu has an "
402                             "invalid xb_blkno of %llu",
403                             (unsigned long long)bh->b_blocknr,
404                             (unsigned long long)le64_to_cpu(xb->xb_blkno));
405                 return -EINVAL;
406         }
407
408         if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
409                 ocfs2_error(sb,
410                             "Extended attribute block #%llu has an invalid "
411                             "xb_fs_generation of #%u",
412                             (unsigned long long)bh->b_blocknr,
413                             le32_to_cpu(xb->xb_fs_generation));
414                 return -EINVAL;
415         }
416
417         return 0;
418 }
419
420 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
421                                   struct buffer_head **bh)
422 {
423         int rc;
424         struct buffer_head *tmp = *bh;
425
426         rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
427                               ocfs2_validate_xattr_block);
428
429         /* If ocfs2_read_block() got us a new bh, pass it up. */
430         if (!rc && !*bh)
431                 *bh = tmp;
432
433         return rc;
434 }
435
436 static inline const char *ocfs2_xattr_prefix(int name_index)
437 {
438         struct xattr_handler *handler = NULL;
439
440         if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
441                 handler = ocfs2_xattr_handler_map[name_index];
442
443         return handler ? handler->prefix : NULL;
444 }
445
446 static u32 ocfs2_xattr_name_hash(struct inode *inode,
447                                  const char *name,
448                                  int name_len)
449 {
450         /* Get hash value of uuid from super block */
451         u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
452         int i;
453
454         /* hash extended attribute name */
455         for (i = 0; i < name_len; i++) {
456                 hash = (hash << OCFS2_HASH_SHIFT) ^
457                        (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
458                        *name++;
459         }
460
461         return hash;
462 }
463
464 /*
465  * ocfs2_xattr_hash_entry()
466  *
467  * Compute the hash of an extended attribute.
468  */
469 static void ocfs2_xattr_hash_entry(struct inode *inode,
470                                    struct ocfs2_xattr_header *header,
471                                    struct ocfs2_xattr_entry *entry)
472 {
473         u32 hash = 0;
474         char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
475
476         hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len);
477         entry->xe_name_hash = cpu_to_le32(hash);
478
479         return;
480 }
481
482 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
483 {
484         int size = 0;
485
486         if (value_len <= OCFS2_XATTR_INLINE_SIZE)
487                 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
488         else
489                 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
490         size += sizeof(struct ocfs2_xattr_entry);
491
492         return size;
493 }
494
495 int ocfs2_calc_security_init(struct inode *dir,
496                              struct ocfs2_security_xattr_info *si,
497                              int *want_clusters,
498                              int *xattr_credits,
499                              struct ocfs2_alloc_context **xattr_ac)
500 {
501         int ret = 0;
502         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
503         int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
504                                                  si->value_len);
505
506         /*
507          * The max space of security xattr taken inline is
508          * 256(name) + 80(value) + 16(entry) = 352 bytes,
509          * So reserve one metadata block for it is ok.
510          */
511         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
512             s_size > OCFS2_XATTR_FREE_IN_IBODY) {
513                 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
514                 if (ret) {
515                         mlog_errno(ret);
516                         return ret;
517                 }
518                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
519         }
520
521         /* reserve clusters for xattr value which will be set in B tree*/
522         if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
523                 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
524                                                             si->value_len);
525
526                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
527                                                            new_clusters);
528                 *want_clusters += new_clusters;
529         }
530         return ret;
531 }
532
533 int ocfs2_calc_xattr_init(struct inode *dir,
534                           struct buffer_head *dir_bh,
535                           int mode,
536                           struct ocfs2_security_xattr_info *si,
537                           int *want_clusters,
538                           int *xattr_credits,
539                           int *want_meta)
540 {
541         int ret = 0;
542         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
543         int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
544
545         if (si->enable)
546                 s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
547                                                      si->value_len);
548
549         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
550                 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
551                                         OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
552                                         "", NULL, 0);
553                 if (acl_len > 0) {
554                         a_size = ocfs2_xattr_entry_real_size(0, acl_len);
555                         if (S_ISDIR(mode))
556                                 a_size <<= 1;
557                 } else if (acl_len != 0 && acl_len != -ENODATA) {
558                         mlog_errno(ret);
559                         return ret;
560                 }
561         }
562
563         if (!(s_size + a_size))
564                 return ret;
565
566         /*
567          * The max space of security xattr taken inline is
568          * 256(name) + 80(value) + 16(entry) = 352 bytes,
569          * The max space of acl xattr taken inline is
570          * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
571          * when blocksize = 512, may reserve one more cluser for
572          * xattr bucket, otherwise reserve one metadata block
573          * for them is ok.
574          * If this is a new directory with inline data,
575          * we choose to reserve the entire inline area for
576          * directory contents and force an external xattr block.
577          */
578         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
579             (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
580             (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
581                 *want_meta = *want_meta + 1;
582                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
583         }
584
585         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
586             (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
587                 *want_clusters += 1;
588                 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
589         }
590
591         /*
592          * reserve credits and clusters for xattrs which has large value
593          * and have to be set outside
594          */
595         if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
596                 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
597                                                         si->value_len);
598                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
599                                                            new_clusters);
600                 *want_clusters += new_clusters;
601         }
602         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
603             acl_len > OCFS2_XATTR_INLINE_SIZE) {
604                 /* for directory, it has DEFAULT and ACCESS two types of acls */
605                 new_clusters = (S_ISDIR(mode) ? 2 : 1) *
606                                 ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
607                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
608                                                            new_clusters);
609                 *want_clusters += new_clusters;
610         }
611
612         return ret;
613 }
614
615 static int ocfs2_xattr_extend_allocation(struct inode *inode,
616                                          u32 clusters_to_add,
617                                          struct ocfs2_xattr_value_buf *vb,
618                                          struct ocfs2_xattr_set_ctxt *ctxt)
619 {
620         int status = 0;
621         handle_t *handle = ctxt->handle;
622         enum ocfs2_alloc_restarted why;
623         u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
624         struct ocfs2_extent_tree et;
625
626         mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
627
628         ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
629
630         status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
631                               OCFS2_JOURNAL_ACCESS_WRITE);
632         if (status < 0) {
633                 mlog_errno(status);
634                 goto leave;
635         }
636
637         prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
638         status = ocfs2_add_clusters_in_btree(handle,
639                                              &et,
640                                              &logical_start,
641                                              clusters_to_add,
642                                              0,
643                                              ctxt->data_ac,
644                                              ctxt->meta_ac,
645                                              &why);
646         if (status < 0) {
647                 mlog_errno(status);
648                 goto leave;
649         }
650
651         status = ocfs2_journal_dirty(handle, vb->vb_bh);
652         if (status < 0) {
653                 mlog_errno(status);
654                 goto leave;
655         }
656
657         clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
658
659         /*
660          * We should have already allocated enough space before the transaction,
661          * so no need to restart.
662          */
663         BUG_ON(why != RESTART_NONE || clusters_to_add);
664
665 leave:
666
667         return status;
668 }
669
670 static int __ocfs2_remove_xattr_range(struct inode *inode,
671                                       struct ocfs2_xattr_value_buf *vb,
672                                       u32 cpos, u32 phys_cpos, u32 len,
673                                       unsigned int ext_flags,
674                                       struct ocfs2_xattr_set_ctxt *ctxt)
675 {
676         int ret;
677         u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
678         handle_t *handle = ctxt->handle;
679         struct ocfs2_extent_tree et;
680
681         ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
682
683         ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
684                             OCFS2_JOURNAL_ACCESS_WRITE);
685         if (ret) {
686                 mlog_errno(ret);
687                 goto out;
688         }
689
690         ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
691                                   &ctxt->dealloc);
692         if (ret) {
693                 mlog_errno(ret);
694                 goto out;
695         }
696
697         le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
698
699         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
700         if (ret) {
701                 mlog_errno(ret);
702                 goto out;
703         }
704
705         if (ext_flags & OCFS2_EXT_REFCOUNTED)
706                 ret = ocfs2_decrease_refcount(inode, handle,
707                                         ocfs2_blocks_to_clusters(inode->i_sb,
708                                                                  phys_blkno),
709                                         len, ctxt->meta_ac, &ctxt->dealloc, 1);
710         else
711                 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
712                                                   phys_blkno, len);
713         if (ret)
714                 mlog_errno(ret);
715
716 out:
717         return ret;
718 }
719
720 static int ocfs2_xattr_shrink_size(struct inode *inode,
721                                    u32 old_clusters,
722                                    u32 new_clusters,
723                                    struct ocfs2_xattr_value_buf *vb,
724                                    struct ocfs2_xattr_set_ctxt *ctxt)
725 {
726         int ret = 0;
727         unsigned int ext_flags;
728         u32 trunc_len, cpos, phys_cpos, alloc_size;
729         u64 block;
730
731         if (old_clusters <= new_clusters)
732                 return 0;
733
734         cpos = new_clusters;
735         trunc_len = old_clusters - new_clusters;
736         while (trunc_len) {
737                 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
738                                                &alloc_size,
739                                                &vb->vb_xv->xr_list, &ext_flags);
740                 if (ret) {
741                         mlog_errno(ret);
742                         goto out;
743                 }
744
745                 if (alloc_size > trunc_len)
746                         alloc_size = trunc_len;
747
748                 ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
749                                                  phys_cpos, alloc_size,
750                                                  ext_flags, ctxt);
751                 if (ret) {
752                         mlog_errno(ret);
753                         goto out;
754                 }
755
756                 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
757                 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
758                                                        block, alloc_size);
759                 cpos += alloc_size;
760                 trunc_len -= alloc_size;
761         }
762
763 out:
764         return ret;
765 }
766
767 static int ocfs2_xattr_value_truncate(struct inode *inode,
768                                       struct ocfs2_xattr_value_buf *vb,
769                                       int len,
770                                       struct ocfs2_xattr_set_ctxt *ctxt)
771 {
772         int ret;
773         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
774         u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
775
776         if (new_clusters == old_clusters)
777                 return 0;
778
779         if (new_clusters > old_clusters)
780                 ret = ocfs2_xattr_extend_allocation(inode,
781                                                     new_clusters - old_clusters,
782                                                     vb, ctxt);
783         else
784                 ret = ocfs2_xattr_shrink_size(inode,
785                                               old_clusters, new_clusters,
786                                               vb, ctxt);
787
788         return ret;
789 }
790
/*
 * Append "<prefix><name>\0" to @buffer at offset *@result and advance
 * *@result by the number of bytes this entry occupies.
 *
 * With @size == 0 nothing is written and only the running total is
 * updated, so callers can measure the buffer they need.  Returns -ERANGE
 * when the updated total exceeds @size, 0 otherwise.
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
                                  size_t *result, const char *prefix,
                                  const char *name, int name_len)
{
        size_t start = *result;
        int prefix_len = strlen(prefix);
        int entry_len = prefix_len + name_len + 1;
        char *dst;

        *result += entry_len;

        /* Size probe: the caller only wants the required length. */
        if (size == 0)
                return 0;

        if (size < *result)
                return -ERANGE;

        dst = buffer + start;
        memcpy(dst, prefix, prefix_len);
        dst += prefix_len;
        memcpy(dst, name, name_len);
        dst[name_len] = '\0';

        return 0;
}
814
815 static int ocfs2_xattr_list_entries(struct inode *inode,
816                                     struct ocfs2_xattr_header *header,
817                                     char *buffer, size_t buffer_size)
818 {
819         size_t result = 0;
820         int i, type, ret;
821         const char *prefix, *name;
822
823         for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
824                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
825                 type = ocfs2_xattr_get_type(entry);
826                 prefix = ocfs2_xattr_prefix(type);
827
828                 if (prefix) {
829                         name = (const char *)header +
830                                 le16_to_cpu(entry->xe_name_offset);
831
832                         ret = ocfs2_xattr_list_entry(buffer, buffer_size,
833                                                      &result, prefix, name,
834                                                      entry->xe_name_len);
835                         if (ret)
836                                 return ret;
837                 }
838         }
839
840         return result;
841 }
842
843 int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
844                                          struct ocfs2_dinode *di)
845 {
846         struct ocfs2_xattr_header *xh;
847         int i;
848
849         xh = (struct ocfs2_xattr_header *)
850                  ((void *)di + inode->i_sb->s_blocksize -
851                  le16_to_cpu(di->i_xattr_inline_size));
852
853         for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
854                 if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
855                         return 1;
856
857         return 0;
858 }
859
860 static int ocfs2_xattr_ibody_list(struct inode *inode,
861                                   struct ocfs2_dinode *di,
862                                   char *buffer,
863                                   size_t buffer_size)
864 {
865         struct ocfs2_xattr_header *header = NULL;
866         struct ocfs2_inode_info *oi = OCFS2_I(inode);
867         int ret = 0;
868
869         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
870                 return ret;
871
872         header = (struct ocfs2_xattr_header *)
873                  ((void *)di + inode->i_sb->s_blocksize -
874                  le16_to_cpu(di->i_xattr_inline_size));
875
876         ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
877
878         return ret;
879 }
880
881 static int ocfs2_xattr_block_list(struct inode *inode,
882                                   struct ocfs2_dinode *di,
883                                   char *buffer,
884                                   size_t buffer_size)
885 {
886         struct buffer_head *blk_bh = NULL;
887         struct ocfs2_xattr_block *xb;
888         int ret = 0;
889
890         if (!di->i_xattr_loc)
891                 return ret;
892
893         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
894                                      &blk_bh);
895         if (ret < 0) {
896                 mlog_errno(ret);
897                 return ret;
898         }
899
900         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
901         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
902                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
903                 ret = ocfs2_xattr_list_entries(inode, header,
904                                                buffer, buffer_size);
905         } else
906                 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
907                                                    buffer, buffer_size);
908
909         brelse(blk_bh);
910
911         return ret;
912 }
913
914 ssize_t ocfs2_listxattr(struct dentry *dentry,
915                         char *buffer,
916                         size_t size)
917 {
918         int ret = 0, i_ret = 0, b_ret = 0;
919         struct buffer_head *di_bh = NULL;
920         struct ocfs2_dinode *di = NULL;
921         struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
922
923         if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
924                 return -EOPNOTSUPP;
925
926         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
927                 return ret;
928
929         ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
930         if (ret < 0) {
931                 mlog_errno(ret);
932                 return ret;
933         }
934
935         di = (struct ocfs2_dinode *)di_bh->b_data;
936
937         down_read(&oi->ip_xattr_sem);
938         i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
939         if (i_ret < 0)
940                 b_ret = 0;
941         else {
942                 if (buffer) {
943                         buffer += i_ret;
944                         size -= i_ret;
945                 }
946                 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
947                                                buffer, size);
948                 if (b_ret < 0)
949                         i_ret = 0;
950         }
951         up_read(&oi->ip_xattr_sem);
952         ocfs2_inode_unlock(dentry->d_inode, 0);
953
954         brelse(di_bh);
955
956         return i_ret + b_ret;
957 }
958
959 static int ocfs2_xattr_find_entry(int name_index,
960                                   const char *name,
961                                   struct ocfs2_xattr_search *xs)
962 {
963         struct ocfs2_xattr_entry *entry;
964         size_t name_len;
965         int i, cmp = 1;
966
967         if (name == NULL)
968                 return -EINVAL;
969
970         name_len = strlen(name);
971         entry = xs->here;
972         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
973                 cmp = name_index - ocfs2_xattr_get_type(entry);
974                 if (!cmp)
975                         cmp = name_len - entry->xe_name_len;
976                 if (!cmp)
977                         cmp = memcmp(name, (xs->base +
978                                      le16_to_cpu(entry->xe_name_offset)),
979                                      name_len);
980                 if (cmp == 0)
981                         break;
982                 entry += 1;
983         }
984         xs->here = entry;
985
986         return cmp ? -ENODATA : 0;
987 }
988
989 static int ocfs2_xattr_get_value_outside(struct inode *inode,
990                                          struct ocfs2_xattr_value_root *xv,
991                                          void *buffer,
992                                          size_t len)
993 {
994         u32 cpos, p_cluster, num_clusters, bpc, clusters;
995         u64 blkno;
996         int i, ret = 0;
997         size_t cplen, blocksize;
998         struct buffer_head *bh = NULL;
999         struct ocfs2_extent_list *el;
1000
1001         el = &xv->xr_list;
1002         clusters = le32_to_cpu(xv->xr_clusters);
1003         bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1004         blocksize = inode->i_sb->s_blocksize;
1005
1006         cpos = 0;
1007         while (cpos < clusters) {
1008                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1009                                                &num_clusters, el, NULL);
1010                 if (ret) {
1011                         mlog_errno(ret);
1012                         goto out;
1013                 }
1014
1015                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1016                 /* Copy ocfs2_xattr_value */
1017                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1018                         ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1019                                                &bh, NULL);
1020                         if (ret) {
1021                                 mlog_errno(ret);
1022                                 goto out;
1023                         }
1024
1025                         cplen = len >= blocksize ? blocksize : len;
1026                         memcpy(buffer, bh->b_data, cplen);
1027                         len -= cplen;
1028                         buffer += cplen;
1029
1030                         brelse(bh);
1031                         bh = NULL;
1032                         if (len == 0)
1033                                 break;
1034                 }
1035                 cpos += num_clusters;
1036         }
1037 out:
1038         return ret;
1039 }
1040
1041 static int ocfs2_xattr_ibody_get(struct inode *inode,
1042                                  int name_index,
1043                                  const char *name,
1044                                  void *buffer,
1045                                  size_t buffer_size,
1046                                  struct ocfs2_xattr_search *xs)
1047 {
1048         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1049         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1050         struct ocfs2_xattr_value_root *xv;
1051         size_t size;
1052         int ret = 0;
1053
1054         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
1055                 return -ENODATA;
1056
1057         xs->end = (void *)di + inode->i_sb->s_blocksize;
1058         xs->header = (struct ocfs2_xattr_header *)
1059                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1060         xs->base = (void *)xs->header;
1061         xs->here = xs->header->xh_entries;
1062
1063         ret = ocfs2_xattr_find_entry(name_index, name, xs);
1064         if (ret)
1065                 return ret;
1066         size = le64_to_cpu(xs->here->xe_value_size);
1067         if (buffer) {
1068                 if (size > buffer_size)
1069                         return -ERANGE;
1070                 if (ocfs2_xattr_is_local(xs->here)) {
1071                         memcpy(buffer, (void *)xs->base +
1072                                le16_to_cpu(xs->here->xe_name_offset) +
1073                                OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
1074                 } else {
1075                         xv = (struct ocfs2_xattr_value_root *)
1076                                 (xs->base + le16_to_cpu(
1077                                  xs->here->xe_name_offset) +
1078                                 OCFS2_XATTR_SIZE(xs->here->xe_name_len));
1079                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1080                                                             buffer, size);
1081                         if (ret < 0) {
1082                                 mlog_errno(ret);
1083                                 return ret;
1084                         }
1085                 }
1086         }
1087
1088         return size;
1089 }
1090
1091 static int ocfs2_xattr_block_get(struct inode *inode,
1092                                  int name_index,
1093                                  const char *name,
1094                                  void *buffer,
1095                                  size_t buffer_size,
1096                                  struct ocfs2_xattr_search *xs)
1097 {
1098         struct ocfs2_xattr_block *xb;
1099         struct ocfs2_xattr_value_root *xv;
1100         size_t size;
1101         int ret = -ENODATA, name_offset, name_len, i;
1102         int uninitialized_var(block_off);
1103
1104         xs->bucket = ocfs2_xattr_bucket_new(inode);
1105         if (!xs->bucket) {
1106                 ret = -ENOMEM;
1107                 mlog_errno(ret);
1108                 goto cleanup;
1109         }
1110
1111         ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1112         if (ret) {
1113                 mlog_errno(ret);
1114                 goto cleanup;
1115         }
1116
1117         if (xs->not_found) {
1118                 ret = -ENODATA;
1119                 goto cleanup;
1120         }
1121
1122         xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1123         size = le64_to_cpu(xs->here->xe_value_size);
1124         if (buffer) {
1125                 ret = -ERANGE;
1126                 if (size > buffer_size)
1127                         goto cleanup;
1128
1129                 name_offset = le16_to_cpu(xs->here->xe_name_offset);
1130                 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1131                 i = xs->here - xs->header->xh_entries;
1132
1133                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1134                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
1135                                                                 bucket_xh(xs->bucket),
1136                                                                 i,
1137                                                                 &block_off,
1138                                                                 &name_offset);
1139                         xs->base = bucket_block(xs->bucket, block_off);
1140                 }
1141                 if (ocfs2_xattr_is_local(xs->here)) {
1142                         memcpy(buffer, (void *)xs->base +
1143                                name_offset + name_len, size);
1144                 } else {
1145                         xv = (struct ocfs2_xattr_value_root *)
1146                                 (xs->base + name_offset + name_len);
1147                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1148                                                             buffer, size);
1149                         if (ret < 0) {
1150                                 mlog_errno(ret);
1151                                 goto cleanup;
1152                         }
1153                 }
1154         }
1155         ret = size;
1156 cleanup:
1157         ocfs2_xattr_bucket_free(xs->bucket);
1158
1159         brelse(xs->xattr_bh);
1160         xs->xattr_bh = NULL;
1161         return ret;
1162 }
1163
1164 int ocfs2_xattr_get_nolock(struct inode *inode,
1165                            struct buffer_head *di_bh,
1166                            int name_index,
1167                            const char *name,
1168                            void *buffer,
1169                            size_t buffer_size)
1170 {
1171         int ret;
1172         struct ocfs2_dinode *di = NULL;
1173         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1174         struct ocfs2_xattr_search xis = {
1175                 .not_found = -ENODATA,
1176         };
1177         struct ocfs2_xattr_search xbs = {
1178                 .not_found = -ENODATA,
1179         };
1180
1181         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1182                 return -EOPNOTSUPP;
1183
1184         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1185                 ret = -ENODATA;
1186
1187         xis.inode_bh = xbs.inode_bh = di_bh;
1188         di = (struct ocfs2_dinode *)di_bh->b_data;
1189
1190         down_read(&oi->ip_xattr_sem);
1191         ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1192                                     buffer_size, &xis);
1193         if (ret == -ENODATA && di->i_xattr_loc)
1194                 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1195                                             buffer_size, &xbs);
1196         up_read(&oi->ip_xattr_sem);
1197
1198         return ret;
1199 }
1200
1201 /* ocfs2_xattr_get()
1202  *
1203  * Copy an extended attribute into the buffer provided.
1204  * Buffer is NULL to compute the size of buffer required.
1205  */
1206 static int ocfs2_xattr_get(struct inode *inode,
1207                            int name_index,
1208                            const char *name,
1209                            void *buffer,
1210                            size_t buffer_size)
1211 {
1212         int ret;
1213         struct buffer_head *di_bh = NULL;
1214
1215         ret = ocfs2_inode_lock(inode, &di_bh, 0);
1216         if (ret < 0) {
1217                 mlog_errno(ret);
1218                 return ret;
1219         }
1220         ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1221                                      name, buffer, buffer_size);
1222
1223         ocfs2_inode_unlock(inode, 0);
1224
1225         brelse(di_bh);
1226
1227         return ret;
1228 }
1229
1230 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1231                                            handle_t *handle,
1232                                            struct ocfs2_xattr_value_buf *vb,
1233                                            const void *value,
1234                                            int value_len)
1235 {
1236         int ret = 0, i, cp_len;
1237         u16 blocksize = inode->i_sb->s_blocksize;
1238         u32 p_cluster, num_clusters;
1239         u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1240         u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1241         u64 blkno;
1242         struct buffer_head *bh = NULL;
1243         unsigned int ext_flags;
1244         struct ocfs2_xattr_value_root *xv = vb->vb_xv;
1245
1246         BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1247
1248         while (cpos < clusters) {
1249                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1250                                                &num_clusters, &xv->xr_list,
1251                                                &ext_flags);
1252                 if (ret) {
1253                         mlog_errno(ret);
1254                         goto out;
1255                 }
1256
1257                 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1258
1259                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1260
1261                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1262                         ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1263                                                &bh, NULL);
1264                         if (ret) {
1265                                 mlog_errno(ret);
1266                                 goto out;
1267                         }
1268
1269                         ret = ocfs2_journal_access(handle,
1270                                                    INODE_CACHE(inode),
1271                                                    bh,
1272                                                    OCFS2_JOURNAL_ACCESS_WRITE);
1273                         if (ret < 0) {
1274                                 mlog_errno(ret);
1275                                 goto out;
1276                         }
1277
1278                         cp_len = value_len > blocksize ? blocksize : value_len;
1279                         memcpy(bh->b_data, value, cp_len);
1280                         value_len -= cp_len;
1281                         value += cp_len;
1282                         if (cp_len < blocksize)
1283                                 memset(bh->b_data + cp_len, 0,
1284                                        blocksize - cp_len);
1285
1286                         ret = ocfs2_journal_dirty(handle, bh);
1287                         if (ret < 0) {
1288                                 mlog_errno(ret);
1289                                 goto out;
1290                         }
1291                         brelse(bh);
1292                         bh = NULL;
1293
1294                         /*
1295                          * XXX: do we need to empty all the following
1296                          * blocks in this cluster?
1297                          */
1298                         if (!value_len)
1299                                 break;
1300                 }
1301                 cpos += num_clusters;
1302         }
1303 out:
1304         brelse(bh);
1305
1306         return ret;
1307 }
1308
1309 static int ocfs2_xattr_cleanup(struct inode *inode,
1310                                handle_t *handle,
1311                                struct ocfs2_xattr_info *xi,
1312                                struct ocfs2_xattr_search *xs,
1313                                struct ocfs2_xattr_value_buf *vb,
1314                                size_t offs)
1315 {
1316         int ret = 0;
1317         size_t name_len = strlen(xi->name);
1318         void *val = xs->base + offs;
1319         size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1320
1321         ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1322                             OCFS2_JOURNAL_ACCESS_WRITE);
1323         if (ret) {
1324                 mlog_errno(ret);
1325                 goto out;
1326         }
1327         /* Decrease xattr count */
1328         le16_add_cpu(&xs->header->xh_count, -1);
1329         /* Remove the xattr entry and tree root which has already be set*/
1330         memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
1331         memset(val, 0, size);
1332
1333         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1334         if (ret < 0)
1335                 mlog_errno(ret);
1336 out:
1337         return ret;
1338 }
1339
1340 static int ocfs2_xattr_update_entry(struct inode *inode,
1341                                     handle_t *handle,
1342                                     struct ocfs2_xattr_info *xi,
1343                                     struct ocfs2_xattr_search *xs,
1344                                     struct ocfs2_xattr_value_buf *vb,
1345                                     size_t offs)
1346 {
1347         int ret;
1348
1349         ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1350                             OCFS2_JOURNAL_ACCESS_WRITE);
1351         if (ret) {
1352                 mlog_errno(ret);
1353                 goto out;
1354         }
1355
1356         xs->here->xe_name_offset = cpu_to_le16(offs);
1357         xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1358         if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE)
1359                 ocfs2_xattr_set_local(xs->here, 1);
1360         else
1361                 ocfs2_xattr_set_local(xs->here, 0);
1362         ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1363
1364         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1365         if (ret < 0)
1366                 mlog_errno(ret);
1367 out:
1368         return ret;
1369 }
1370
1371 /*
1372  * ocfs2_xattr_set_value_outside()
1373  *
1374  * Set large size value in B tree.
1375  */
1376 static int ocfs2_xattr_set_value_outside(struct inode *inode,
1377                                          struct ocfs2_xattr_info *xi,
1378                                          struct ocfs2_xattr_search *xs,
1379                                          struct ocfs2_xattr_set_ctxt *ctxt,
1380                                          struct ocfs2_xattr_value_buf *vb,
1381                                          size_t offs)
1382 {
1383         size_t name_len = strlen(xi->name);
1384         void *val = xs->base + offs;
1385         struct ocfs2_xattr_value_root *xv = NULL;
1386         size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1387         int ret = 0;
1388
1389         memset(val, 0, size);
1390         memcpy(val, xi->name, name_len);
1391         xv = (struct ocfs2_xattr_value_root *)
1392                 (val + OCFS2_XATTR_SIZE(name_len));
1393         xv->xr_clusters = 0;
1394         xv->xr_last_eb_blk = 0;
1395         xv->xr_list.l_tree_depth = 0;
1396         xv->xr_list.l_count = cpu_to_le16(1);
1397         xv->xr_list.l_next_free_rec = 0;
1398         vb->vb_xv = xv;
1399
1400         ret = ocfs2_xattr_value_truncate(inode, vb, xi->value_len, ctxt);
1401         if (ret < 0) {
1402                 mlog_errno(ret);
1403                 return ret;
1404         }
1405         ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, vb, offs);
1406         if (ret < 0) {
1407                 mlog_errno(ret);
1408                 return ret;
1409         }
1410         ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb,
1411                                               xi->value, xi->value_len);
1412         if (ret < 0)
1413                 mlog_errno(ret);
1414
1415         return ret;
1416 }
1417
1418 /*
1419  * ocfs2_xattr_set_entry_local()
1420  *
1421  * Set, replace or remove extended attribute in local.
1422  */
1423 static void ocfs2_xattr_set_entry_local(struct inode *inode,
1424                                         struct ocfs2_xattr_info *xi,
1425                                         struct ocfs2_xattr_search *xs,
1426                                         struct ocfs2_xattr_entry *last,
1427                                         size_t min_offs)
1428 {
1429         size_t name_len = strlen(xi->name);
1430         int i;
1431
1432         if (xi->value && xs->not_found) {
1433                 /* Insert the new xattr entry. */
1434                 le16_add_cpu(&xs->header->xh_count, 1);
1435                 ocfs2_xattr_set_type(last, xi->name_index);
1436                 ocfs2_xattr_set_local(last, 1);
1437                 last->xe_name_len = name_len;
1438         } else {
1439                 void *first_val;
1440                 void *val;
1441                 size_t offs, size;
1442
1443                 first_val = xs->base + min_offs;
1444                 offs = le16_to_cpu(xs->here->xe_name_offset);
1445                 val = xs->base + offs;
1446
1447                 if (le64_to_cpu(xs->here->xe_value_size) >
1448                     OCFS2_XATTR_INLINE_SIZE)
1449                         size = OCFS2_XATTR_SIZE(name_len) +
1450                                 OCFS2_XATTR_ROOT_SIZE;
1451                 else
1452                         size = OCFS2_XATTR_SIZE(name_len) +
1453                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1454
1455                 if (xi->value && size == OCFS2_XATTR_SIZE(name_len) +
1456                                 OCFS2_XATTR_SIZE(xi->value_len)) {
1457                         /* The old and the new value have the
1458                            same size. Just replace the value. */
1459                         ocfs2_xattr_set_local(xs->here, 1);
1460                         xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1461                         /* Clear value bytes. */
1462                         memset(val + OCFS2_XATTR_SIZE(name_len),
1463                                0,
1464                                OCFS2_XATTR_SIZE(xi->value_len));
1465                         memcpy(val + OCFS2_XATTR_SIZE(name_len),
1466                                xi->value,
1467                                xi->value_len);
1468                         return;
1469                 }
1470                 /* Remove the old name+value. */
1471                 memmove(first_val + size, first_val, val - first_val);
1472                 memset(first_val, 0, size);
1473                 xs->here->xe_name_hash = 0;
1474                 xs->here->xe_name_offset = 0;
1475                 ocfs2_xattr_set_local(xs->here, 1);
1476                 xs->here->xe_value_size = 0;
1477
1478                 min_offs += size;
1479
1480                 /* Adjust all value offsets. */
1481                 last = xs->header->xh_entries;
1482                 for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1483                         size_t o = le16_to_cpu(last->xe_name_offset);
1484
1485                         if (o < offs)
1486                                 last->xe_name_offset = cpu_to_le16(o + size);
1487                         last += 1;
1488                 }
1489
1490                 if (!xi->value) {
1491                         /* Remove the old entry. */
1492                         last -= 1;
1493                         memmove(xs->here, xs->here + 1,
1494                                 (void *)last - (void *)xs->here);
1495                         memset(last, 0, sizeof(struct ocfs2_xattr_entry));
1496                         le16_add_cpu(&xs->header->xh_count, -1);
1497                 }
1498         }
1499         if (xi->value) {
1500                 /* Insert the new name+value. */
1501                 size_t size = OCFS2_XATTR_SIZE(name_len) +
1502                                 OCFS2_XATTR_SIZE(xi->value_len);
1503                 void *val = xs->base + min_offs - size;
1504
1505                 xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
1506                 memset(val, 0, size);
1507                 memcpy(val, xi->name, name_len);
1508                 memcpy(val + OCFS2_XATTR_SIZE(name_len),
1509                        xi->value,
1510                        xi->value_len);
1511                 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1512                 ocfs2_xattr_set_local(xs->here, 1);
1513                 ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1514         }
1515
1516         return;
1517 }
1518
1519 /*
1520  * ocfs2_xattr_set_entry()
1521  *
1522  * Set extended attribute entry into inode or block.
1523  *
1524  * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
1525  * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(),
1526  * then set value in B tree with set_value_outside().
1527  */
/*
 * ocfs2_xattr_set_entry()
 *
 * Apply the change described by @xi to the non-indexed xattr entry area that
 * @xs points at.  When @flag contains OCFS2_INLINE_XATTR_FL the area must be
 * inside the inode block (xs->xattr_bh == xs->inode_bh); otherwise it must be
 * a separate xattr block.  Both conditions are enforced with BUG_ON().
 *
 * Values longer than OCFS2_XATTR_INLINE_SIZE are handled in two steps: a
 * default value root (def_xv) is first stored locally next to the name, then
 * the real value is written through ocfs2_xattr_set_value_outside().
 *
 * @flag is also OR-ed into the inode's dynamic features, both in memory and
 * in the on-disk dinode.
 *
 * Returns 0 on success, -EIO if the computed free space is negative,
 * -ENOSPC if the new entry does not fit, or the error of a failing helper.
 */
1528 static int ocfs2_xattr_set_entry(struct inode *inode,
1529                                  struct ocfs2_xattr_info *xi,
1530                                  struct ocfs2_xattr_search *xs,
1531                                  struct ocfs2_xattr_set_ctxt *ctxt,
1532                                  int flag)
1533 {
1534         struct ocfs2_xattr_entry *last;
1535         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1536         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1537         size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
1538         size_t size_l = 0;
1539         handle_t *handle = ctxt->handle;
1540         int free, i, ret;
             /* Local copy of @xi; its value is swapped for def_xv below when
              * the real value has to live outside the entry area. */
1541         struct ocfs2_xattr_info xi_l = {
1542                 .name_index = xi->name_index,
1543                 .name = xi->name,
1544                 .value = xi->value,
1545                 .value_len = xi->value_len,
1546         };
1547         struct ocfs2_xattr_value_buf vb = {
1548                 .vb_bh = xs->xattr_bh,
1549                 .vb_access = ocfs2_journal_access_di,
1550         };
1551
             /* Pick the journal access routine matching the buffer type. */
1552         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1553                 BUG_ON(xs->xattr_bh == xs->inode_bh);
1554                 vb.vb_access = ocfs2_journal_access_xb;
1555         } else
1556                 BUG_ON(xs->xattr_bh != xs->inode_bh);
1557
1558         /* Compute min_offs, last and free space. */
1559         last = xs->header->xh_entries;
1560
1561         for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1562                 size_t offs = le16_to_cpu(last->xe_name_offset);
1563                 if (offs < min_offs)
1564                         min_offs = offs;
1565                 last += 1;
1566         }
1567
             /* Gap between the end of the entry array and the lowest
              * name/value offset, minus OCFS2_XATTR_HEADER_GAP. */
1568         free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
1569         if (free < 0)
1570                 return -EIO;
1571
             /* An existing entry will be replaced, so the space it occupies
              * counts as available for the new one. */
1572         if (!xs->not_found) {
1573                 size_t size = 0;
1574                 if (ocfs2_xattr_is_local(xs->here))
1575                         size = OCFS2_XATTR_SIZE(name_len) +
1576                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1577                 else
1578                         size = OCFS2_XATTR_SIZE(name_len) +
1579                                 OCFS2_XATTR_ROOT_SIZE;
1580                 free += (size + sizeof(struct ocfs2_xattr_entry));
1581         }
1582         /* Check free space in inode or block */
1583         if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1584                 if (free < sizeof(struct ocfs2_xattr_entry) +
1585                            OCFS2_XATTR_SIZE(name_len) +
1586                            OCFS2_XATTR_ROOT_SIZE) {
1587                         ret = -ENOSPC;
1588                         goto out;
1589                 }
                     /* Large value: only a value root is stored locally. */
1590                 size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1591                 xi_l.value = (void *)&def_xv;
1592                 xi_l.value_len = OCFS2_XATTR_ROOT_SIZE;
1593         } else if (xi->value) {
1594                 if (free < sizeof(struct ocfs2_xattr_entry) +
1595                            OCFS2_XATTR_SIZE(name_len) +
1596                            OCFS2_XATTR_SIZE(xi->value_len)) {
1597                         ret = -ENOSPC;
1598                         goto out;
1599                 }
1600         }
1601
1602         if (!xs->not_found) {
1603                 /* For existing extended attribute */
1604                 size_t size = OCFS2_XATTR_SIZE(name_len) +
1605                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1606                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1607                 void *val = xs->base + offs;
1608
1609                 if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1610                         /* Replace existing local xattr with tree root */
1611                         ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1612                                                             ctxt, &vb, offs);
1613                         if (ret < 0)
1614                                 mlog_errno(ret);
1615                         goto out;
1616                 } else if (!ocfs2_xattr_is_local(xs->here)) {
1617                         /* For existing xattr which has value outside */
1618                         vb.vb_xv = (struct ocfs2_xattr_value_root *)
1619                                 (val + OCFS2_XATTR_SIZE(name_len));
1620
1621                         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1622                                 /*
1623                                  * If new value need set outside also,
1624                                  * first truncate old value to new value,
1625                                  * then set new value with set_value_outside().
1626                                  */
1627                                 ret = ocfs2_xattr_value_truncate(inode,
1628                                                                  &vb,
1629                                                                  xi->value_len,
1630                                                                  ctxt);
1631                                 if (ret < 0) {
1632                                         mlog_errno(ret);
1633                                         goto out;
1634                                 }
1635
1636                                 ret = ocfs2_xattr_update_entry(inode,
1637                                                                handle,
1638                                                                xi,
1639                                                                xs,
1640                                                                &vb,
1641                                                                offs);
1642                                 if (ret < 0) {
1643                                         mlog_errno(ret);
1644                                         goto out;
1645                                 }
1646
1647                                 ret = __ocfs2_xattr_set_value_outside(inode,
1648                                                                 handle,
1649                                                                 &vb,
1650                                                                 xi->value,
1651                                                                 xi->value_len);
1652                                 if (ret < 0)
1653                                         mlog_errno(ret);
1654                                 goto out;
1655                         } else {
1656                                 /*
1657                                  * If new value need set in local,
1658                                  * just truncate old value to zero.
1659                                  */
1660                                  ret = ocfs2_xattr_value_truncate(inode,
1661                                                                   &vb,
1662                                                                   0,
1663                                                                   ctxt);
                                     /*
                                      * NOTE(review): a failure here is only
                                      * logged; execution continues and ret is
                                      * overwritten by the journal access call
                                      * below.  Confirm this is intended.
                                      */
1664                                 if (ret < 0)
1665                                         mlog_errno(ret);
1666                         }
1667                 }
1668         }
1669
             /* The dinode is always journaled: its feature flags are updated
              * below even when the entry lives in a separate block. */
1670         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), xs->inode_bh,
1671                                       OCFS2_JOURNAL_ACCESS_WRITE);
1672         if (ret) {
1673                 mlog_errno(ret);
1674                 goto out;
1675         }
1676
1677         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1678                 ret = vb.vb_access(handle, INODE_CACHE(inode), vb.vb_bh,
1679                                    OCFS2_JOURNAL_ACCESS_WRITE);
1680                 if (ret) {
1681                         mlog_errno(ret);
1682                         goto out;
1683                 }
1684         }
1685
1686         /*
1687          * Set value in local, include set tree root in local.
1688          * This is the first step for value size >INLINE_SIZE.
1689          */
1690         ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);
1691
1692         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1693                 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1694                 if (ret < 0) {
1695                         mlog_errno(ret);
1696                         goto out;
1697                 }
1698         }
1699
             /* First inline xattr on this inode: carve the inline xattr area
              * out of the space used by inline data or the extent list. */
1700         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
1701             (flag & OCFS2_INLINE_XATTR_FL)) {
1702                 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1703                 unsigned int xattrsize = osb->s_xattr_inline_size;
1704
1705                 /*
1706                  * Adjust extent record count or inline data size
1707                  * to reserve space for extended attribute.
1708                  */
1709                 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1710                         struct ocfs2_inline_data *idata = &di->id2.i_data;
1711                         le16_add_cpu(&idata->id_count, -xattrsize);
1712                 } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
1713                         struct ocfs2_extent_list *el = &di->id2.i_list;
1714                         le16_add_cpu(&el->l_count, -(xattrsize /
1715                                         sizeof(struct ocfs2_extent_rec)));
1716                 }
1717                 di->i_xattr_inline_size = cpu_to_le16(xattrsize);
1718         }
1719         /* Update xattr flag */
1720         spin_lock(&oi->ip_lock);
1721         oi->ip_dyn_features |= flag;
1722         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1723         spin_unlock(&oi->ip_lock);
1724
1725         ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1726         if (ret < 0)
1727                 mlog_errno(ret);
1728
1729         if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1730                 /*
1731                  * Set value outside in B tree.
1732                  * This is the second step for value size > INLINE_SIZE.
1733                  */
1734                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1735                 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt,
1736                                                     &vb, offs);
1737                 if (ret < 0) {
1738                         int ret2;
1739
1740                         mlog_errno(ret);
1741                         /*
1742                          * If set value outside failed, we have to clean
1743                          * the junk tree root we have already set in local.
1744                          */
1745                         ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle,
1746                                                    xi, xs, &vb, offs);
1747                         if (ret2 < 0)
1748                                 mlog_errno(ret2);
1749                 }
1750         }
1751 out:
1752         return ret;
1753 }
1754
1755 static int ocfs2_remove_value_outside(struct inode*inode,
1756                                       struct ocfs2_xattr_value_buf *vb,
1757                                       struct ocfs2_xattr_header *header)
1758 {
1759         int ret = 0, i;
1760         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1761         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
1762
1763         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
1764
1765         ctxt.handle = ocfs2_start_trans(osb,
1766                                         ocfs2_remove_extent_credits(osb->sb));
1767         if (IS_ERR(ctxt.handle)) {
1768                 ret = PTR_ERR(ctxt.handle);
1769                 mlog_errno(ret);
1770                 goto out;
1771         }
1772
1773         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1774                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1775
1776                 if (!ocfs2_xattr_is_local(entry)) {
1777                         void *val;
1778
1779                         val = (void *)header +
1780                                 le16_to_cpu(entry->xe_name_offset);
1781                         vb->vb_xv = (struct ocfs2_xattr_value_root *)
1782                                 (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1783                         ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
1784                         if (ret < 0) {
1785                                 mlog_errno(ret);
1786                                 break;
1787                         }
1788                 }
1789         }
1790
1791         ocfs2_commit_trans(osb, ctxt.handle);
1792         ocfs2_schedule_truncate_log_flush(osb, 1);
1793         ocfs2_run_deallocs(osb, &ctxt.dealloc);
1794 out:
1795         return ret;
1796 }
1797
1798 static int ocfs2_xattr_ibody_remove(struct inode *inode,
1799                                     struct buffer_head *di_bh)
1800 {
1801
1802         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1803         struct ocfs2_xattr_header *header;
1804         int ret;
1805         struct ocfs2_xattr_value_buf vb = {
1806                 .vb_bh = di_bh,
1807                 .vb_access = ocfs2_journal_access_di,
1808         };
1809
1810         header = (struct ocfs2_xattr_header *)
1811                  ((void *)di + inode->i_sb->s_blocksize -
1812                  le16_to_cpu(di->i_xattr_inline_size));
1813
1814         ret = ocfs2_remove_value_outside(inode, &vb, header);
1815
1816         return ret;
1817 }
1818
1819 static int ocfs2_xattr_block_remove(struct inode *inode,
1820                                     struct buffer_head *blk_bh)
1821 {
1822         struct ocfs2_xattr_block *xb;
1823         int ret = 0;
1824         struct ocfs2_xattr_value_buf vb = {
1825                 .vb_bh = blk_bh,
1826                 .vb_access = ocfs2_journal_access_xb,
1827         };
1828
1829         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1830         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1831                 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1832                 ret = ocfs2_remove_value_outside(inode, &vb, header);
1833         } else
1834                 ret = ocfs2_iterate_xattr_index_block(inode,
1835                                                 blk_bh,
1836                                                 ocfs2_rm_xattr_cluster,
1837                                                 NULL);
1838
1839         return ret;
1840 }
1841
1842 static int ocfs2_xattr_free_block(struct inode *inode,
1843                                   u64 block)
1844 {
1845         struct inode *xb_alloc_inode;
1846         struct buffer_head *xb_alloc_bh = NULL;
1847         struct buffer_head *blk_bh = NULL;
1848         struct ocfs2_xattr_block *xb;
1849         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1850         handle_t *handle;
1851         int ret = 0;
1852         u64 blk, bg_blkno;
1853         u16 bit;
1854
1855         ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
1856         if (ret < 0) {
1857                 mlog_errno(ret);
1858                 goto out;
1859         }
1860
1861         ret = ocfs2_xattr_block_remove(inode, blk_bh);
1862         if (ret < 0) {
1863                 mlog_errno(ret);
1864                 goto out;
1865         }
1866
1867         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1868         blk = le64_to_cpu(xb->xb_blkno);
1869         bit = le16_to_cpu(xb->xb_suballoc_bit);
1870         bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1871
1872         xb_alloc_inode = ocfs2_get_system_file_inode(osb,
1873                                 EXTENT_ALLOC_SYSTEM_INODE,
1874                                 le16_to_cpu(xb->xb_suballoc_slot));
1875         if (!xb_alloc_inode) {
1876                 ret = -ENOMEM;
1877                 mlog_errno(ret);
1878                 goto out;
1879         }
1880         mutex_lock(&xb_alloc_inode->i_mutex);
1881
1882         ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
1883         if (ret < 0) {
1884                 mlog_errno(ret);
1885                 goto out_mutex;
1886         }
1887
1888         handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
1889         if (IS_ERR(handle)) {
1890                 ret = PTR_ERR(handle);
1891                 mlog_errno(ret);
1892                 goto out_unlock;
1893         }
1894
1895         ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
1896                                        bit, bg_blkno, 1);
1897         if (ret < 0)
1898                 mlog_errno(ret);
1899
1900         ocfs2_commit_trans(osb, handle);
1901 out_unlock:
1902         ocfs2_inode_unlock(xb_alloc_inode, 1);
1903         brelse(xb_alloc_bh);
1904 out_mutex:
1905         mutex_unlock(&xb_alloc_inode->i_mutex);
1906         iput(xb_alloc_inode);
1907 out:
1908         brelse(blk_bh);
1909         return ret;
1910 }
1911
1912 /*
1913  * ocfs2_xattr_remove()
1914  *
1915  * Free extended attribute resources associated with this inode.
1916  */
1917 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1918 {
1919         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1920         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1921         handle_t *handle;
1922         int ret;
1923
1924         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1925                 return 0;
1926
1927         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1928                 return 0;
1929
1930         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1931                 ret = ocfs2_xattr_ibody_remove(inode, di_bh);
1932                 if (ret < 0) {
1933                         mlog_errno(ret);
1934                         goto out;
1935                 }
1936         }
1937
1938         if (di->i_xattr_loc) {
1939                 ret = ocfs2_xattr_free_block(inode,
1940                                              le64_to_cpu(di->i_xattr_loc));
1941                 if (ret < 0) {
1942                         mlog_errno(ret);
1943                         goto out;
1944                 }
1945         }
1946
1947         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1948                                    OCFS2_INODE_UPDATE_CREDITS);
1949         if (IS_ERR(handle)) {
1950                 ret = PTR_ERR(handle);
1951                 mlog_errno(ret);
1952                 goto out;
1953         }
1954         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
1955                                       OCFS2_JOURNAL_ACCESS_WRITE);
1956         if (ret) {
1957                 mlog_errno(ret);
1958                 goto out_commit;
1959         }
1960
1961         di->i_xattr_loc = 0;
1962
1963         spin_lock(&oi->ip_lock);
1964         oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
1965         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1966         spin_unlock(&oi->ip_lock);
1967
1968         ret = ocfs2_journal_dirty(handle, di_bh);
1969         if (ret < 0)
1970                 mlog_errno(ret);
1971 out_commit:
1972         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1973 out:
1974         return ret;
1975 }
1976
1977 static int ocfs2_xattr_has_space_inline(struct inode *inode,
1978                                         struct ocfs2_dinode *di)
1979 {
1980         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1981         unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
1982         int free;
1983
1984         if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
1985                 return 0;
1986
1987         if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1988                 struct ocfs2_inline_data *idata = &di->id2.i_data;
1989                 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
1990         } else if (ocfs2_inode_is_fast_symlink(inode)) {
1991                 free = ocfs2_fast_symlink_chars(inode->i_sb) -
1992                         le64_to_cpu(di->i_size);
1993         } else {
1994                 struct ocfs2_extent_list *el = &di->id2.i_list;
1995                 free = (le16_to_cpu(el->l_count) -
1996                         le16_to_cpu(el->l_next_free_rec)) *
1997                         sizeof(struct ocfs2_extent_rec);
1998         }
1999         if (free >= xattrsize)
2000                 return 1;
2001
2002         return 0;
2003 }
2004
2005 /*
2006  * ocfs2_xattr_ibody_find()
2007  *
2008  * Find extended attribute in inode block and
2009  * fill search info into struct ocfs2_xattr_search.
2010  */
2011 static int ocfs2_xattr_ibody_find(struct inode *inode,
2012                                   int name_index,
2013                                   const char *name,
2014                                   struct ocfs2_xattr_search *xs)
2015 {
2016         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2017         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2018         int ret;
2019         int has_space = 0;
2020
2021         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2022                 return 0;
2023
2024         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2025                 down_read(&oi->ip_alloc_sem);
2026                 has_space = ocfs2_xattr_has_space_inline(inode, di);
2027                 up_read(&oi->ip_alloc_sem);
2028                 if (!has_space)
2029                         return 0;
2030         }
2031
2032         xs->xattr_bh = xs->inode_bh;
2033         xs->end = (void *)di + inode->i_sb->s_blocksize;
2034         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2035                 xs->header = (struct ocfs2_xattr_header *)
2036                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
2037         else
2038                 xs->header = (struct ocfs2_xattr_header *)
2039                         (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
2040         xs->base = (void *)xs->header;
2041         xs->here = xs->header->xh_entries;
2042
2043         /* Find the named attribute. */
2044         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2045                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2046                 if (ret && ret != -ENODATA)
2047                         return ret;
2048                 xs->not_found = ret;
2049         }
2050
2051         return 0;
2052 }
2053
2054 /*
2055  * ocfs2_xattr_ibody_set()
2056  *
2057  * Set, replace or remove an extended attribute into inode block.
2058  *
2059  */
2060 static int ocfs2_xattr_ibody_set(struct inode *inode,
2061                                  struct ocfs2_xattr_info *xi,
2062                                  struct ocfs2_xattr_search *xs,
2063                                  struct ocfs2_xattr_set_ctxt *ctxt)
2064 {
2065         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2066         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2067         int ret;
2068
2069         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2070                 return -ENOSPC;
2071
2072         down_write(&oi->ip_alloc_sem);
2073         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2074                 if (!ocfs2_xattr_has_space_inline(inode, di)) {
2075                         ret = -ENOSPC;
2076                         goto out;
2077                 }
2078         }
2079
2080         ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2081                                 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
2082 out:
2083         up_write(&oi->ip_alloc_sem);
2084
2085         return ret;
2086 }
2087
2088 /*
2089  * ocfs2_xattr_block_find()
2090  *
2091  * Find extended attribute in external block and
2092  * fill search info into struct ocfs2_xattr_search.
2093  */
2094 static int ocfs2_xattr_block_find(struct inode *inode,
2095                                   int name_index,
2096                                   const char *name,
2097                                   struct ocfs2_xattr_search *xs)
2098 {
2099         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2100         struct buffer_head *blk_bh = NULL;
2101         struct ocfs2_xattr_block *xb;
2102         int ret = 0;
2103
2104         if (!di->i_xattr_loc)
2105                 return ret;
2106
2107         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2108                                      &blk_bh);
2109         if (ret < 0) {
2110                 mlog_errno(ret);
2111                 return ret;
2112         }
2113
2114         xs->xattr_bh = blk_bh;
2115         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2116
2117         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2118                 xs->header = &xb->xb_attrs.xb_header;
2119                 xs->base = (void *)xs->header;
2120                 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2121                 xs->here = xs->header->xh_entries;
2122
2123                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2124         } else
2125                 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2126                                                    name_index,
2127                                                    name, xs);
2128
2129         if (ret && ret != -ENODATA) {
2130                 xs->xattr_bh = NULL;
2131                 goto cleanup;
2132         }
2133         xs->not_found = ret;
2134         return 0;
2135 cleanup:
2136         brelse(blk_bh);
2137
2138         return ret;
2139 }
2140
2141 static int ocfs2_create_xattr_block(handle_t *handle,
2142                                     struct inode *inode,
2143                                     struct buffer_head *inode_bh,
2144                                     struct ocfs2_alloc_context *meta_ac,
2145                                     struct buffer_head **ret_bh)
2146 {
2147         int ret;
2148         u16 suballoc_bit_start;
2149         u32 num_got;
2150         u64 first_blkno;
2151         struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
2152         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2153         struct buffer_head *new_bh = NULL;
2154         struct ocfs2_xattr_block *xblk;
2155
2156         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), inode_bh,
2157                                       OCFS2_JOURNAL_ACCESS_CREATE);
2158         if (ret < 0) {
2159                 mlog_errno(ret);
2160                 goto end;
2161         }
2162
2163         ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
2164                                    &suballoc_bit_start, &num_got,
2165                                    &first_blkno);
2166         if (ret < 0) {
2167                 mlog_errno(ret);
2168                 goto end;
2169         }
2170
2171         new_bh = sb_getblk(inode->i_sb, first_blkno);
2172         ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2173
2174         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode),
2175                                       new_bh,
2176                                       OCFS2_JOURNAL_ACCESS_CREATE);
2177         if (ret < 0) {
2178                 mlog_errno(ret);
2179                 goto end;
2180         }
2181
2182         /* Initialize ocfs2_xattr_block */
2183         xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2184         memset(xblk, 0, inode->i_sb->s_blocksize);
2185         strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2186         xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
2187         xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2188         xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
2189         xblk->xb_blkno = cpu_to_le64(first_blkno);
2190
2191         ret = ocfs2_journal_dirty(handle, new_bh);
2192         if (ret < 0) {
2193                 mlog_errno(ret);
2194                 goto end;
2195         }
2196         di->i_xattr_loc = cpu_to_le64(first_blkno);
2197         ocfs2_journal_dirty(handle, inode_bh);
2198
2199         *ret_bh = new_bh;
2200         new_bh = NULL;
2201
2202 end:
2203         brelse(new_bh);
2204         return ret;
2205 }
2206
2207 /*
2208  * ocfs2_xattr_block_set()
2209  *
2210  * Set, replace or remove an extended attribute into external block.
2211  *
2212  */
2213 static int ocfs2_xattr_block_set(struct inode *inode,
2214                                  struct ocfs2_xattr_info *xi,
2215                                  struct ocfs2_xattr_search *xs,
2216                                  struct ocfs2_xattr_set_ctxt *ctxt)
2217 {
2218         struct buffer_head *new_bh = NULL;
2219         handle_t *handle = ctxt->handle;
2220         struct ocfs2_xattr_block *xblk = NULL;
2221         int ret;
2222
2223         if (!xs->xattr_bh) {
2224                 ret = ocfs2_create_xattr_block(handle, inode, xs->inode_bh,
2225                                                ctxt->meta_ac, &new_bh);
2226                 if (ret) {
2227                         mlog_errno(ret);
2228                         goto end;
2229                 }
2230
2231                 xs->xattr_bh = new_bh;
2232                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2233                 xs->header = &xblk->xb_attrs.xb_header;
2234                 xs->base = (void *)xs->header;
2235                 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2236                 xs->here = xs->header->xh_entries;
2237         } else
2238                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2239
2240         if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2241                 /* Set extended attribute into external block */
2242                 ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2243                                             OCFS2_HAS_XATTR_FL);
2244                 if (!ret || ret != -ENOSPC)
2245                         goto end;
2246
2247                 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2248                 if (ret)
2249                         goto end;
2250         }
2251
2252         ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
2253
2254 end:
2255
2256         return ret;
2257 }
2258
2259 /* Check whether the new xattr can be inserted into the inode. */
2260 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2261                                        struct ocfs2_xattr_info *xi,
2262                                        struct ocfs2_xattr_search *xs)
2263 {
2264         u64 value_size;
2265         struct ocfs2_xattr_entry *last;
2266         int free, i;
2267         size_t min_offs = xs->end - xs->base;
2268
2269         if (!xs->header)
2270                 return 0;
2271
2272         last = xs->header->xh_entries;
2273
2274         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2275                 size_t offs = le16_to_cpu(last->xe_name_offset);
2276                 if (offs < min_offs)
2277                         min_offs = offs;
2278                 last += 1;
2279         }
2280
2281         free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
2282         if (free < 0)
2283                 return 0;
2284
2285         BUG_ON(!xs->not_found);
2286
2287         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2288                 value_size = OCFS2_XATTR_ROOT_SIZE;
2289         else
2290                 value_size = OCFS2_XATTR_SIZE(xi->value_len);
2291
2292         if (free >= sizeof(struct ocfs2_xattr_entry) +
2293                    OCFS2_XATTR_SIZE(strlen(xi->name)) + value_size)
2294                 return 1;
2295
2296         return 0;
2297 }
2298
2299 static int ocfs2_calc_xattr_set_need(struct inode *inode,
2300                                      struct ocfs2_dinode *di,
2301                                      struct ocfs2_xattr_info *xi,
2302                                      struct ocfs2_xattr_search *xis,
2303                                      struct ocfs2_xattr_search *xbs,
2304                                      int *clusters_need,
2305                                      int *meta_need,
2306                                      int *credits_need)
2307 {
2308         int ret = 0, old_in_xb = 0;
2309         int clusters_add = 0, meta_add = 0, credits = 0;
2310         struct buffer_head *bh = NULL;
2311         struct ocfs2_xattr_block *xb = NULL;
2312         struct ocfs2_xattr_entry *xe = NULL;
2313         struct ocfs2_xattr_value_root *xv = NULL;
2314         char *base = NULL;
2315         int name_offset, name_len = 0;
2316         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2317                                                     xi->value_len);
2318         u64 value_size;
2319
2320         /*
2321          * Calculate the clusters we need to write.
2322          * No matter whether we replace an old one or add a new one,
2323          * we need this for writing.
2324          */
2325         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2326                 credits += new_clusters *
2327                            ocfs2_clusters_to_blocks(inode->i_sb, 1);
2328
2329         if (xis->not_found && xbs->not_found) {
2330                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2331
2332                 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2333                         clusters_add += new_clusters;
2334                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2335                                                         &def_xv.xv.xr_list,
2336                                                         new_clusters);
2337                 }
2338
2339                 goto meta_guess;
2340         }
2341
2342         if (!xis->not_found) {
2343                 xe = xis->here;
2344                 name_offset = le16_to_cpu(xe->xe_name_offset);
2345                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2346                 base = xis->base;
2347                 credits += OCFS2_INODE_UPDATE_CREDITS;
2348         } else {
2349                 int i, block_off = 0;
2350                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2351                 xe = xbs->here;
2352                 name_offset = le16_to_cpu(xe->xe_name_offset);
2353                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2354                 i = xbs->here - xbs->header->xh_entries;
2355                 old_in_xb = 1;
2356
2357                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2358                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
2359                                                         bucket_xh(xbs->bucket),
2360                                                         i, &block_off,
2361                                                         &name_offset);
2362                         base = bucket_block(xbs->bucket, block_off);
2363                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2364                 } else {
2365                         base = xbs->base;
2366                         credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
2367                 }
2368         }
2369
2370         /*
2371          * delete a xattr doesn't need metadata and cluster allocation.
2372          * so just calculate the credits and return.
2373          *
2374          * The credits for removing the value tree will be extended
2375          * by ocfs2_remove_extent itself.
2376          */
2377         if (!xi->value) {
2378                 if (!ocfs2_xattr_is_local(xe))
2379                         credits += ocfs2_remove_extent_credits(inode->i_sb);
2380
2381                 goto out;
2382         }
2383
2384         /* do cluster allocation guess first. */
2385         value_size = le64_to_cpu(xe->xe_value_size);
2386
2387         if (old_in_xb) {
2388                 /*
2389                  * In xattr set, we always try to set the xe in inode first,
2390                  * so if it can be inserted into inode successfully, the old
2391                  * one will be removed from the xattr block, and this xattr
2392                  * will be inserted into inode as a new xattr in inode.
2393                  */
2394                 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
2395                         clusters_add += new_clusters;
2396                         credits += ocfs2_remove_extent_credits(inode->i_sb) +
2397                                     OCFS2_INODE_UPDATE_CREDITS;
2398                         if (!ocfs2_xattr_is_local(xe))
2399                                 credits += ocfs2_calc_extend_credits(
2400                                                         inode->i_sb,
2401                                                         &def_xv.xv.xr_list,
2402                                                         new_clusters);
2403                         goto out;
2404                 }
2405         }
2406
2407         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2408                 /* the new values will be stored outside. */
2409                 u32 old_clusters = 0;
2410
2411                 if (!ocfs2_xattr_is_local(xe)) {
2412                         old_clusters =  ocfs2_clusters_for_bytes(inode->i_sb,
2413                                                                  value_size);
2414                         xv = (struct ocfs2_xattr_value_root *)
2415                              (base + name_offset + name_len);
2416                         value_size = OCFS2_XATTR_ROOT_SIZE;
2417                 } else
2418                         xv = &def_xv.xv;
2419
2420                 if (old_clusters >= new_clusters) {
2421                         credits += ocfs2_remove_extent_credits(inode->i_sb);
2422                         goto out;
2423                 } else {
2424                         meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
2425                         clusters_add += new_clusters - old_clusters;
2426                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2427                                                              &xv->xr_list,
2428                                                              new_clusters -
2429                                                              old_clusters);
2430                         if (value_size >= OCFS2_XATTR_ROOT_SIZE)
2431                                 goto out;
2432                 }
2433         } else {
2434                 /*
2435                  * Now the new value will be stored inside. So if the new
2436                  * value is smaller than the size of value root or the old
2437                  * value, we don't need any allocation, otherwise we have
2438                  * to guess metadata allocation.
2439                  */
2440                 if ((ocfs2_xattr_is_local(xe) && value_size >= xi->value_len) ||
2441                     (!ocfs2_xattr_is_local(xe) &&
2442                      OCFS2_XATTR_ROOT_SIZE >= xi->value_len))
2443                         goto out;
2444         }
2445
2446 meta_guess:
2447         /* calculate metadata allocation. */
2448         if (di->i_xattr_loc) {
2449                 if (!xbs->xattr_bh) {
2450                         ret = ocfs2_read_xattr_block(inode,
2451                                                      le64_to_cpu(di->i_xattr_loc),
2452                                                      &bh);
2453                         if (ret) {
2454                                 mlog_errno(ret);
2455                                 goto out;
2456                         }
2457
2458                         xb = (struct ocfs2_xattr_block *)bh->b_data;
2459                 } else
2460                         xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2461
2462                 /*
2463                  * If there is already an xattr tree, good, we can calculate
2464                  * like other b-trees. Otherwise we may have the chance of
2465                  * create a tree, the credit calculation is borrowed from
2466                  * ocfs2_calc_extend_credits with root_el = NULL. And the
2467                  * new tree will be cluster based, so no meta is needed.
2468                  */
2469                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2470                         struct ocfs2_extent_list *el =
2471                                  &xb->xb_attrs.xb_root.xt_list;
2472                         meta_add += ocfs2_extend_meta_needed(el);
2473                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2474                                                              el, 1);
2475                 } else
2476                         credits += OCFS2_SUBALLOC_ALLOC + 1;
2477
2478                 /*
2479                  * This cluster will be used either for new bucket or for
2480                  * new xattr block.
2481                  * If the cluster size is the same as the bucket size, one
2482                  * more is needed since we may need to extend the bucket
2483                  * also.
2484                  */
2485                 clusters_add += 1;
2486                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2487                 if (OCFS2_XATTR_BUCKET_SIZE ==
2488                         OCFS2_SB(inode->i_sb)->s_clustersize) {
2489                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2490                         clusters_add += 1;
2491                 }
2492         } else {
2493                 meta_add += 1;
2494                 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
2495         }
2496 out:
2497         if (clusters_need)
2498                 *clusters_need = clusters_add;
2499         if (meta_need)
2500                 *meta_need = meta_add;
2501         if (credits_need)
2502                 *credits_need = credits;
2503         brelse(bh);
2504         return ret;
2505 }
2506
2507 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
2508                                      struct ocfs2_dinode *di,
2509                                      struct ocfs2_xattr_info *xi,
2510                                      struct ocfs2_xattr_search *xis,
2511                                      struct ocfs2_xattr_search *xbs,
2512                                      struct ocfs2_xattr_set_ctxt *ctxt,
2513                                      int extra_meta,
2514                                      int *credits)
2515 {
2516         int clusters_add, meta_add, ret;
2517         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2518
2519         memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
2520
2521         ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
2522
2523         ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
2524                                         &clusters_add, &meta_add, credits);
2525         if (ret) {
2526                 mlog_errno(ret);
2527                 return ret;
2528         }
2529
2530         meta_add += extra_meta;
2531         mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
2532              "credits = %d\n", xi->name, meta_add, clusters_add, *credits);
2533
2534         if (meta_add) {
2535                 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
2536                                                         &ctxt->meta_ac);
2537                 if (ret) {
2538                         mlog_errno(ret);
2539                         goto out;
2540                 }
2541         }
2542
2543         if (clusters_add) {
2544                 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
2545                 if (ret)
2546                         mlog_errno(ret);
2547         }
2548 out:
2549         if (ret) {
2550                 if (ctxt->meta_ac) {
2551                         ocfs2_free_alloc_context(ctxt->meta_ac);
2552                         ctxt->meta_ac = NULL;
2553                 }
2554
2555                 /*
2556                  * We cannot have an error and a non null ctxt->data_ac.
2557                  */
2558         }
2559
2560         return ret;
2561 }
2562
2563 static int __ocfs2_xattr_set_handle(struct inode *inode,
2564                                     struct ocfs2_dinode *di,
2565                                     struct ocfs2_xattr_info *xi,
2566                                     struct ocfs2_xattr_search *xis,
2567                                     struct ocfs2_xattr_search *xbs,
2568                                     struct ocfs2_xattr_set_ctxt *ctxt)
2569 {
2570         int ret = 0, credits, old_found;
2571
2572         if (!xi->value) {
2573                 /* Remove existing extended attribute */
2574                 if (!xis->not_found)
2575                         ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2576                 else if (!xbs->not_found)
2577                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2578         } else {
2579                 /* We always try to set extended attribute into inode first*/
2580                 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2581                 if (!ret && !xbs->not_found) {
2582                         /*
2583                          * If succeed and that extended attribute existing in
2584                          * external block, then we will remove it.
2585                          */
2586                         xi->value = NULL;
2587                         xi->value_len = 0;
2588
2589                         old_found = xis->not_found;
2590                         xis->not_found = -ENODATA;
2591                         ret = ocfs2_calc_xattr_set_need(inode,
2592                                                         di,
2593                                                         xi,
2594                                                         xis,
2595                                                         xbs,
2596                                                         NULL,
2597                                                         NULL,
2598                                                         &credits);
2599                         xis->not_found = old_found;
2600                         if (ret) {
2601                                 mlog_errno(ret);
2602                                 goto out;
2603                         }
2604
2605                         ret = ocfs2_extend_trans(ctxt->handle, credits +
2606                                         ctxt->handle->h_buffer_credits);
2607                         if (ret) {
2608                                 mlog_errno(ret);
2609                                 goto out;
2610                         }
2611                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2612                 } else if (ret == -ENOSPC) {
2613                         if (di->i_xattr_loc && !xbs->xattr_bh) {
2614                                 ret = ocfs2_xattr_block_find(inode,
2615                                                              xi->name_index,
2616                                                              xi->name, xbs);
2617                                 if (ret)
2618                                         goto out;
2619
2620                                 old_found = xis->not_found;
2621                                 xis->not_found = -ENODATA;
2622                                 ret = ocfs2_calc_xattr_set_need(inode,
2623                                                                 di,
2624                                                                 xi,
2625                                                                 xis,
2626                                                                 xbs,
2627                                                                 NULL,
2628                                                                 NULL,
2629                                                                 &credits);
2630                                 xis->not_found = old_found;
2631                                 if (ret) {
2632                                         mlog_errno(ret);
2633                                         goto out;
2634                                 }
2635
2636                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
2637                                         ctxt->handle->h_buffer_credits);
2638                                 if (ret) {
2639                                         mlog_errno(ret);
2640                                         goto out;
2641                                 }
2642                         }
2643                         /*
2644                          * If no space in inode, we will set extended attribute
2645                          * into external block.
2646                          */
2647                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2648                         if (ret)
2649                                 goto out;
2650                         if (!xis->not_found) {
2651                                 /*
2652                                  * If succeed and that extended attribute
2653                                  * existing in inode, we will remove it.
2654                                  */
2655                                 xi->value = NULL;
2656                                 xi->value_len = 0;
2657                                 xbs->not_found = -ENODATA;
2658                                 ret = ocfs2_calc_xattr_set_need(inode,
2659                                                                 di,
2660                                                                 xi,
2661                                                                 xis,
2662                                                                 xbs,
2663                                                                 NULL,
2664                                                                 NULL,
2665                                                                 &credits);
2666                                 if (ret) {
2667                                         mlog_errno(ret);
2668                                         goto out;
2669                                 }
2670
2671                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
2672                                                 ctxt->handle->h_buffer_credits);
2673                                 if (ret) {
2674                                         mlog_errno(ret);
2675                                         goto out;
2676                                 }
2677                                 ret = ocfs2_xattr_ibody_set(inode, xi,
2678                                                             xis, ctxt);
2679                         }
2680                 }
2681         }
2682
2683         if (!ret) {
2684                 /* Update inode ctime. */
2685                 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
2686                                               xis->inode_bh,
2687                                               OCFS2_JOURNAL_ACCESS_WRITE);
2688                 if (ret) {
2689                         mlog_errno(ret);
2690                         goto out;
2691                 }
2692
2693                 inode->i_ctime = CURRENT_TIME;
2694                 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
2695                 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
2696                 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
2697         }
2698 out:
2699         return ret;
2700 }
2701
2702 /*
2703  * This function only called duing creating inode
2704  * for init security/acl xattrs of the new inode.
2705  * All transanction credits have been reserved in mknod.
2706  */
2707 int ocfs2_xattr_set_handle(handle_t *handle,
2708                            struct inode *inode,
2709                            struct buffer_head *di_bh,
2710                            int name_index,
2711                            const char *name,
2712                            const void *value,
2713                            size_t value_len,
2714                            int flags,
2715                            struct ocfs2_alloc_context *meta_ac,
2716                            struct ocfs2_alloc_context *data_ac)
2717 {
2718         struct ocfs2_dinode *di;
2719         int ret;
2720
2721         struct ocfs2_xattr_info xi = {
2722                 .name_index = name_index,
2723                 .name = name,
2724                 .value = value,
2725                 .value_len = value_len,
2726         };
2727
2728         struct ocfs2_xattr_search xis = {
2729                 .not_found = -ENODATA,
2730         };
2731
2732         struct ocfs2_xattr_search xbs = {
2733                 .not_found = -ENODATA,
2734         };
2735
2736         struct ocfs2_xattr_set_ctxt ctxt = {
2737                 .handle = handle,
2738                 .meta_ac = meta_ac,
2739                 .data_ac = data_ac,
2740         };
2741
2742         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2743                 return -EOPNOTSUPP;
2744
2745         /*
2746          * In extreme situation, may need xattr bucket when
2747          * block size is too small. And we have already reserved
2748          * the credits for bucket in mknod.
2749          */
2750         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
2751                 xbs.bucket = ocfs2_xattr_bucket_new(inode);
2752                 if (!xbs.bucket) {
2753                         mlog_errno(-ENOMEM);
2754                         return -ENOMEM;
2755                 }
2756         }
2757
2758         xis.inode_bh = xbs.inode_bh = di_bh;
2759         di = (struct ocfs2_dinode *)di_bh->b_data;
2760
2761         down_write(&OCFS2_I(inode)->ip_xattr_sem);
2762
2763         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2764         if (ret)
2765                 goto cleanup;
2766         if (xis.not_found) {
2767                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2768                 if (ret)
2769                         goto cleanup;
2770         }
2771
2772         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2773
2774 cleanup:
2775         up_write(&OCFS2_I(inode)->ip_xattr_sem);
2776         brelse(xbs.xattr_bh);
2777         ocfs2_xattr_bucket_free(xbs.bucket);
2778
2779         return ret;
2780 }
2781
2782 /*
2783  * ocfs2_xattr_set()
2784  *
2785  * Set, replace or remove an extended attribute for this inode.
2786  * value is NULL to remove an existing extended attribute, else either
2787  * create or replace an extended attribute.
2788  */
2789 int ocfs2_xattr_set(struct inode *inode,
2790                     int name_index,
2791                     const char *name,
2792                     const void *value,
2793                     size_t value_len,
2794                     int flags)
2795 {
2796         struct buffer_head *di_bh = NULL;
2797         struct ocfs2_dinode *di;
2798         int ret, credits, ref_meta = 0, ref_credits = 0;
2799         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2800         struct inode *tl_inode = osb->osb_tl_inode;
2801         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2802         struct ocfs2_refcount_tree *ref_tree = NULL;
2803
2804         struct ocfs2_xattr_info xi = {
2805                 .name_index = name_index,
2806                 .name = name,
2807                 .value = value,
2808                 .value_len = value_len,
2809         };
2810
2811         struct ocfs2_xattr_search xis = {
2812                 .not_found = -ENODATA,
2813         };
2814
2815         struct ocfs2_xattr_search xbs = {
2816                 .not_found = -ENODATA,
2817         };
2818
2819         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2820                 return -EOPNOTSUPP;
2821
2822         /*
2823          * Only xbs will be used on indexed trees.  xis doesn't need a
2824          * bucket.
2825          */
2826         xbs.bucket = ocfs2_xattr_bucket_new(inode);
2827         if (!xbs.bucket) {
2828                 mlog_errno(-ENOMEM);
2829                 return -ENOMEM;
2830         }
2831
2832         ret = ocfs2_inode_lock(inode, &di_bh, 1);
2833         if (ret < 0) {
2834                 mlog_errno(ret);
2835                 goto cleanup_nolock;
2836         }
2837         xis.inode_bh = xbs.inode_bh = di_bh;
2838         di = (struct ocfs2_dinode *)di_bh->b_data;
2839
2840         down_write(&OCFS2_I(inode)->ip_xattr_sem);
2841         /*
2842          * Scan inode and external block to find the same name
2843          * extended attribute and collect search infomation.
2844          */
2845         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2846         if (ret)
2847                 goto cleanup;
2848         if (xis.not_found) {
2849                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2850                 if (ret)
2851                         goto cleanup;
2852         }
2853
2854         if (xis.not_found && xbs.not_found) {
2855                 ret = -ENODATA;
2856                 if (flags & XATTR_REPLACE)
2857                         goto cleanup;
2858                 ret = 0;
2859                 if (!value)
2860                         goto cleanup;
2861         } else {
2862                 ret = -EEXIST;
2863                 if (flags & XATTR_CREATE)
2864                         goto cleanup;
2865         }
2866
2867         /* Check whether the value is refcounted and do some prepartion. */
2868         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
2869             (!xis.not_found || !xbs.not_found)) {
2870                 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
2871                                                    &xis, &xbs, &ref_tree,
2872                                                    &ref_meta, &ref_credits);
2873                 if (ret) {
2874                         mlog_errno(ret);
2875                         goto cleanup;
2876                 }
2877         }
2878
2879         mutex_lock(&tl_inode->i_mutex);
2880
2881         if (ocfs2_truncate_log_needs_flush(osb)) {
2882                 ret = __ocfs2_flush_truncate_log(osb);
2883                 if (ret < 0) {
2884                         mutex_unlock(&tl_inode->i_mutex);
2885                         mlog_errno(ret);
2886                         goto cleanup;
2887                 }
2888         }
2889         mutex_unlock(&tl_inode->i_mutex);
2890
2891         ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
2892                                         &xbs, &ctxt, ref_meta, &credits);
2893         if (ret) {
2894                 mlog_errno(ret);
2895                 goto cleanup;
2896         }
2897
2898         /* we need to update inode's ctime field, so add credit for it. */
2899         credits += OCFS2_INODE_UPDATE_CREDITS;
2900         ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
2901         if (IS_ERR(ctxt.handle)) {
2902                 ret = PTR_ERR(ctxt.handle);
2903                 mlog_errno(ret);
2904                 goto cleanup;
2905         }
2906
2907         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2908
2909         ocfs2_commit_trans(osb, ctxt.handle);
2910
2911         if (ctxt.data_ac)
2912                 ocfs2_free_alloc_context(ctxt.data_ac);
2913         if (ctxt.meta_ac)
2914                 ocfs2_free_alloc_context(ctxt.meta_ac);
2915         if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
2916                 ocfs2_schedule_truncate_log_flush(osb, 1);
2917         ocfs2_run_deallocs(osb, &ctxt.dealloc);
2918
2919 cleanup:
2920         if (ref_tree)
2921                 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
2922         up_write(&OCFS2_I(inode)->ip_xattr_sem);
2923         if (!value && !ret) {
2924                 ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
2925                 if (ret)
2926                         mlog_errno(ret);
2927         }
2928         ocfs2_inode_unlock(inode, 1);
2929 cleanup_nolock:
2930         brelse(di_bh);
2931         brelse(xbs.xattr_bh);
2932         ocfs2_xattr_bucket_free(xbs.bucket);
2933
2934         return ret;
2935 }
2936
2937 /*
2938  * Find the xattr extent rec which may contains name_hash.
2939  * e_cpos will be the first name hash of the xattr rec.
2940  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
2941  */
2942 static int ocfs2_xattr_get_rec(struct inode *inode,
2943                                u32 name_hash,
2944                                u64 *p_blkno,
2945                                u32 *e_cpos,
2946                                u32 *num_clusters,
2947                                struct ocfs2_extent_list *el)
2948 {
2949         int ret = 0, i;
2950         struct buffer_head *eb_bh = NULL;
2951         struct ocfs2_extent_block *eb;
2952         struct ocfs2_extent_rec *rec = NULL;
2953         u64 e_blkno = 0;
2954
2955         if (el->l_tree_depth) {
2956                 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
2957                                       &eb_bh);
2958                 if (ret) {
2959                         mlog_errno(ret);
2960                         goto out;
2961                 }
2962
2963                 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2964                 el = &eb->h_list;
2965
2966                 if (el->l_tree_depth) {
2967                         ocfs2_error(inode->i_sb,
2968                                     "Inode %lu has non zero tree depth in "
2969                                     "xattr tree block %llu\n", inode->i_ino,
2970                                     (unsigned long long)eb_bh->b_blocknr);
2971                         ret = -EROFS;
2972                         goto out;
2973                 }
2974         }
2975
2976         for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
2977                 rec = &el->l_recs[i];
2978
2979                 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
2980                         e_blkno = le64_to_cpu(rec->e_blkno);
2981                         break;
2982                 }
2983         }
2984
2985         if (!e_blkno) {
2986                 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
2987                             "record (%u, %u, 0) in xattr", inode->i_ino,
2988                             le32_to_cpu(rec->e_cpos),
2989                             ocfs2_rec_clusters(el, rec));
2990                 ret = -EROFS;
2991                 goto out;
2992         }
2993
2994         *p_blkno = le64_to_cpu(rec->e_blkno);
2995         *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
2996         if (e_cpos)
2997                 *e_cpos = le32_to_cpu(rec->e_cpos);
2998 out:
2999         brelse(eb_bh);
3000         return ret;
3001 }
3002
/*
 * Type of a function that is handed an inode, one xattr bucket and an
 * opaque argument, and returns an int.
 * NOTE(review): presumably used as a per-bucket callback; confirm
 * against its users elsewhere in this file.
 */
typedef int xattr_bucket_func(struct inode *inode,
			      struct ocfs2_xattr_bucket *bucket,
			      void *para);
3006
3007 static int ocfs2_find_xe_in_bucket(struct inode *inode,
3008                                    struct ocfs2_xattr_bucket *bucket,
3009                                    int name_index,
3010                                    const char *name,
3011                                    u32 name_hash,
3012                                    u16 *xe_index,
3013                                    int *found)
3014 {
3015         int i, ret = 0, cmp = 1, block_off, new_offset;
3016         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3017         size_t name_len = strlen(name);
3018         struct ocfs2_xattr_entry *xe = NULL;
3019         char *xe_name;
3020
3021         /*
3022          * We don't use binary search in the bucket because there
3023          * may be multiple entries with the same name hash.
3024          */
3025         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3026                 xe = &xh->xh_entries[i];
3027
3028                 if (name_hash > le32_to_cpu(xe->xe_name_hash))
3029                         continue;
3030                 else if (name_hash < le32_to_cpu(xe->xe_name_hash))
3031                         break;
3032
3033                 cmp = name_index - ocfs2_xattr_get_type(xe);
3034                 if (!cmp)
3035                         cmp = name_len - xe->xe_name_len;
3036                 if (cmp)
3037                         continue;
3038
3039                 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3040                                                         xh,
3041                                                         i,
3042                                                         &block_off,
3043                                                         &new_offset);
3044                 if (ret) {
3045                         mlog_errno(ret);
3046                         break;
3047                 }
3048
3049
3050                 xe_name = bucket_block(bucket, block_off) + new_offset;
3051                 if (!memcmp(name, xe_name, name_len)) {
3052                         *xe_index = i;
3053                         *found = 1;
3054                         ret = 0;
3055                         break;
3056                 }
3057         }
3058
3059         return ret;
3060 }
3061
3062 /*
3063  * Find the specified xattr entry in a series of buckets.
3064  * This series start from p_blkno and last for num_clusters.
3065  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
3066  * the num of the valid buckets.
3067  *
3068  * Return the buffer_head this xattr should reside in. And if the xattr's
3069  * hash is in the gap of 2 buckets, return the lower bucket.
3070  */
3071 static int ocfs2_xattr_bucket_find(struct inode *inode,
3072                                    int name_index,
3073                                    const char *name,
3074                                    u32 name_hash,
3075                                    u64 p_blkno,
3076                                    u32 first_hash,
3077                                    u32 num_clusters,
3078                                    struct ocfs2_xattr_search *xs)
3079 {
3080         int ret, found = 0;
3081         struct ocfs2_xattr_header *xh = NULL;
3082         struct ocfs2_xattr_entry *xe = NULL;
3083         u16 index = 0;
3084         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3085         int low_bucket = 0, bucket, high_bucket;
3086         struct ocfs2_xattr_bucket *search;
3087         u32 last_hash;
3088         u64 blkno, lower_blkno = 0;
3089
3090         search = ocfs2_xattr_bucket_new(inode);
3091         if (!search) {
3092                 ret = -ENOMEM;
3093                 mlog_errno(ret);
3094                 goto out;
3095         }
3096
3097         ret = ocfs2_read_xattr_bucket(search, p_blkno);
3098         if (ret) {
3099                 mlog_errno(ret);
3100                 goto out;
3101         }
3102
3103         xh = bucket_xh(search);
3104         high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3105         while (low_bucket <= high_bucket) {
3106                 ocfs2_xattr_bucket_relse(search);
3107
3108                 bucket = (low_bucket + high_bucket) / 2;
3109                 blkno = p_blkno + bucket * blk_per_bucket;
3110                 ret = ocfs2_read_xattr_bucket(search, blkno);
3111                 if (ret) {
3112                         mlog_errno(ret);
3113                         goto out;
3114                 }
3115
3116                 xh = bucket_xh(search);
3117                 xe = &xh->xh_entries[0];
3118                 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3119                         high_bucket = bucket - 1;
3120                         continue;
3121                 }
3122
3123                 /*
3124                  * Check whether the hash of the last entry in our
3125                  * bucket is larger than the search one. for an empty
3126                  * bucket, the last one is also the first one.
3127                  */
3128                 if (xh->xh_count)
3129                         xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3130
3131                 last_hash = le32_to_cpu(xe->xe_name_hash);
3132
3133                 /* record lower_blkno which may be the insert place. */
3134                 lower_blkno = blkno;
3135
3136                 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3137                         low_bucket = bucket + 1;
3138                         continue;
3139                 }
3140
3141                 /* the searched xattr should reside in this bucket if exists. */
3142                 ret = ocfs2_find_xe_in_bucket(inode, search,
3143                                               name_index, name, name_hash,
3144                                               &index, &found);
3145                 if (ret) {
3146                         mlog_errno(ret);
3147                         goto out;
3148                 }
3149                 break;
3150         }
3151
3152         /*
3153          * Record the bucket we have found.
3154          * When the xattr's hash value is in the gap of 2 buckets, we will
3155          * always set it to the previous bucket.
3156          */
3157         if (!lower_blkno)
3158                 lower_blkno = p_blkno;
3159
3160         /* This should be in cache - we just read it during the search */
3161         ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3162         if (ret) {
3163                 mlog_errno(ret);
3164                 goto out;
3165         }
3166
3167         xs->header = bucket_xh(xs->bucket);
3168         xs->base = bucket_block(xs->bucket, 0);
3169         xs->end = xs->base + inode->i_sb->s_blocksize;
3170
3171         if (found) {
3172                 xs->here = &xs->header->xh_entries[index];
3173                 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
3174                      (unsigned long long)bucket_blkno(xs->bucket), index);
3175         } else
3176                 ret = -ENODATA;
3177
3178 out:
3179         ocfs2_xattr_bucket_free(search);
3180         return ret;
3181 }
3182
3183 static int ocfs2_xattr_index_block_find(struct inode *inode,
3184                                         struct buffer_head *root_bh,
3185                                         int name_index,
3186                                         const char *name,
3187                                         struct ocfs2_xattr_search *xs)
3188 {
3189         int ret;
3190         struct ocfs2_xattr_block *xb =
3191                         (struct ocfs2_xattr_block *)root_bh->b_data;
3192         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3193         struct ocfs2_extent_list *el = &xb_root->xt_list;
3194         u64 p_blkno = 0;
3195         u32 first_hash, num_clusters = 0;
3196         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3197
3198         if (le16_to_cpu(el->l_next_free_rec) == 0)
3199                 return -ENODATA;
3200
3201         mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
3202              name, name_hash, name_index);
3203
3204         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3205                                   &num_clusters, el);
3206         if (ret) {
3207                 mlog_errno(ret);
3208                 goto out;
3209         }
3210
3211         BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3212
3213         mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
3214              "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
3215              first_hash);
3216
3217         ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3218                                       p_blkno, first_hash, num_clusters, xs);
3219
3220 out:
3221         return ret;
3222 }
3223
3224 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3225                                        u64 blkno,
3226                                        u32 clusters,
3227                                        xattr_bucket_func *func,
3228                                        void *para)
3229 {
3230         int i, ret = 0;
3231         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
3232         u32 num_buckets = clusters * bpc;
3233         struct ocfs2_xattr_bucket *bucket;
3234
3235         bucket = ocfs2_xattr_bucket_new(inode);
3236         if (!bucket) {
3237                 mlog_errno(-ENOMEM);
3238                 return -ENOMEM;
3239         }
3240
3241         mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
3242              clusters, (unsigned long long)blkno);
3243
3244         for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3245                 ret = ocfs2_read_xattr_bucket(bucket, blkno);
3246                 if (ret) {
3247                         mlog_errno(ret);
3248                         break;
3249                 }
3250
3251                 /*
3252                  * The real bucket num in this series of blocks is stored
3253                  * in the 1st bucket.
3254                  */
3255                 if (i == 0)
3256                         num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3257
3258                 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
3259                      (unsigned long long)blkno,
3260                      le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3261                 if (func) {
3262                         ret = func(inode, bucket, para);
3263                         if (ret && ret != -ERANGE)
3264                                 mlog_errno(ret);
3265                         /* Fall through to bucket_relse() */
3266                 }
3267
3268                 ocfs2_xattr_bucket_relse(bucket);
3269                 if (ret)
3270                         break;
3271         }
3272
3273         ocfs2_xattr_bucket_free(bucket);
3274         return ret;
3275 }
3276
/*
 * Context handed through the xattr tree iteration when listing names.
 * All three fields are passed on to ocfs2_xattr_list_entry().
 */
struct ocfs2_xattr_tree_list {
	char *buffer;		/* caller supplied output buffer */
	size_t buffer_size;	/* size of buffer */
	size_t result;		/* running result, returned to the caller */
};
3282
3283 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
3284                                              struct ocfs2_xattr_header *xh,
3285                                              int index,
3286                                              int *block_off,
3287                                              int *new_offset)
3288 {
3289         u16 name_offset;
3290
3291         if (index < 0 || index >= le16_to_cpu(xh->xh_count))
3292                 return -EINVAL;
3293
3294         name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
3295
3296         *block_off = name_offset >> sb->s_blocksize_bits;
3297         *new_offset = name_offset % sb->s_blocksize;
3298
3299         return 0;
3300 }
3301
3302 static int ocfs2_list_xattr_bucket(struct inode *inode,
3303                                    struct ocfs2_xattr_bucket *bucket,
3304                                    void *para)
3305 {
3306         int ret = 0, type;
3307         struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
3308         int i, block_off, new_offset;
3309         const char *prefix, *name;
3310
3311         for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
3312                 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
3313                 type = ocfs2_xattr_get_type(entry);
3314                 prefix = ocfs2_xattr_prefix(type);
3315
3316                 if (prefix) {
3317                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3318                                                                 bucket_xh(bucket),
3319                                                                 i,
3320                                                                 &block_off,
3321                                                                 &new_offset);
3322                         if (ret)
3323                                 break;
3324
3325                         name = (const char *)bucket_block(bucket, block_off) +
3326                                 new_offset;
3327                         ret = ocfs2_xattr_list_entry(xl->buffer,
3328                                                      xl->buffer_size,
3329                                                      &xl->result,
3330                                                      prefix, name,
3331                                                      entry->xe_name_len);
3332                         if (ret)
3333                                 break;
3334                 }
3335         }
3336
3337         return ret;
3338 }
3339
3340 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
3341                                            struct buffer_head *blk_bh,
3342                                            xattr_tree_rec_func *rec_func,
3343                                            void *para)
3344 {
3345         struct ocfs2_xattr_block *xb =
3346                         (struct ocfs2_xattr_block *)blk_bh->b_data;
3347         struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
3348         int ret = 0;
3349         u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
3350         u64 p_blkno = 0;
3351
3352         if (!el->l_next_free_rec || !rec_func)
3353                 return 0;
3354
3355         while (name_hash > 0) {
3356                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
3357                                           &e_cpos, &num_clusters, el);
3358                 if (ret) {
3359                         mlog_errno(ret);
3360                         break;
3361                 }
3362
3363                 ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
3364                                num_clusters, para);
3365                 if (ret) {
3366                         if (ret != -ERANGE)
3367                                 mlog_errno(ret);
3368                         break;
3369                 }
3370
3371                 if (e_cpos == 0)
3372                         break;
3373
3374                 name_hash = e_cpos - 1;
3375         }
3376
3377         return ret;
3378
3379 }
3380
3381 static int ocfs2_list_xattr_tree_rec(struct inode *inode,
3382                                      struct buffer_head *root_bh,
3383                                      u64 blkno, u32 cpos, u32 len, void *para)
3384 {
3385         return ocfs2_iterate_xattr_buckets(inode, blkno, len,
3386                                            ocfs2_list_xattr_bucket, para);
3387 }
3388
3389 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
3390                                              struct buffer_head *blk_bh,
3391                                              char *buffer,
3392                                              size_t buffer_size)
3393 {
3394         int ret;
3395         struct ocfs2_xattr_tree_list xl = {
3396                 .buffer = buffer,
3397                 .buffer_size = buffer_size,
3398                 .result = 0,
3399         };
3400
3401         ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
3402                                               ocfs2_list_xattr_tree_rec, &xl);
3403         if (ret) {
3404                 mlog_errno(ret);
3405                 goto out;
3406         }
3407
3408         ret = xl.result;
3409 out:
3410         return ret;
3411 }
3412
3413 static int cmp_xe(const void *a, const void *b)
3414 {
3415         const struct ocfs2_xattr_entry *l = a, *r = b;
3416         u32 l_hash = le32_to_cpu(l->xe_name_hash);
3417         u32 r_hash = le32_to_cpu(r->xe_name_hash);
3418
3419         if (l_hash > r_hash)
3420                 return 1;
3421         if (l_hash < r_hash)
3422                 return -1;
3423         return 0;
3424 }
3425
3426 static void swap_xe(void *a, void *b, int size)
3427 {
3428         struct ocfs2_xattr_entry *l = a, *r = b, tmp;
3429
3430         tmp = *l;
3431         memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
3432         memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
3433 }
3434
3435 /*
3436  * When the ocfs2_xattr_block is filled up, new bucket will be created
3437  * and all the xattr entries will be moved to the new bucket.
3438  * The header goes at the start of the bucket, and the names+values are
3439  * filled from the end.  This is why *target starts as the last buffer.
3440  * Note: we need to sort the entries since they are not saved in order
3441  * in the ocfs2_xattr_block.
3442  */
3443 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
3444                                            struct buffer_head *xb_bh,
3445                                            struct ocfs2_xattr_bucket *bucket)
3446 {
3447         int i, blocksize = inode->i_sb->s_blocksize;
3448         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3449         u16 offset, size, off_change;
3450         struct ocfs2_xattr_entry *xe;
3451         struct ocfs2_xattr_block *xb =
3452                                 (struct ocfs2_xattr_block *)xb_bh->b_data;
3453         struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
3454         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3455         u16 count = le16_to_cpu(xb_xh->xh_count);
3456         char *src = xb_bh->b_data;
3457         char *target = bucket_block(bucket, blks - 1);
3458
3459         mlog(0, "cp xattr from block %llu to bucket %llu\n",
3460              (unsigned long long)xb_bh->b_blocknr,
3461              (unsigned long long)bucket_blkno(bucket));
3462
3463         for (i = 0; i < blks; i++)
3464                 memset(bucket_block(bucket, i), 0, blocksize);
3465
3466         /*
3467          * Since the xe_name_offset is based on ocfs2_xattr_header,
3468          * there is a offset change corresponding to the change of
3469          * ocfs2_xattr_header's position.
3470          */
3471         off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3472         xe = &xb_xh->xh_entries[count - 1];
3473         offset = le16_to_cpu(xe->xe_name_offset) + off_change;
3474         size = blocksize - offset;
3475
3476         /* copy all the names and values. */
3477         memcpy(target + offset, src + offset, size);
3478
3479         /* Init new header now. */
3480         xh->xh_count = xb_xh->xh_count;
3481         xh->xh_num_buckets = cpu_to_le16(1);
3482         xh->xh_name_value_len = cpu_to_le16(size);
3483         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
3484
3485         /* copy all the entries. */
3486         target = bucket_block(bucket, 0);
3487         offset = offsetof(struct ocfs2_xattr_header, xh_entries);
3488         size = count * sizeof(struct ocfs2_xattr_entry);
3489         memcpy(target + offset, (char *)xb_xh + offset, size);
3490
3491         /* Change the xe offset for all the xe because of the move. */
3492         off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
3493                  offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3494         for (i = 0; i < count; i++)
3495                 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
3496
3497         mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
3498              offset, size, off_change);
3499
3500         sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
3501              cmp_xe, swap_xe);
3502 }
3503
3504 /*
3505  * After we move xattr from block to index btree, we have to
3506  * update ocfs2_xattr_search to the new xe and base.
3507  *
3508  * When the entry is in xattr block, xattr_bh indicates the storage place.
3509  * While if the entry is in index b-tree, "bucket" indicates the
3510  * real place of the xattr.
3511  */
3512 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
3513                                             struct ocfs2_xattr_search *xs,
3514                                             struct buffer_head *old_bh)
3515 {
3516         char *buf = old_bh->b_data;
3517         struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
3518         struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
3519         int i;
3520
3521         xs->header = bucket_xh(xs->bucket);
3522         xs->base = bucket_block(xs->bucket, 0);
3523         xs->end = xs->base + inode->i_sb->s_blocksize;
3524
3525         if (xs->not_found)
3526                 return;
3527
3528         i = xs->here - old_xh->xh_entries;
3529         xs->here = &xs->header->xh_entries[i];
3530 }
3531
3532 static int ocfs2_xattr_create_index_block(struct inode *inode,
3533                                           struct ocfs2_xattr_search *xs,
3534                                           struct ocfs2_xattr_set_ctxt *ctxt)
3535 {
3536         int ret;
3537         u32 bit_off, len;
3538         u64 blkno;
3539         handle_t *handle = ctxt->handle;
3540         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3541         struct ocfs2_inode_info *oi = OCFS2_I(inode);
3542         struct buffer_head *xb_bh = xs->xattr_bh;
3543         struct ocfs2_xattr_block *xb =
3544                         (struct ocfs2_xattr_block *)xb_bh->b_data;
3545         struct ocfs2_xattr_tree_root *xr;
3546         u16 xb_flags = le16_to_cpu(xb->xb_flags);
3547
3548         mlog(0, "create xattr index block for %llu\n",
3549              (unsigned long long)xb_bh->b_blocknr);
3550
3551         BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
3552         BUG_ON(!xs->bucket);
3553
3554         /*
3555          * XXX:
3556          * We can use this lock for now, and maybe move to a dedicated mutex
3557          * if performance becomes a problem later.
3558          */
3559         down_write(&oi->ip_alloc_sem);
3560
3561         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
3562                                       OCFS2_JOURNAL_ACCESS_WRITE);
3563         if (ret) {
3564                 mlog_errno(ret);
3565                 goto out;
3566         }
3567
3568         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
3569                                      1, 1, &bit_off, &len);
3570         if (ret) {
3571                 mlog_errno(ret);
3572                 goto out;
3573         }
3574
3575         /*
3576          * The bucket may spread in many blocks, and
3577          * we will only touch the 1st block and the last block
3578          * in the whole bucket(one for entry and one for data).
3579          */
3580         blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
3581
3582         mlog(0, "allocate 1 cluster from %llu to xattr block\n",
3583              (unsigned long long)blkno);
3584
3585         ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
3586         if (ret) {
3587                 mlog_errno(ret);
3588                 goto out;
3589         }
3590
3591         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
3592                                                 OCFS2_JOURNAL_ACCESS_CREATE);
3593         if (ret) {
3594                 mlog_errno(ret);
3595                 goto out;
3596         }
3597
3598         ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
3599         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
3600
3601         ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
3602
3603         /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
3604         memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
3605                offsetof(struct ocfs2_xattr_block, xb_attrs));
3606
3607         xr = &xb->xb_attrs.xb_root;
3608         xr->xt_clusters = cpu_to_le32(1);
3609         xr->xt_last_eb_blk = 0;
3610         xr->xt_list.l_tree_depth = 0;
3611         xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
3612         xr->xt_list.l_next_free_rec = cpu_to_le16(1);
3613
3614         xr->xt_list.l_recs[0].e_cpos = 0;
3615         xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
3616         xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
3617
3618         xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
3619
3620         ocfs2_journal_dirty(handle, xb_bh);
3621
3622 out:
3623         up_write(&oi->ip_alloc_sem);
3624
3625         return ret;
3626 }
3627
3628 static int cmp_xe_offset(const void *a, const void *b)
3629 {
3630         const struct ocfs2_xattr_entry *l = a, *r = b;
3631         u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
3632         u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
3633
3634         if (l_name_offset < r_name_offset)
3635                 return 1;
3636         if (l_name_offset > r_name_offset)
3637                 return -1;
3638         return 0;
3639 }
3640
3641 /*
3642  * defrag a xattr bucket if we find that the bucket has some
3643  * holes beteen name/value pairs.
3644  * We will move all the name/value pairs to the end of the bucket
3645  * so that we can spare some space for insertion.
3646  */
3647 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
3648                                      handle_t *handle,
3649                                      struct ocfs2_xattr_bucket *bucket)
3650 {
3651         int ret, i;
3652         size_t end, offset, len, value_len;
3653         struct ocfs2_xattr_header *xh;
3654         char *entries, *buf, *bucket_buf = NULL;
3655         u64 blkno = bucket_blkno(bucket);
3656         u16 xh_free_start;
3657         size_t blocksize = inode->i_sb->s_blocksize;
3658         struct ocfs2_xattr_entry *xe;
3659
3660         /*
3661          * In order to make the operation more efficient and generic,
3662          * we copy all the blocks into a contiguous memory and do the
3663          * defragment there, so if anything is error, we will not touch
3664          * the real block.
3665          */
3666         bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
3667         if (!bucket_buf) {
3668                 ret = -EIO;
3669                 goto out;
3670         }
3671
3672         buf = bucket_buf;
3673         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3674                 memcpy(buf, bucket_block(bucket, i), blocksize);
3675
3676         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
3677                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3678         if (ret < 0) {
3679                 mlog_errno(ret);
3680                 goto out;
3681         }
3682
3683         xh = (struct ocfs2_xattr_header *)bucket_buf;
3684         entries = (char *)xh->xh_entries;
3685         xh_free_start = le16_to_cpu(xh->xh_free_start);
3686
3687         mlog(0, "adjust xattr bucket in %llu, count = %u, "
3688              "xh_free_start = %u, xh_name_value_len = %u.\n",
3689              (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
3690              xh_free_start, le16_to_cpu(xh->xh_name_value_len));
3691
3692         /*
3693          * sort all the entries by their offset.
3694          * the largest will be the first, so that we can
3695          * move them to the end one by one.
3696          */
3697         sort(entries, le16_to_cpu(xh->xh_count),
3698              sizeof(struct ocfs2_xattr_entry),
3699              cmp_xe_offset, swap_xe);
3700
3701         /* Move all name/values to the end of the bucket. */
3702         xe = xh->xh_entries;
3703         end = OCFS2_XATTR_BUCKET_SIZE;
3704         for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
3705                 offset = le16_to_cpu(xe->xe_name_offset);
3706                 if (ocfs2_xattr_is_local(xe))
3707                         value_len = OCFS2_XATTR_SIZE(
3708                                         le64_to_cpu(xe->xe_value_size));
3709                 else
3710                         value_len = OCFS2_XATTR_ROOT_SIZE;
3711                 len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;
3712
3713                 /*
3714                  * We must make sure that the name/value pair
3715                  * exist in the same block. So adjust end to
3716                  * the previous block end if needed.
3717                  */
3718                 if (((end - len) / blocksize !=
3719                         (end - 1) / blocksize))
3720                         end = end - end % blocksize;
3721
3722                 if (end > offset + len) {
3723                         memmove(bucket_buf + end - len,
3724                                 bucket_buf + offset, len);
3725                         xe->xe_name_offset = cpu_to_le16(end - len);
3726                 }
3727
3728                 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
3729                                 "bucket %llu\n", (unsigned long long)blkno);
3730
3731                 end -= len;
3732         }
3733
3734         mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
3735                         "bucket %llu\n", (unsigned long long)blkno);
3736
3737         if (xh_free_start == end)
3738                 goto out;
3739
3740         memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
3741         xh->xh_free_start = cpu_to_le16(end);
3742
3743         /* sort the entries by their name_hash. */
3744         sort(entries, le16_to_cpu(xh->xh_count),
3745              sizeof(struct ocfs2_xattr_entry),
3746              cmp_xe, swap_xe);
3747
3748         buf = bucket_buf;
3749         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3750                 memcpy(bucket_block(bucket, i), buf, blocksize);
3751         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
3752
3753 out:
3754         kfree(bucket_buf);
3755         return ret;
3756 }
3757
3758 /*
3759  * prev_blkno points to the start of an existing extent.  new_blkno
3760  * points to a newly allocated extent.  Because we know each of our
3761  * clusters contains more than bucket, we can easily split one cluster
3762  * at a bucket boundary.  So we take the last cluster of the existing
3763  * extent and split it down the middle.  We move the last half of the
3764  * buckets in the last cluster of the existing extent over to the new
3765  * extent.
3766  *
3767  * first_bh is the buffer at prev_blkno so we can update the existing
3768  * extent's bucket count.  header_bh is the bucket were we were hoping
3769  * to insert our xattr.  If the bucket move places the target in the new
3770  * extent, we'll update first_bh and header_bh after modifying the old
3771  * extent.
3772  *
3773  * first_hash will be set as the 1st xe's name_hash in the new extent.
3774  */
3775 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
3776                                                handle_t *handle,
3777                                                struct ocfs2_xattr_bucket *first,
3778                                                struct ocfs2_xattr_bucket *target,
3779                                                u64 new_blkno,
3780                                                u32 num_clusters,
3781                                                u32 *first_hash)
3782 {
3783         int ret;
3784         struct super_block *sb = inode->i_sb;
3785         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
3786         int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
3787         int to_move = num_buckets / 2;
3788         u64 src_blkno;
3789         u64 last_cluster_blkno = bucket_blkno(first) +
3790                 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
3791
3792         BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
3793         BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
3794
3795         mlog(0, "move half of xattrs in cluster %llu to %llu\n",
3796              (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno);
3797
3798         ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
3799                                      last_cluster_blkno, new_blkno,
3800                                      to_move, first_hash);
3801         if (ret) {
3802                 mlog_errno(ret);
3803                 goto out;
3804         }
3805
3806         /* This is the first bucket that got moved */
3807         src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
3808
3809         /*
3810          * If the target bucket was part of the moved buckets, we need to
3811          * update first and target.
3812          */
3813         if (bucket_blkno(target) >= src_blkno) {
3814                 /* Find the block for the new target bucket */
3815                 src_blkno = new_blkno +
3816                         (bucket_blkno(target) - src_blkno);
3817
3818                 ocfs2_xattr_bucket_relse(first);
3819                 ocfs2_xattr_bucket_relse(target);
3820
3821                 /*
3822                  * These shouldn't fail - the buffers are in the
3823                  * journal from ocfs2_cp_xattr_bucket().
3824                  */
3825                 ret = ocfs2_read_xattr_bucket(first, new_blkno);
3826                 if (ret) {
3827                         mlog_errno(ret);
3828                         goto out;
3829                 }
3830                 ret = ocfs2_read_xattr_bucket(target, src_blkno);
3831                 if (ret)
3832                         mlog_errno(ret);
3833
3834         }
3835
3836 out:
3837         return ret;
3838 }
3839
3840 /*
3841  * Find the suitable pos when we divide a bucket into 2.
3842  * We have to make sure the xattrs with the same hash value exist
3843  * in the same bucket.
3844  *
3845  * If this ocfs2_xattr_header covers more than one hash value, find a
3846  * place where the hash value changes.  Try to find the most even split.
3847  * The most common case is that all entries have different hash values,
3848  * and the first check we make will find a place to split.
3849  */
3850 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
3851 {
3852         struct ocfs2_xattr_entry *entries = xh->xh_entries;
3853         int count = le16_to_cpu(xh->xh_count);
3854         int delta, middle = count / 2;
3855
3856         /*
3857          * We start at the middle.  Each step gets farther away in both
3858          * directions.  We therefore hit the change in hash value
3859          * nearest to the middle.  Note that this loop does not execute for
3860          * count < 2.
3861          */
3862         for (delta = 0; delta < middle; delta++) {
3863                 /* Let's check delta earlier than middle */
3864                 if (cmp_xe(&entries[middle - delta - 1],
3865                            &entries[middle - delta]))
3866                         return middle - delta;
3867
3868                 /* For even counts, don't walk off the end */
3869                 if ((middle + delta + 1) == count)
3870                         continue;
3871
3872                 /* Now try delta past middle */
3873                 if (cmp_xe(&entries[middle + delta],
3874                            &entries[middle + delta + 1]))
3875                         return middle + delta + 1;
3876         }
3877
3878         /* Every entry had the same hash */
3879         return count;
3880 }
3881
3882 /*
3883  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
3884  * first_hash will record the 1st hash of the new bucket.
3885  *
3886  * Normally half of the xattrs will be moved.  But we have to make
3887  * sure that the xattrs with the same hash value are stored in the
3888  * same bucket. If all the xattrs in this bucket have the same hash
3889  * value, the new bucket will be initialized as an empty one and the
3890  * first_hash will be initialized as (hash_value+1).
3891  */
3892 static int ocfs2_divide_xattr_bucket(struct inode *inode,
3893                                     handle_t *handle,
3894                                     u64 blk,
3895                                     u64 new_blk,
3896                                     u32 *first_hash,
3897                                     int new_bucket_head)
3898 {
3899         int ret, i;
3900         int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
3901         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
3902         struct ocfs2_xattr_header *xh;
3903         struct ocfs2_xattr_entry *xe;
3904         int blocksize = inode->i_sb->s_blocksize;
3905
3906         mlog(0, "move some of xattrs from bucket %llu to %llu\n",
3907              (unsigned long long)blk, (unsigned long long)new_blk);
3908
3909         s_bucket = ocfs2_xattr_bucket_new(inode);
3910         t_bucket = ocfs2_xattr_bucket_new(inode);
3911         if (!s_bucket || !t_bucket) {
3912                 ret = -ENOMEM;
3913                 mlog_errno(ret);
3914                 goto out;
3915         }
3916
3917         ret = ocfs2_read_xattr_bucket(s_bucket, blk);
3918         if (ret) {
3919                 mlog_errno(ret);
3920                 goto out;
3921         }
3922
3923         ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
3924                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3925         if (ret) {
3926                 mlog_errno(ret);
3927                 goto out;
3928         }
3929
3930         /*
3931          * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
3932          * there's no need to read it.
3933          */
3934         ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
3935         if (ret) {
3936                 mlog_errno(ret);
3937                 goto out;
3938         }
3939
3940         /*
3941          * Hey, if we're overwriting t_bucket, what difference does
3942          * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
3943          * same part of ocfs2_cp_xattr_bucket().
3944          */
3945         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
3946                                                 new_bucket_head ?
3947                                                 OCFS2_JOURNAL_ACCESS_CREATE :
3948                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3949         if (ret) {
3950                 mlog_errno(ret);
3951                 goto out;
3952         }
3953
3954         xh = bucket_xh(s_bucket);
3955         count = le16_to_cpu(xh->xh_count);
3956         start = ocfs2_xattr_find_divide_pos(xh);
3957
3958         if (start == count) {
3959                 xe = &xh->xh_entries[start-1];
3960
3961                 /*
3962                  * initialized a new empty bucket here.
3963                  * The hash value is set as one larger than
3964                  * that of the last entry in the previous bucket.
3965                  */
3966                 for (i = 0; i < t_bucket->bu_blocks; i++)
3967                         memset(bucket_block(t_bucket, i), 0, blocksize);
3968
3969                 xh = bucket_xh(t_bucket);
3970                 xh->xh_free_start = cpu_to_le16(blocksize);
3971                 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
3972                 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
3973
3974                 goto set_num_buckets;
3975         }
3976
3977         /* copy the whole bucket to the new first. */
3978         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
3979
3980         /* update the new bucket. */
3981         xh = bucket_xh(t_bucket);
3982
3983         /*
3984          * Calculate the total name/value len and xh_free_start for
3985          * the old bucket first.
3986          */
3987         name_offset = OCFS2_XATTR_BUCKET_SIZE;
3988         name_value_len = 0;
3989         for (i = 0; i < start; i++) {
3990                 xe = &xh->xh_entries[i];
3991                 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3992                 if (ocfs2_xattr_is_local(xe))
3993                         xe_len +=
3994                            OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3995                 else
3996                         xe_len += OCFS2_XATTR_ROOT_SIZE;
3997                 name_value_len += xe_len;
3998                 if (le16_to_cpu(xe->xe_name_offset) < name_offset)
3999                         name_offset = le16_to_cpu(xe->xe_name_offset);
4000         }
4001
4002         /*
4003          * Now begin the modification to the new bucket.
4004          *
4005          * In the new bucket, We just move the xattr entry to the beginning
4006          * and don't touch the name/value. So there will be some holes in the
4007          * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
4008          * called.
4009          */
4010         xe = &xh->xh_entries[start];
4011         len = sizeof(struct ocfs2_xattr_entry) * (count - start);
4012         mlog(0, "mv xattr entry len %d from %d to %d\n", len,
4013              (int)((char *)xe - (char *)xh),
4014              (int)((char *)xh->xh_entries - (char *)xh));
4015         memmove((char *)xh->xh_entries, (char *)xe, len);
4016         xe = &xh->xh_entries[count - start];
4017         len = sizeof(struct ocfs2_xattr_entry) * start;
4018         memset((char *)xe, 0, len);
4019
4020         le16_add_cpu(&xh->xh_count, -start);
4021         le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
4022
4023         /* Calculate xh_free_start for the new bucket. */
4024         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4025         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4026                 xe = &xh->xh_entries[i];
4027                 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
4028                 if (ocfs2_xattr_is_local(xe))
4029                         xe_len +=
4030                            OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4031                 else
4032                         xe_len += OCFS2_XATTR_ROOT_SIZE;
4033                 if (le16_to_cpu(xe->xe_name_offset) <
4034                     le16_to_cpu(xh->xh_free_start))
4035                         xh->xh_free_start = xe->xe_name_offset;
4036         }
4037
4038 set_num_buckets:
4039         /* set xh->xh_num_buckets for the new xh. */
4040         if (new_bucket_head)
4041                 xh->xh_num_buckets = cpu_to_le16(1);
4042         else
4043                 xh->xh_num_buckets = 0;
4044
4045         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4046
4047         /* store the first_hash of the new bucket. */
4048         if (first_hash)
4049                 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4050
4051         /*
4052          * Now only update the 1st block of the old bucket.  If we
4053          * just added a new empty bucket, there is no need to modify
4054          * it.
4055          */
4056         if (start == count)
4057                 goto out;
4058
4059         xh = bucket_xh(s_bucket);
4060         memset(&xh->xh_entries[start], 0,
4061                sizeof(struct ocfs2_xattr_entry) * (count - start));
4062         xh->xh_count = cpu_to_le16(start);
4063         xh->xh_free_start = cpu_to_le16(name_offset);
4064         xh->xh_name_value_len = cpu_to_le16(name_value_len);
4065
4066         ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
4067
4068 out:
4069         ocfs2_xattr_bucket_free(s_bucket);
4070         ocfs2_xattr_bucket_free(t_bucket);
4071
4072         return ret;
4073 }
4074
4075 /*
4076  * Copy xattr from one bucket to another bucket.
4077  *
4078  * The caller must make sure that the journal transaction
4079  * has enough space for journaling.
4080  */
4081 static int ocfs2_cp_xattr_bucket(struct inode *inode,
4082                                  handle_t *handle,
4083                                  u64 s_blkno,
4084                                  u64 t_blkno,
4085                                  int t_is_new)
4086 {
4087         int ret;
4088         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4089
4090         BUG_ON(s_blkno == t_blkno);
4091
4092         mlog(0, "cp bucket %llu to %llu, target is %d\n",
4093              (unsigned long long)s_blkno, (unsigned long long)t_blkno,
4094              t_is_new);
4095
4096         s_bucket = ocfs2_xattr_bucket_new(inode);
4097         t_bucket = ocfs2_xattr_bucket_new(inode);
4098         if (!s_bucket || !t_bucket) {
4099                 ret = -ENOMEM;
4100                 mlog_errno(ret);
4101                 goto out;
4102         }
4103
4104         ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
4105         if (ret)
4106                 goto out;
4107
4108         /*
4109          * Even if !t_is_new, we're overwriting t_bucket.  Thus,
4110          * there's no need to read it.
4111          */
4112         ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
4113         if (ret)
4114                 goto out;
4115
4116         /*
4117          * Hey, if we're overwriting t_bucket, what difference does
4118          * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
4119          * cluster to fill, we came here from
4120          * ocfs2_mv_xattr_buckets(), and it is really new -
4121          * ACCESS_CREATE is required.  But we also might have moved data
4122          * out of t_bucket before extending back into it.
4123          * ocfs2_add_new_xattr_bucket() can do this - its call to
4124          * ocfs2_add_new_xattr_cluster() may have created a new extent
4125          * and copied out the end of the old extent.  Then it re-extends
4126          * the old extent back to create space for new xattrs.  That's
4127          * how we get here, and the bucket isn't really new.
4128          */
4129         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4130                                                 t_is_new ?
4131                                                 OCFS2_JOURNAL_ACCESS_CREATE :
4132                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4133         if (ret)
4134                 goto out;
4135
4136         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4137         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4138
4139 out:
4140         ocfs2_xattr_bucket_free(t_bucket);
4141         ocfs2_xattr_bucket_free(s_bucket);
4142
4143         return ret;
4144 }
4145
4146 /*
4147  * src_blk points to the start of an existing extent.  last_blk points to
4148  * last cluster in that extent.  to_blk points to a newly allocated
4149  * extent.  We copy the buckets from the cluster at last_blk to the new
4150  * extent.  If start_bucket is non-zero, we skip that many buckets before
4151  * we start copying.  The new extent's xh_num_buckets gets set to the
4152  * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4153  * by the same amount.
4154  */
4155 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4156                                   u64 src_blk, u64 last_blk, u64 to_blk,
4157                                   unsigned int start_bucket,
4158                                   u32 *first_hash)
4159 {
4160         int i, ret, credits;
4161         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4162         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4163         int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4164         struct ocfs2_xattr_bucket *old_first, *new_first;
4165
4166         mlog(0, "mv xattrs from cluster %llu to %llu\n",
4167              (unsigned long long)last_blk, (unsigned long long)to_blk);
4168
4169         BUG_ON(start_bucket >= num_buckets);
4170         if (start_bucket) {
4171                 num_buckets -= start_bucket;
4172                 last_blk += (start_bucket * blks_per_bucket);
4173         }
4174
4175         /* The first bucket of the original extent */
4176         old_first = ocfs2_xattr_bucket_new(inode);
4177         /* The first bucket of the new extent */
4178         new_first = ocfs2_xattr_bucket_new(inode);
4179         if (!old_first || !new_first) {
4180                 ret = -ENOMEM;
4181                 mlog_errno(ret);
4182                 goto out;
4183         }
4184
4185         ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4186         if (ret) {
4187                 mlog_errno(ret);
4188                 goto out;
4189         }
4190
4191         /*
4192          * We need to update the first bucket of the old extent and all
4193          * the buckets going to the new extent.
4194          */
4195         credits = ((num_buckets + 1) * blks_per_bucket) +
4196                 handle->h_buffer_credits;
4197         ret = ocfs2_extend_trans(handle, credits);
4198         if (ret) {
4199                 mlog_errno(ret);
4200                 goto out;
4201         }
4202
4203         ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4204                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4205         if (ret) {
4206                 mlog_errno(ret);
4207                 goto out;
4208         }
4209
4210         for (i = 0; i < num_buckets; i++) {
4211                 ret = ocfs2_cp_xattr_bucket(inode, handle,
4212                                             last_blk + (i * blks_per_bucket),
4213                                             to_blk + (i * blks_per_bucket),
4214                                             1);
4215                 if (ret) {
4216                         mlog_errno(ret);
4217                         goto out;
4218                 }
4219         }
4220
4221         /*
4222          * Get the new bucket ready before we dirty anything
4223          * (This actually shouldn't fail, because we already dirtied
4224          * it once in ocfs2_cp_xattr_bucket()).
4225          */
4226         ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4227         if (ret) {
4228                 mlog_errno(ret);
4229                 goto out;
4230         }
4231         ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4232                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4233         if (ret) {
4234                 mlog_errno(ret);
4235                 goto out;
4236         }
4237
4238         /* Now update the headers */
4239         le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4240         ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4241
4242         bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4243         ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4244
4245         if (first_hash)
4246                 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4247
4248 out:
4249         ocfs2_xattr_bucket_free(new_first);
4250         ocfs2_xattr_bucket_free(old_first);
4251         return ret;
4252 }
4253
4254 /*
4255  * Move some xattrs in this cluster to the new cluster.
4256  * This function should only be called when bucket size == cluster size.
4257  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4258  */
4259 static int ocfs2_divide_xattr_cluster(struct inode *inode,
4260                                       handle_t *handle,
4261                                       u64 prev_blk,
4262                                       u64 new_blk,
4263                                       u32 *first_hash)
4264 {
4265         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4266         int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
4267
4268         BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4269
4270         ret = ocfs2_extend_trans(handle, credits);
4271         if (ret) {
4272                 mlog_errno(ret);
4273                 return ret;
4274         }
4275
4276         /* Move half of the xattr in start_blk to the next bucket. */
4277         return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
4278                                           new_blk, first_hash, 1);
4279 }
4280
4281 /*
4282  * Move some xattrs from the old cluster to the new one since they are not
4283  * contiguous in ocfs2 xattr tree.
4284  *
4285  * new_blk starts a new separate cluster, and we will move some xattrs from
4286  * prev_blk to it. v_start will be set as the first name hash value in this
4287  * new cluster so that it can be used as e_cpos during tree insertion and
4288  * don't collide with our original b-tree operations. first_bh and header_bh
4289  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
4290  * to extend the insert bucket.
4291  *
4292  * The problem is how much xattr should we move to the new one and when should
4293  * we update first_bh and header_bh?
4294  * 1. If cluster size > bucket size, that means the previous cluster has more
4295  *    than 1 bucket, so just move half nums of bucket into the new cluster and
4296  *    update the first_bh and header_bh if the insert bucket has been moved
4297  *    to the new cluster.
4298  * 2. If cluster_size == bucket_size:
4299  *    a) If the previous extent rec has more than one cluster and the insert
4300  *       place isn't in the last cluster, copy the entire last cluster to the
4301  *       new one. This time, we don't need to upate the first_bh and header_bh
4302  *       since they will not be moved into the new cluster.
4303  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
4304  *       the new one. And we set the extend flag to zero if the insert place is
4305  *       moved into the new allocated cluster since no extend is needed.
4306  */
4307 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
4308                                             handle_t *handle,
4309                                             struct ocfs2_xattr_bucket *first,
4310                                             struct ocfs2_xattr_bucket *target,
4311                                             u64 new_blk,
4312                                             u32 prev_clusters,
4313                                             u32 *v_start,
4314                                             int *extend)
4315 {
4316         int ret;
4317
4318         mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
4319              (unsigned long long)bucket_blkno(first), prev_clusters,
4320              (unsigned long long)new_blk);
4321
4322         if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
4323                 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
4324                                                           handle,
4325                                                           first, target,
4326                                                           new_blk,
4327                                                           prev_clusters,
4328                                                           v_start);
4329                 if (ret)
4330                         mlog_errno(ret);
4331         } else {
4332                 /* The start of the last cluster in the first extent */
4333                 u64 last_blk = bucket_blkno(first) +
4334                         ((prev_clusters - 1) *
4335                          ocfs2_clusters_to_blocks(inode->i_sb, 1));
4336
4337                 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
4338                         ret = ocfs2_mv_xattr_buckets(inode, handle,
4339                                                      bucket_blkno(first),
4340                                                      last_blk, new_blk, 0,
4341                                                      v_start);
4342                         if (ret)
4343                                 mlog_errno(ret);
4344                 } else {
4345                         ret = ocfs2_divide_xattr_cluster(inode, handle,
4346                                                          last_blk, new_blk,
4347                                                          v_start);
4348                         if (ret)
4349                                 mlog_errno(ret);
4350
4351                         if ((bucket_blkno(target) == last_blk) && extend)
4352                                 *extend = 0;
4353                 }
4354         }
4355
4356         return ret;
4357 }
4358
4359 /*
4360  * Add a new cluster for xattr storage.
4361  *
4362  * If the new cluster is contiguous with the previous one, it will be
4363  * appended to the same extent record, and num_clusters will be updated.
4364  * If not, we will insert a new extent for it and move some xattrs in
4365  * the last cluster into the new allocated one.
4366  * We also need to limit the maximum size of a btree leaf, otherwise we'll
4367  * lose the benefits of hashing because we'll have to search large leaves.
4368  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
4369  * if it's bigger).
4370  *
4371  * first_bh is the first block of the previous extent rec and header_bh
4372  * indicates the bucket we will insert the new xattrs. They will be updated
4373  * when the header_bh is moved into the new cluster.
4374  */
4375 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
4376                                        struct buffer_head *root_bh,
4377                                        struct ocfs2_xattr_bucket *first,
4378                                        struct ocfs2_xattr_bucket *target,
4379                                        u32 *num_clusters,
4380                                        u32 prev_cpos,
4381                                        int *extend,
4382                                        struct ocfs2_xattr_set_ctxt *ctxt)
4383 {
4384         int ret;
4385         u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
4386         u32 prev_clusters = *num_clusters;
4387         u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
4388         u64 block;
4389         handle_t *handle = ctxt->handle;
4390         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4391         struct ocfs2_extent_tree et;
4392
4393         mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
4394              "previous xattr blkno = %llu\n",
4395              (unsigned long long)OCFS2_I(inode)->ip_blkno,
4396              prev_cpos, (unsigned long long)bucket_blkno(first));
4397
4398         ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
4399
4400         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
4401                                       OCFS2_JOURNAL_ACCESS_WRITE);
4402         if (ret < 0) {
4403                 mlog_errno(ret);
4404                 goto leave;
4405         }
4406
4407         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
4408                                      clusters_to_add, &bit_off, &num_bits);
4409         if (ret < 0) {
4410                 if (ret != -ENOSPC)
4411                         mlog_errno(ret);
4412                 goto leave;
4413         }
4414
4415         BUG_ON(num_bits > clusters_to_add);
4416
4417         block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
4418         mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
4419              num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
4420
4421         if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
4422             (prev_clusters + num_bits) << osb->s_clustersize_bits <=
4423              OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
4424                 /*
4425                  * If this cluster is contiguous with the old one and
4426                  * adding this new cluster, we don't surpass the limit of
4427                  * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
4428                  * initialized and used like other buckets in the previous
4429                  * cluster.
4430                  * So add it as a contiguous one. The caller will handle
4431                  * its init process.
4432                  */
4433                 v_start = prev_cpos + prev_clusters;
4434                 *num_clusters = prev_clusters + num_bits;
4435                 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
4436                      num_bits);
4437         } else {
4438                 ret = ocfs2_adjust_xattr_cross_cluster(inode,
4439                                                        handle,
4440                                                        first,
4441                                                        target,
4442                                                        block,
4443                                                        prev_clusters,
4444                                                        &v_start,
4445                                                        extend);
4446                 if (ret) {
4447                         mlog_errno(ret);
4448                         goto leave;
4449                 }
4450         }
4451
4452         mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
4453              num_bits, (unsigned long long)block, v_start);
4454         ret = ocfs2_insert_extent(handle, &et, v_start, block,
4455                                   num_bits, 0, ctxt->meta_ac);
4456         if (ret < 0) {
4457                 mlog_errno(ret);
4458                 goto leave;
4459         }
4460
4461         ret = ocfs2_journal_dirty(handle, root_bh);
4462         if (ret < 0)
4463                 mlog_errno(ret);
4464
4465 leave:
4466         return ret;
4467 }
4468
4469 /*
4470  * We are given an extent.  'first' is the bucket at the very front of
4471  * the extent.  The extent has space for an additional bucket past
4472  * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
4473  * of the target bucket.  We wish to shift every bucket past the target
4474  * down one, filling in that additional space.  When we get back to the
4475  * target, we split the target between itself and the now-empty bucket
4476  * at target+1 (aka, target_blkno + blks_per_bucket).
4477  */
4478 static int ocfs2_extend_xattr_bucket(struct inode *inode,
4479                                      handle_t *handle,
4480                                      struct ocfs2_xattr_bucket *first,
4481                                      u64 target_blk,
4482                                      u32 num_clusters)
4483 {
4484         int ret, credits;
4485         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4486         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4487         u64 end_blk;
4488         u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
4489
4490         mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
4491              "from %llu, len = %u\n", (unsigned long long)target_blk,
4492              (unsigned long long)bucket_blkno(first), num_clusters);
4493
4494         /* The extent must have room for an additional bucket */
4495         BUG_ON(new_bucket >=
4496                (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
4497
4498         /* end_blk points to the last existing bucket */
4499         end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
4500
4501         /*
4502          * end_blk is the start of the last existing bucket.
4503          * Thus, (end_blk - target_blk) covers the target bucket and
4504          * every bucket after it up to, but not including, the last
4505          * existing bucket.  Then we add the last existing bucket, the
4506          * new bucket, and the first bucket (3 * blk_per_bucket).
4507          */
4508         credits = (end_blk - target_blk) + (3 * blk_per_bucket) +
4509                   handle->h_buffer_credits;
4510         ret = ocfs2_extend_trans(handle, credits);
4511         if (ret) {
4512                 mlog_errno(ret);
4513                 goto out;
4514         }
4515
4516         ret = ocfs2_xattr_bucket_journal_access(handle, first,
4517                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4518         if (ret) {
4519                 mlog_errno(ret);
4520                 goto out;
4521         }
4522
4523         while (end_blk != target_blk) {
4524                 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
4525                                             end_blk + blk_per_bucket, 0);
4526                 if (ret)
4527                         goto out;
4528                 end_blk -= blk_per_bucket;
4529         }
4530
4531         /* Move half of the xattr in target_blkno to the next bucket. */
4532         ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
4533                                         target_blk + blk_per_bucket, NULL, 0);
4534
4535         le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
4536         ocfs2_xattr_bucket_journal_dirty(handle, first);
4537
4538 out:
4539         return ret;
4540 }
4541
4542 /*
4543  * Add new xattr bucket in an extent record and adjust the buckets
4544  * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
4545  * bucket we want to insert into.
4546  *
4547  * In the easy case, we will move all the buckets after target down by
4548  * one. Half of target's xattrs will be moved to the next bucket.
4549  *
4550  * If current cluster is full, we'll allocate a new one.  This may not
4551  * be contiguous.  The underlying calls will make sure that there is
4552  * space for the insert, shifting buckets around if necessary.
4553  * 'target' may be moved by those calls.
4554  */
4555 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
4556                                       struct buffer_head *xb_bh,
4557                                       struct ocfs2_xattr_bucket *target,
4558                                       struct ocfs2_xattr_set_ctxt *ctxt)
4559 {
4560         struct ocfs2_xattr_block *xb =
4561                         (struct ocfs2_xattr_block *)xb_bh->b_data;
4562         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
4563         struct ocfs2_extent_list *el = &xb_root->xt_list;
4564         u32 name_hash =
4565                 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
4566         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4567         int ret, num_buckets, extend = 1;
4568         u64 p_blkno;
4569         u32 e_cpos, num_clusters;
4570         /* The bucket at the front of the extent */
4571         struct ocfs2_xattr_bucket *first;
4572
4573         mlog(0, "Add new xattr bucket starting from %llu\n",
4574              (unsigned long long)bucket_blkno(target));
4575
4576         /* The first bucket of the original extent */
4577         first = ocfs2_xattr_bucket_new(inode);
4578         if (!first) {
4579                 ret = -ENOMEM;
4580                 mlog_errno(ret);
4581                 goto out;
4582         }
4583
4584         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
4585                                   &num_clusters, el);
4586         if (ret) {
4587                 mlog_errno(ret);
4588                 goto out;
4589         }
4590
4591         ret = ocfs2_read_xattr_bucket(first, p_blkno);
4592         if (ret) {
4593                 mlog_errno(ret);
4594                 goto out;
4595         }
4596
4597         num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
4598         if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
4599                 /*
4600                  * This can move first+target if the target bucket moves
4601                  * to the new extent.
4602                  */
4603                 ret = ocfs2_add_new_xattr_cluster(inode,
4604                                                   xb_bh,
4605                                                   first,
4606                                                   target,
4607                                                   &num_clusters,
4608                                                   e_cpos,
4609                                                   &extend,
4610                                                   ctxt);
4611                 if (ret) {
4612                         mlog_errno(ret);
4613                         goto out;
4614                 }
4615         }
4616
4617         if (extend) {
4618                 ret = ocfs2_extend_xattr_bucket(inode,
4619                                                 ctxt->handle,
4620                                                 first,
4621                                                 bucket_blkno(target),
4622                                                 num_clusters);
4623                 if (ret)
4624                         mlog_errno(ret);
4625         }
4626
4627 out:
4628         ocfs2_xattr_bucket_free(first);
4629
4630         return ret;
4631 }
4632
4633 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
4634                                         struct ocfs2_xattr_bucket *bucket,
4635                                         int offs)
4636 {
4637         int block_off = offs >> inode->i_sb->s_blocksize_bits;
4638
4639         offs = offs % inode->i_sb->s_blocksize;
4640         return bucket_block(bucket, block_off) + offs;
4641 }
4642
4643 /*
4644  * Handle the normal xattr set, including replace, delete and new.
4645  *
4646  * Note: "local" indicates the real data's locality. So we can't
4647  * just its bucket locality by its length.
4648  */
4649 static void ocfs2_xattr_set_entry_normal(struct inode *inode,
4650                                          struct ocfs2_xattr_info *xi,
4651                                          struct ocfs2_xattr_search *xs,
4652                                          u32 name_hash,
4653                                          int local)
4654 {
4655         struct ocfs2_xattr_entry *last, *xe;
4656         int name_len = strlen(xi->name);
4657         struct ocfs2_xattr_header *xh = xs->header;
4658         u16 count = le16_to_cpu(xh->xh_count), start;
4659         size_t blocksize = inode->i_sb->s_blocksize;
4660         char *val;
4661         size_t offs, size, new_size;
4662
4663         last = &xh->xh_entries[count];
4664         if (!xs->not_found) {
4665                 xe = xs->here;
4666                 offs = le16_to_cpu(xe->xe_name_offset);
4667                 if (ocfs2_xattr_is_local(xe))
4668                         size = OCFS2_XATTR_SIZE(name_len) +
4669                         OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4670                 else
4671                         size = OCFS2_XATTR_SIZE(name_len) +
4672                         OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4673
4674                 /*
4675                  * If the new value will be stored outside, xi->value has been
4676                  * initalized as an empty ocfs2_xattr_value_root, and the same
4677                  * goes with xi->value_len, so we can set new_size safely here.
4678                  * See ocfs2_xattr_set_in_bucket.
4679                  */
4680                 new_size = OCFS2_XATTR_SIZE(name_len) +
4681                            OCFS2_XATTR_SIZE(xi->value_len);
4682
4683                 le16_add_cpu(&xh->xh_name_value_len, -size);
4684                 if (xi->value) {
4685                         if (new_size > size)
4686                                 goto set_new_name_value;
4687
4688                         /* Now replace the old value with new one. */
4689                         if (local)
4690                                 xe->xe_value_size = cpu_to_le64(xi->value_len);
4691                         else
4692                                 xe->xe_value_size = 0;
4693
4694                         val = ocfs2_xattr_bucket_get_val(inode,
4695                                                          xs->bucket, offs);
4696                         memset(val + OCFS2_XATTR_SIZE(name_len), 0,
4697                                size - OCFS2_XATTR_SIZE(name_len));
4698                         if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
4699                                 memcpy(val + OCFS2_XATTR_SIZE(name_len),
4700                                        xi->value, xi->value_len);
4701
4702                         le16_add_cpu(&xh->xh_name_value_len, new_size);
4703                         ocfs2_xattr_set_local(xe, local);
4704                         return;
4705                 } else {
4706                         /*
4707                          * Remove the old entry if there is more than one.
4708                          * We don't remove the last entry so that we can
4709                          * use it to indicate the hash value of the empty
4710                          * bucket.
4711                          */
4712                         last -= 1;
4713                         le16_add_cpu(&xh->xh_count, -1);
4714                         if (xh->xh_count) {
4715                                 memmove(xe, xe + 1,
4716                                         (void *)last - (void *)xe);
4717                                 memset(last, 0,
4718                                        sizeof(struct ocfs2_xattr_entry));
4719                         } else
4720                                 xh->xh_free_start =
4721                                         cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4722
4723                         return;
4724                 }
4725         } else {
4726                 /* find a new entry for insert. */
4727                 int low = 0, high = count - 1, tmp;
4728                 struct ocfs2_xattr_entry *tmp_xe;
4729
4730                 while (low <= high && count) {
4731                         tmp = (low + high) / 2;
4732                         tmp_xe = &xh->xh_entries[tmp];
4733
4734                         if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
4735                                 low = tmp + 1;
4736                         else if (name_hash <
4737                                  le32_to_cpu(tmp_xe->xe_name_hash))
4738                                 high = tmp - 1;
4739                         else {
4740                                 low = tmp;
4741                                 break;
4742                         }
4743                 }
4744
4745                 xe = &xh->xh_entries[low];
4746                 if (low != count)
4747                         memmove(xe + 1, xe, (void *)last - (void *)xe);
4748
4749                 le16_add_cpu(&xh->xh_count, 1);
4750                 memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
4751                 xe->xe_name_hash = cpu_to_le32(name_hash);
4752                 xe->xe_name_len = name_len;
4753                 ocfs2_xattr_set_type(xe, xi->name_index);
4754         }
4755
4756 set_new_name_value:
4757         /* Insert the new name+value. */
4758         size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);
4759
4760         /*
4761          * We must make sure that the name/value pair
4762          * exists in the same block.
4763          */
4764         offs = le16_to_cpu(xh->xh_free_start);
4765         start = offs - size;
4766
4767         if (start >> inode->i_sb->s_blocksize_bits !=
4768             (offs - 1) >> inode->i_sb->s_blocksize_bits) {
4769                 offs = offs - offs % blocksize;
4770                 xh->xh_free_start = cpu_to_le16(offs);
4771         }
4772
4773         val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size);
4774         xe->xe_name_offset = cpu_to_le16(offs - size);
4775
4776         memset(val, 0, size);
4777         memcpy(val, xi->name, name_len);
4778         memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);
4779
4780         xe->xe_value_size = cpu_to_le64(xi->value_len);
4781         ocfs2_xattr_set_local(xe, local);
4782         xs->here = xe;
4783         le16_add_cpu(&xh->xh_free_start, -size);
4784         le16_add_cpu(&xh->xh_name_value_len, size);
4785
4786         return;
4787 }
4788
4789 /*
4790  * Set the xattr entry in the specified bucket.
4791  * The bucket is indicated by xs->bucket and it should have the enough
4792  * space for the xattr insertion.
4793  */
4794 static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4795                                            handle_t *handle,
4796                                            struct ocfs2_xattr_info *xi,
4797                                            struct ocfs2_xattr_search *xs,
4798                                            u32 name_hash,
4799                                            int local)
4800 {
4801         int ret;
4802         u64 blkno;
4803
4804         mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
4805              (unsigned long)xi->value_len, xi->name_index,
4806              (unsigned long long)bucket_blkno(xs->bucket));
4807
4808         if (!xs->bucket->bu_bhs[1]) {
4809                 blkno = bucket_blkno(xs->bucket);
4810                 ocfs2_xattr_bucket_relse(xs->bucket);
4811                 ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
4812                 if (ret) {
4813                         mlog_errno(ret);
4814                         goto out;
4815                 }
4816         }
4817
4818         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4819                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4820         if (ret < 0) {
4821                 mlog_errno(ret);
4822                 goto out;
4823         }
4824
4825         ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
4826         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4827
4828 out:
4829         return ret;
4830 }
4831
4832 /*
4833  * Truncate the specified xe_off entry in xattr bucket.
4834  * bucket is indicated by header_bh and len is the new length.
4835  * Both the ocfs2_xattr_value_root and the entry will be updated here.
4836  *
4837  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4838  */
4839 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4840                                              struct ocfs2_xattr_bucket *bucket,
4841                                              int xe_off,
4842                                              int len,
4843                                              struct ocfs2_xattr_set_ctxt *ctxt)
4844 {
4845         int ret, offset;
4846         u64 value_blk;
4847         struct ocfs2_xattr_entry *xe;
4848         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4849         size_t blocksize = inode->i_sb->s_blocksize;
4850         struct ocfs2_xattr_value_buf vb = {
4851                 .vb_access = ocfs2_journal_access,
4852         };
4853
4854         xe = &xh->xh_entries[xe_off];
4855
4856         BUG_ON(!xe || ocfs2_xattr_is_local(xe));
4857
4858         offset = le16_to_cpu(xe->xe_name_offset) +
4859                  OCFS2_XATTR_SIZE(xe->xe_name_len);
4860
4861         value_blk = offset / blocksize;
4862
4863         /* We don't allow ocfs2_xattr_value to be stored in different block. */
4864         BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4865
4866         vb.vb_bh = bucket->bu_bhs[value_blk];
4867         BUG_ON(!vb.vb_bh);
4868
4869         vb.vb_xv = (struct ocfs2_xattr_value_root *)
4870                 (vb.vb_bh->b_data + offset % blocksize);
4871
4872         /*
4873          * From here on out we have to dirty the bucket.  The generic
4874          * value calls only modify one of the bucket's bhs, but we need
4875          * to send the bucket at once.  So if they error, they *could* have
4876          * modified something.  We have to assume they did, and dirty
4877          * the whole bucket.  This leaves us in a consistent state.
4878          */
4879         mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4880              xe_off, (unsigned long long)bucket_blkno(bucket), len);
4881         ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
4882         if (ret) {
4883                 mlog_errno(ret);
4884                 goto out;
4885         }
4886
4887         ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
4888                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4889         if (ret) {
4890                 mlog_errno(ret);
4891                 goto out;
4892         }
4893
4894         xe->xe_value_size = cpu_to_le64(len);
4895
4896         ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
4897
4898 out:
4899         return ret;
4900 }
4901
4902 static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4903                                         struct ocfs2_xattr_search *xs,
4904                                         int len,
4905                                         struct ocfs2_xattr_set_ctxt *ctxt)
4906 {
4907         int ret, offset;
4908         struct ocfs2_xattr_entry *xe = xs->here;
4909         struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
4910
4911         BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
4912
4913         offset = xe - xh->xh_entries;
4914         ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket,
4915                                                 offset, len, ctxt);
4916         if (ret)
4917                 mlog_errno(ret);
4918
4919         return ret;
4920 }
4921
4922 static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4923                                                 handle_t *handle,
4924                                                 struct ocfs2_xattr_search *xs,
4925                                                 char *val,
4926                                                 int value_len)
4927 {
4928         int ret, offset, block_off;
4929         struct ocfs2_xattr_value_root *xv;
4930         struct ocfs2_xattr_entry *xe = xs->here;
4931         struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
4932         void *base;
4933         struct ocfs2_xattr_value_buf vb = {
4934                 .vb_access = ocfs2_journal_access,
4935         };
4936
4937         BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
4938
4939         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, xh,
4940                                                 xe - xh->xh_entries,
4941                                                 &block_off,
4942                                                 &offset);
4943         if (ret) {
4944                 mlog_errno(ret);
4945                 goto out;
4946         }
4947
4948         base = bucket_block(xs->bucket, block_off);
4949         xv = (struct ocfs2_xattr_value_root *)(base + offset +
4950                  OCFS2_XATTR_SIZE(xe->xe_name_len));
4951
4952         vb.vb_xv = xv;
4953         vb.vb_bh = xs->bucket->bu_bhs[block_off];
4954         ret = __ocfs2_xattr_set_value_outside(inode, handle,
4955                                               &vb, val, value_len);
4956         if (ret)
4957                 mlog_errno(ret);
4958 out:
4959         return ret;
4960 }
4961
4962 static int ocfs2_rm_xattr_cluster(struct inode *inode,
4963                                   struct buffer_head *root_bh,
4964                                   u64 blkno,
4965                                   u32 cpos,
4966                                   u32 len,
4967                                   void *para)
4968 {
4969         int ret;
4970         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4971         struct inode *tl_inode = osb->osb_tl_inode;
4972         handle_t *handle;
4973         struct ocfs2_xattr_block *xb =
4974                         (struct ocfs2_xattr_block *)root_bh->b_data;
4975         struct ocfs2_alloc_context *meta_ac = NULL;
4976         struct ocfs2_cached_dealloc_ctxt dealloc;
4977         struct ocfs2_extent_tree et;
4978
4979         ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
4980                                           ocfs2_delete_xattr_in_bucket, NULL);
4981         if (ret) {
4982                 mlog_errno(ret);
4983                 return ret;
4984         }
4985
4986         ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
4987
4988         ocfs2_init_dealloc_ctxt(&dealloc);
4989
4990         mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
4991              cpos, len, (unsigned long long)blkno);
4992
4993         ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
4994                                                len);
4995
4996         ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
4997         if (ret) {
4998                 mlog_errno(ret);
4999                 return ret;
5000         }
5001
5002         mutex_lock(&tl_inode->i_mutex);
5003
5004         if (ocfs2_truncate_log_needs_flush(osb)) {
5005                 ret = __ocfs2_flush_truncate_log(osb);
5006                 if (ret < 0) {
5007                         mlog_errno(ret);
5008                         goto out;
5009                 }
5010         }
5011
5012         handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
5013         if (IS_ERR(handle)) {
5014                 ret = -ENOMEM;
5015                 mlog_errno(ret);
5016                 goto out;
5017         }
5018
5019         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5020                                       OCFS2_JOURNAL_ACCESS_WRITE);
5021         if (ret) {
5022                 mlog_errno(ret);
5023                 goto out_commit;
5024         }
5025
5026         ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
5027                                   &dealloc);
5028         if (ret) {
5029                 mlog_errno(ret);
5030                 goto out_commit;
5031         }
5032
5033         le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5034
5035         ret = ocfs2_journal_dirty(handle, root_bh);
5036         if (ret) {
5037                 mlog_errno(ret);
5038                 goto out_commit;
5039         }
5040
5041         ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5042         if (ret)
5043                 mlog_errno(ret);
5044
5045 out_commit:
5046         ocfs2_commit_trans(osb, handle);
5047 out:
5048         ocfs2_schedule_truncate_log_flush(osb, 1);
5049
5050         mutex_unlock(&tl_inode->i_mutex);
5051
5052         if (meta_ac)
5053                 ocfs2_free_alloc_context(meta_ac);
5054
5055         ocfs2_run_deallocs(osb, &dealloc);
5056
5057         return ret;
5058 }
5059
5060 static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
5061                                          handle_t *handle,
5062                                          struct ocfs2_xattr_search *xs)
5063 {
5064         struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
5065         struct ocfs2_xattr_entry *last = &xh->xh_entries[
5066                                                 le16_to_cpu(xh->xh_count) - 1];
5067         int ret = 0;
5068
5069         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
5070                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5071         if (ret) {
5072                 mlog_errno(ret);
5073                 return;
5074         }
5075
5076         /* Remove the old entry. */
5077         memmove(xs->here, xs->here + 1,
5078                 (void *)last - (void *)xs->here);
5079         memset(last, 0, sizeof(struct ocfs2_xattr_entry));
5080         le16_add_cpu(&xh->xh_count, -1);
5081
5082         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
5083 }
5084
5085 /*
5086  * Set the xattr name/value in the bucket specified in xs.
5087  *
5088  * As the new value in xi may be stored in the bucket or in an outside cluster,
5089  * we divide the whole process into 3 steps:
5090  * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
5091  * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
5092  * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
5093  * 4. If the clusters for the new outside value can't be allocated, we need
5094  *    to free the xattr we allocated in set.
5095  */
5096 static int ocfs2_xattr_set_in_bucket(struct inode *inode,
5097                                      struct ocfs2_xattr_info *xi,
5098                                      struct ocfs2_xattr_search *xs,
5099                                      struct ocfs2_xattr_set_ctxt *ctxt)
5100 {
5101         int ret, local = 1;
5102         size_t value_len;
5103         char *val = (char *)xi->value;
5104         struct ocfs2_xattr_entry *xe = xs->here;
5105         u32 name_hash = ocfs2_xattr_name_hash(inode, xi->name,
5106                                               strlen(xi->name));
5107
5108         if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
5109                 /*
5110                  * We need to truncate the xattr storage first.
5111                  *
5112                  * If both the old and new value are stored to
5113                  * outside block, we only need to truncate
5114                  * the storage and then set the value outside.
5115                  *
5116                  * If the new value should be stored within block,
5117                  * we should free all the outside block first and
5118                  * the modification to the xattr block will be done
5119                  * by following steps.
5120                  */
5121                 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
5122                         value_len = xi->value_len;
5123                 else
5124                         value_len = 0;
5125
5126                 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
5127                                                            value_len,
5128                                                            ctxt);
5129                 if (ret)
5130                         goto out;
5131
5132                 if (value_len)
5133                         goto set_value_outside;
5134         }
5135
5136         value_len = xi->value_len;
5137         /* So we have to handle the inside block change now. */
5138         if (value_len > OCFS2_XATTR_INLINE_SIZE) {
5139                 /*
5140                  * If the new value will be stored outside of block,
5141                  * initalize a new empty value root and insert it first.
5142                  */
5143                 local = 0;
5144                 xi->value = &def_xv;
5145                 xi->value_len = OCFS2_XATTR_ROOT_SIZE;
5146         }
5147
5148         ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs,
5149                                               name_hash, local);
5150         if (ret) {
5151                 mlog_errno(ret);
5152                 goto out;
5153         }
5154
5155         if (value_len <= OCFS2_XATTR_INLINE_SIZE)
5156                 goto out;
5157
5158         /* allocate the space now for the outside block storage. */
5159         ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
5160                                                    value_len, ctxt);
5161         if (ret) {
5162                 mlog_errno(ret);
5163
5164                 if (xs->not_found) {
5165                         /*
5166                          * We can't allocate enough clusters for outside
5167                          * storage and we have allocated xattr already,
5168                          * so need to remove it.
5169                          */
5170                         ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs);
5171                 }
5172                 goto out;
5173         }
5174
5175 set_value_outside:
5176         ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle,
5177                                                    xs, val, value_len);
5178 out:
5179         return ret;
5180 }
5181
5182 /*
5183  * check whether the xattr bucket is filled up with the same hash value.
5184  * If we want to insert the xattr with the same hash, return -ENOSPC.
5185  * If we want to insert a xattr with different hash value, go ahead
5186  * and ocfs2_divide_xattr_bucket will handle this.
5187  */
5188 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5189                                               struct ocfs2_xattr_bucket *bucket,
5190                                               const char *name)
5191 {
5192         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5193         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5194
5195         if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5196                 return 0;
5197
5198         if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5199             xh->xh_entries[0].xe_name_hash) {
5200                 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5201                      "hash = %u\n",
5202                      (unsigned long long)bucket_blkno(bucket),
5203                      le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5204                 return -ENOSPC;
5205         }
5206
5207         return 0;
5208 }
5209
5210 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5211                                              struct ocfs2_xattr_info *xi,
5212                                              struct ocfs2_xattr_search *xs,
5213                                              struct ocfs2_xattr_set_ctxt *ctxt)
5214 {
5215         struct ocfs2_xattr_header *xh;
5216         struct ocfs2_xattr_entry *xe;
5217         u16 count, header_size, xh_free_start;
5218         int free, max_free, need, old;
5219         size_t value_size = 0, name_len = strlen(xi->name);
5220         size_t blocksize = inode->i_sb->s_blocksize;
5221         int ret, allocation = 0;
5222
5223         mlog_entry("Set xattr %s in xattr index block\n", xi->name);
5224
5225 try_again:
5226         xh = xs->header;
5227         count = le16_to_cpu(xh->xh_count);
5228         xh_free_start = le16_to_cpu(xh->xh_free_start);
5229         header_size = sizeof(struct ocfs2_xattr_header) +
5230                         count * sizeof(struct ocfs2_xattr_entry);
5231         max_free = OCFS2_XATTR_BUCKET_SIZE - header_size -
5232                 le16_to_cpu(xh->xh_name_value_len) - OCFS2_XATTR_HEADER_GAP;
5233
5234         mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
5235                         "of %u which exceed block size\n",
5236                         (unsigned long long)bucket_blkno(xs->bucket),
5237                         header_size);
5238
5239         if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
5240                 value_size = OCFS2_XATTR_ROOT_SIZE;
5241         else if (xi->value)
5242                 value_size = OCFS2_XATTR_SIZE(xi->value_len);
5243
5244         if (xs->not_found)
5245                 need = sizeof(struct ocfs2_xattr_entry) +
5246                         OCFS2_XATTR_SIZE(name_len) + value_size;
5247         else {
5248                 need = value_size + OCFS2_XATTR_SIZE(name_len);
5249
5250                 /*
5251                  * We only replace the old value if the new length is smaller
5252                  * than the old one. Otherwise we will allocate new space in the
5253                  * bucket to store it.
5254                  */
5255                 xe = xs->here;
5256                 if (ocfs2_xattr_is_local(xe))
5257                         old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
5258                 else
5259                         old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
5260
5261                 if (old >= value_size)
5262                         need = 0;
5263         }
5264
5265         free = xh_free_start - header_size - OCFS2_XATTR_HEADER_GAP;
5266         /*
5267          * We need to make sure the new name/value pair
5268          * can exist in the same block.
5269          */
5270         if (xh_free_start % blocksize < need)
5271                 free -= xh_free_start % blocksize;
5272
5273         mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
5274              "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
5275              " %u\n", xs->not_found,
5276              (unsigned long long)bucket_blkno(xs->bucket),
5277              free, need, max_free, le16_to_cpu(xh->xh_free_start),
5278              le16_to_cpu(xh->xh_name_value_len));
5279
5280         if (free < need ||
5281             (xs->not_found &&
5282              count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
5283                 if (need <= max_free &&
5284                     count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
5285                         /*
5286                          * We can create the space by defragment. Since only the
5287                          * name/value will be moved, the xe shouldn't be changed
5288                          * in xs.
5289                          */
5290                         ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5291                                                         xs->bucket);
5292                         if (ret) {
5293                                 mlog_errno(ret);
5294                                 goto out;
5295                         }
5296
5297                         xh_free_start = le16_to_cpu(xh->xh_free_start);
5298                         free = xh_free_start - header_size
5299                                 - OCFS2_XATTR_HEADER_GAP;
5300                         if (xh_free_start % blocksize < need)
5301                                 free -= xh_free_start % blocksize;
5302
5303                         if (free >= need)
5304                                 goto xattr_set;
5305
5306                         mlog(0, "Can't get enough space for xattr insert by "
5307                              "defragment. Need %u bytes, but we have %d, so "
5308                              "allocate new bucket for it.\n", need, free);
5309                 }
5310
5311                 /*
5312                  * We have to add new buckets or clusters and one
5313                  * allocation should leave us enough space for insert.
5314                  */
5315                 BUG_ON(allocation);
5316
5317                 /*
5318                  * We do not allow for overlapping ranges between buckets. And
5319                  * the maximum number of collisions we will allow for then is
5320                  * one bucket's worth, so check it here whether we need to
5321                  * add a new bucket for the insert.
5322                  */
5323                 ret = ocfs2_check_xattr_bucket_collision(inode,
5324                                                          xs->bucket,
5325                                                          xi->name);
5326                 if (ret) {
5327                         mlog_errno(ret);
5328                         goto out;
5329                 }
5330
5331                 ret = ocfs2_add_new_xattr_bucket(inode,
5332                                                  xs->xattr_bh,
5333                                                  xs->bucket,
5334                                                  ctxt);
5335                 if (ret) {
5336                         mlog_errno(ret);
5337                         goto out;
5338                 }
5339
5340                 /*
5341                  * ocfs2_add_new_xattr_bucket() will have updated
5342                  * xs->bucket if it moved, but it will not have updated
5343                  * any of the other search fields.  Thus, we drop it and
5344                  * re-search.  Everything should be cached, so it'll be
5345                  * quick.
5346                  */
5347                 ocfs2_xattr_bucket_relse(xs->bucket);
5348                 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5349                                                    xi->name_index,
5350                                                    xi->name, xs);
5351                 if (ret && ret != -ENODATA)
5352                         goto out;
5353                 xs->not_found = ret;
5354                 allocation = 1;
5355                 goto try_again;
5356         }
5357
5358 xattr_set:
5359         ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
5360 out:
5361         mlog_exit(ret);
5362         return ret;
5363 }
5364
5365 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5366                                         struct ocfs2_xattr_bucket *bucket,
5367                                         void *para)
5368 {
5369         int ret = 0;
5370         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5371         u16 i;
5372         struct ocfs2_xattr_entry *xe;
5373         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5374         struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5375         int credits = ocfs2_remove_extent_credits(osb->sb) +
5376                 ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5377
5378
5379         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5380
5381         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5382                 xe = &xh->xh_entries[i];
5383                 if (ocfs2_xattr_is_local(xe))
5384                         continue;
5385
5386                 ctxt.handle = ocfs2_start_trans(osb, credits);
5387                 if (IS_ERR(ctxt.handle)) {
5388                         ret = PTR_ERR(ctxt.handle);
5389                         mlog_errno(ret);
5390                         break;
5391                 }
5392
5393                 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5394                                                         i, 0, &ctxt);
5395
5396                 ocfs2_commit_trans(osb, ctxt.handle);
5397                 if (ret) {
5398                         mlog_errno(ret);
5399                         break;
5400                 }
5401         }
5402
5403         ocfs2_schedule_truncate_log_flush(osb, 1);
5404         ocfs2_run_deallocs(osb, &ctxt.dealloc);
5405         return ret;
5406 }
5407
5408 /*
5409  * Whenever we modify a xattr value root in the bucket(e.g, CoW
5410  * or change the extent record flag), we need to recalculate
5411  * the metaecc for the whole bucket. So it is done here.
5412  *
5413  * Note:
5414  * We have to give the extra credits for the caller.
5415  */
5416 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5417                                             handle_t *handle,
5418                                             void *para)
5419 {
5420         int ret;
5421         struct ocfs2_xattr_bucket *bucket =
5422                         (struct ocfs2_xattr_bucket *)para;
5423
5424         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5425                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5426         if (ret) {
5427                 mlog_errno(ret);
5428                 return ret;
5429         }
5430
5431         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5432
5433         return 0;
5434 }
5435
5436 /*
5437  * Special action we need if the xattr value is refcounted.
5438  *
5439  * 1. If the xattr is refcounted, lock the tree.
5440  * 2. CoW the xattr if we are setting the new value and the value
5441  *    will be stored outside.
5442  * 3. In other case, decrease_refcount will work for us, so just
5443  *    lock the refcount tree, calculate the meta and credits is OK.
5444  *
5445  * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5446  * currently CoW is a completed transaction, while this function
5447  * will also lock the allocators and let us deadlock. So we will
5448  * CoW the whole xattr value.
5449  */
5450 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
5451                                         struct ocfs2_dinode *di,
5452                                         struct ocfs2_xattr_info *xi,
5453                                         struct ocfs2_xattr_search *xis,
5454                                         struct ocfs2_xattr_search *xbs,
5455                                         struct ocfs2_refcount_tree **ref_tree,
5456                                         int *meta_add,
5457                                        &