2563df89fc2a0e60f28a2e8958ddfda525099fb4
[linux-2.6.git] / fs / ocfs2 / xattr.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
9  * Lots of code in this file is copied from linux/fs/ext3/xattr.c.
10  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public
14  * License version 2 as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  */
21
22 #include <linux/capability.h>
23 #include <linux/fs.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>
28 #include <linux/uio.h>
29 #include <linux/sched.h>
30 #include <linux/splice.h>
31 #include <linux/mount.h>
32 #include <linux/writeback.h>
33 #include <linux/falloc.h>
34 #include <linux/sort.h>
35 #include <linux/init.h>
36 #include <linux/module.h>
37 #include <linux/string.h>
38 #include <linux/security.h>
39
40 #define MLOG_MASK_PREFIX ML_XATTR
41 #include <cluster/masklog.h>
42
43 #include "ocfs2.h"
44 #include "alloc.h"
45 #include "blockcheck.h"
46 #include "dlmglue.h"
47 #include "file.h"
48 #include "symlink.h"
49 #include "sysfile.h"
50 #include "inode.h"
51 #include "journal.h"
52 #include "ocfs2_fs.h"
53 #include "suballoc.h"
54 #include "uptodate.h"
55 #include "buffer_head_io.h"
56 #include "super.h"
57 #include "xattr.h"
58
59
60 struct ocfs2_xattr_def_value_root {
61         struct ocfs2_xattr_value_root   xv;
62         struct ocfs2_extent_rec         er;
63 };
64
65 struct ocfs2_xattr_bucket {
66         /* The inode these xattrs are associated with */
67         struct inode *bu_inode;
68
69         /* The actual buffers that make up the bucket */
70         struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
71
72         /* How many blocks make up one bucket for this filesystem */
73         int bu_blocks;
74 };
75
76 struct ocfs2_xattr_set_ctxt {
77         handle_t *handle;
78         struct ocfs2_alloc_context *meta_ac;
79         struct ocfs2_alloc_context *data_ac;
80         struct ocfs2_cached_dealloc_ctxt dealloc;
81 };
82
/* Bytes taken by a value root together with its single extent record. */
#define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))

/*
 * Largest value length that ocfs2_xattr_entry_real_size() still sizes
 * as stored next to the name; longer values are sized as a value root.
 */
#define OCFS2_XATTR_INLINE_SIZE	80

/* Gap subtracted, together with the header, in the two macros below. */
#define OCFS2_XATTR_HEADER_GAP	4

/* OCFS2_MIN_XATTR_INLINE_SIZE less the xattr header and the gap. */
#define OCFS2_XATTR_FREE_IN_IBODY	(OCFS2_MIN_XATTR_INLINE_SIZE \
					 - sizeof(struct ocfs2_xattr_header) \
					 - OCFS2_XATTR_HEADER_GAP)

/*
 * One filesystem block (taken from ptr->i_sb) less the xattr block
 * structure, the xattr header and the gap.
 */
#define OCFS2_XATTR_FREE_IN_BLOCK(ptr)	((ptr)->i_sb->s_blocksize \
					 - sizeof(struct ocfs2_xattr_block) \
					 - sizeof(struct ocfs2_xattr_header) \
					 - OCFS2_XATTR_HEADER_GAP)
93
94 static struct ocfs2_xattr_def_value_root def_xv = {
95         .xv.xr_list.l_count = cpu_to_le16(1),
96 };
97
98 struct xattr_handler *ocfs2_xattr_handlers[] = {
99         &ocfs2_xattr_user_handler,
100 #ifdef CONFIG_OCFS2_FS_POSIX_ACL
101         &ocfs2_xattr_acl_access_handler,
102         &ocfs2_xattr_acl_default_handler,
103 #endif
104         &ocfs2_xattr_trusted_handler,
105         &ocfs2_xattr_security_handler,
106         NULL
107 };
108
109 static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
110         [OCFS2_XATTR_INDEX_USER]        = &ocfs2_xattr_user_handler,
111 #ifdef CONFIG_OCFS2_FS_POSIX_ACL
112         [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
113                                         = &ocfs2_xattr_acl_access_handler,
114         [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
115                                         = &ocfs2_xattr_acl_default_handler,
116 #endif
117         [OCFS2_XATTR_INDEX_TRUSTED]     = &ocfs2_xattr_trusted_handler,
118         [OCFS2_XATTR_INDEX_SECURITY]    = &ocfs2_xattr_security_handler,
119 };
120
/* Description of a single extended attribute: which one, and its value. */
struct ocfs2_xattr_info {
	/* Name index of the attribute (presumably an OCFS2_XATTR_INDEX_* value) */
	int name_index;
	/* Attribute name */
	const char *name;
	/* Attribute value and its length (presumably in bytes) */
	const void *value;
	size_t value_len;
};
127
/*
 * State of an xattr lookup.
 * NOTE(review): the meaning of header/base/end/here/not_found is not
 * visible in this part of the file; confirm against the find helpers.
 */
struct ocfs2_xattr_search {
	struct buffer_head *inode_bh;
	/*
	 * Buffer head of the block that holds the extended attribute.
	 * When the attribute is stored in the inode, xattr_bh is the
	 * same as inode_bh.
	 */
	struct buffer_head *xattr_bh;
	struct ocfs2_xattr_header *header;
	struct ocfs2_xattr_bucket *bucket;
	void *base;
	void *end;
	struct ocfs2_xattr_entry *here;
	int not_found;
};
142
143 static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
144                                              struct ocfs2_xattr_header *xh,
145                                              int index,
146                                              int *block_off,
147                                              int *new_offset);
148
149 static int ocfs2_xattr_block_find(struct inode *inode,
150                                   int name_index,
151                                   const char *name,
152                                   struct ocfs2_xattr_search *xs);
153 static int ocfs2_xattr_index_block_find(struct inode *inode,
154                                         struct buffer_head *root_bh,
155                                         int name_index,
156                                         const char *name,
157                                         struct ocfs2_xattr_search *xs);
158
159 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
160                                         struct ocfs2_xattr_tree_root *xt,
161                                         char *buffer,
162                                         size_t buffer_size);
163
164 static int ocfs2_xattr_create_index_block(struct inode *inode,
165                                           struct ocfs2_xattr_search *xs,
166                                           struct ocfs2_xattr_set_ctxt *ctxt);
167
168 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
169                                              struct ocfs2_xattr_info *xi,
170                                              struct ocfs2_xattr_search *xs,
171                                              struct ocfs2_xattr_set_ctxt *ctxt);
172
173 static int ocfs2_delete_xattr_index_block(struct inode *inode,
174                                           struct buffer_head *xb_bh);
175 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
176                                   u64 src_blk, u64 last_blk, u64 to_blk,
177                                   unsigned int start_bucket,
178                                   u32 *first_hash);
179
180 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
181 {
182         return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
183 }
184
185 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
186 {
187         return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
188 }
189
190 static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
191 {
192         u16 len = sb->s_blocksize -
193                  offsetof(struct ocfs2_xattr_header, xh_entries);
194
195         return len / sizeof(struct ocfs2_xattr_entry);
196 }
197
/* Block number of the first buffer of bucket _b. */
#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
/* Data of the _n-th buffer of bucket _b. */
#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
/* The first block of bucket _b, viewed as an xattr header. */
#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
201
202 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
203 {
204         struct ocfs2_xattr_bucket *bucket;
205         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
206
207         BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
208
209         bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
210         if (bucket) {
211                 bucket->bu_inode = inode;
212                 bucket->bu_blocks = blks;
213         }
214
215         return bucket;
216 }
217
218 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
219 {
220         int i;
221
222         for (i = 0; i < bucket->bu_blocks; i++) {
223                 brelse(bucket->bu_bhs[i]);
224                 bucket->bu_bhs[i] = NULL;
225         }
226 }
227
228 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
229 {
230         if (bucket) {
231                 ocfs2_xattr_bucket_relse(bucket);
232                 bucket->bu_inode = NULL;
233                 kfree(bucket);
234         }
235 }
236
237 /*
238  * A bucket that has never been written to disk doesn't need to be
239  * read.  We just need the buffer_heads.  Don't call this for
240  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
241  * them fully.
242  */
243 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
244                                    u64 xb_blkno)
245 {
246         int i, rc = 0;
247
248         for (i = 0; i < bucket->bu_blocks; i++) {
249                 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
250                                               xb_blkno + i);
251                 if (!bucket->bu_bhs[i]) {
252                         rc = -EIO;
253                         mlog_errno(rc);
254                         break;
255                 }
256
257                 if (!ocfs2_buffer_uptodate(bucket->bu_inode,
258                                            bucket->bu_bhs[i]))
259                         ocfs2_set_new_buffer_uptodate(bucket->bu_inode,
260                                                       bucket->bu_bhs[i]);
261         }
262
263         if (rc)
264                 ocfs2_xattr_bucket_relse(bucket);
265         return rc;
266 }
267
268 /* Read the xattr bucket at xb_blkno */
269 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
270                                    u64 xb_blkno)
271 {
272         int rc;
273
274         rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno,
275                                bucket->bu_blocks, bucket->bu_bhs, 0,
276                                NULL);
277         if (!rc) {
278                 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
279                 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
280                                                  bucket->bu_bhs,
281                                                  bucket->bu_blocks,
282                                                  &bucket_xh(bucket)->xh_check);
283                 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
284                 if (rc)
285                         mlog_errno(rc);
286         }
287
288         if (rc)
289                 ocfs2_xattr_bucket_relse(bucket);
290         return rc;
291 }
292
293 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
294                                              struct ocfs2_xattr_bucket *bucket,
295                                              int type)
296 {
297         int i, rc = 0;
298
299         for (i = 0; i < bucket->bu_blocks; i++) {
300                 rc = ocfs2_journal_access(handle, bucket->bu_inode,
301                                           bucket->bu_bhs[i], type);
302                 if (rc) {
303                         mlog_errno(rc);
304                         break;
305                 }
306         }
307
308         return rc;
309 }
310
311 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
312                                              struct ocfs2_xattr_bucket *bucket)
313 {
314         int i;
315
316         spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
317         ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
318                                    bucket->bu_bhs, bucket->bu_blocks,
319                                    &bucket_xh(bucket)->xh_check);
320         spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
321
322         for (i = 0; i < bucket->bu_blocks; i++)
323                 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
324 }
325
326 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
327                                          struct ocfs2_xattr_bucket *src)
328 {
329         int i;
330         int blocksize = src->bu_inode->i_sb->s_blocksize;
331
332         BUG_ON(dest->bu_blocks != src->bu_blocks);
333         BUG_ON(dest->bu_inode != src->bu_inode);
334
335         for (i = 0; i < src->bu_blocks; i++) {
336                 memcpy(bucket_block(dest, i), bucket_block(src, i),
337                        blocksize);
338         }
339 }
340
341 static int ocfs2_validate_xattr_block(struct super_block *sb,
342                                       struct buffer_head *bh)
343 {
344         int rc;
345         struct ocfs2_xattr_block *xb =
346                 (struct ocfs2_xattr_block *)bh->b_data;
347
348         mlog(0, "Validating xattr block %llu\n",
349              (unsigned long long)bh->b_blocknr);
350
351         BUG_ON(!buffer_uptodate(bh));
352
353         /*
354          * If the ecc fails, we return the error but otherwise
355          * leave the filesystem running.  We know any error is
356          * local to this block.
357          */
358         rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
359         if (rc)
360                 return rc;
361
362         /*
363          * Errors after here are fatal
364          */
365
366         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
367                 ocfs2_error(sb,
368                             "Extended attribute block #%llu has bad "
369                             "signature %.*s",
370                             (unsigned long long)bh->b_blocknr, 7,
371                             xb->xb_signature);
372                 return -EINVAL;
373         }
374
375         if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
376                 ocfs2_error(sb,
377                             "Extended attribute block #%llu has an "
378                             "invalid xb_blkno of %llu",
379                             (unsigned long long)bh->b_blocknr,
380                             (unsigned long long)le64_to_cpu(xb->xb_blkno));
381                 return -EINVAL;
382         }
383
384         if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
385                 ocfs2_error(sb,
386                             "Extended attribute block #%llu has an invalid "
387                             "xb_fs_generation of #%u",
388                             (unsigned long long)bh->b_blocknr,
389                             le32_to_cpu(xb->xb_fs_generation));
390                 return -EINVAL;
391         }
392
393         return 0;
394 }
395
396 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
397                                   struct buffer_head **bh)
398 {
399         int rc;
400         struct buffer_head *tmp = *bh;
401
402         rc = ocfs2_read_block(inode, xb_blkno, &tmp,
403                               ocfs2_validate_xattr_block);
404
405         /* If ocfs2_read_block() got us a new bh, pass it up. */
406         if (!rc && !*bh)
407                 *bh = tmp;
408
409         return rc;
410 }
411
412 static inline const char *ocfs2_xattr_prefix(int name_index)
413 {
414         struct xattr_handler *handler = NULL;
415
416         if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
417                 handler = ocfs2_xattr_handler_map[name_index];
418
419         return handler ? handler->prefix : NULL;
420 }
421
422 static u32 ocfs2_xattr_name_hash(struct inode *inode,
423                                  const char *name,
424                                  int name_len)
425 {
426         /* Get hash value of uuid from super block */
427         u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
428         int i;
429
430         /* hash extended attribute name */
431         for (i = 0; i < name_len; i++) {
432                 hash = (hash << OCFS2_HASH_SHIFT) ^
433                        (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
434                        *name++;
435         }
436
437         return hash;
438 }
439
440 /*
441  * ocfs2_xattr_hash_entry()
442  *
443  * Compute the hash of an extended attribute.
444  */
445 static void ocfs2_xattr_hash_entry(struct inode *inode,
446                                    struct ocfs2_xattr_header *header,
447                                    struct ocfs2_xattr_entry *entry)
448 {
449         u32 hash = 0;
450         char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
451
452         hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len);
453         entry->xe_name_hash = cpu_to_le32(hash);
454
455         return;
456 }
457
458 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
459 {
460         int size = 0;
461
462         if (value_len <= OCFS2_XATTR_INLINE_SIZE)
463                 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
464         else
465                 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
466         size += sizeof(struct ocfs2_xattr_entry);
467
468         return size;
469 }
470
471 int ocfs2_calc_security_init(struct inode *dir,
472                              struct ocfs2_security_xattr_info *si,
473                              int *want_clusters,
474                              int *xattr_credits,
475                              struct ocfs2_alloc_context **xattr_ac)
476 {
477         int ret = 0;
478         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
479         int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
480                                                  si->value_len);
481
482         /*
483          * The max space of security xattr taken inline is
484          * 256(name) + 80(value) + 16(entry) = 352 bytes,
485          * So reserve one metadata block for it is ok.
486          */
487         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
488             s_size > OCFS2_XATTR_FREE_IN_IBODY) {
489                 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
490                 if (ret) {
491                         mlog_errno(ret);
492                         return ret;
493                 }
494                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
495         }
496
497         /* reserve clusters for xattr value which will be set in B tree*/
498         if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
499                 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
500                                                             si->value_len);
501
502                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
503                                                            new_clusters);
504                 *want_clusters += new_clusters;
505         }
506         return ret;
507 }
508
509 int ocfs2_calc_xattr_init(struct inode *dir,
510                           struct buffer_head *dir_bh,
511                           int mode,
512                           struct ocfs2_security_xattr_info *si,
513                           int *want_clusters,
514                           int *xattr_credits,
515                           struct ocfs2_alloc_context **xattr_ac)
516 {
517         int ret = 0;
518         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
519         int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
520
521         if (si->enable)
522                 s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
523                                                      si->value_len);
524
525         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
526                 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
527                                         OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
528                                         "", NULL, 0);
529                 if (acl_len > 0) {
530                         a_size = ocfs2_xattr_entry_real_size(0, acl_len);
531                         if (S_ISDIR(mode))
532                                 a_size <<= 1;
533                 } else if (acl_len != 0 && acl_len != -ENODATA) {
534                         mlog_errno(ret);
535                         return ret;
536                 }
537         }
538
539         if (!(s_size + a_size))
540                 return ret;
541
542         /*
543          * The max space of security xattr taken inline is
544          * 256(name) + 80(value) + 16(entry) = 352 bytes,
545          * The max space of acl xattr taken inline is
546          * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
547          * when blocksize = 512, may reserve one more cluser for
548          * xattr bucket, otherwise reserve one metadata block
549          * for them is ok.
550          * If this is a new directory with inline data,
551          * we choose to reserve the entire inline area for
552          * directory contents and force an external xattr block.
553          */
554         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
555             (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
556             (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
557                 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
558                 if (ret) {
559                         mlog_errno(ret);
560                         return ret;
561                 }
562                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
563         }
564
565         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
566             (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
567                 *want_clusters += 1;
568                 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
569         }
570
571         /*
572          * reserve credits and clusters for xattrs which has large value
573          * and have to be set outside
574          */
575         if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
576                 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
577                                                         si->value_len);
578                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
579                                                            new_clusters);
580                 *want_clusters += new_clusters;
581         }
582         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
583             acl_len > OCFS2_XATTR_INLINE_SIZE) {
584                 /* for directory, it has DEFAULT and ACCESS two types of acls */
585                 new_clusters = (S_ISDIR(mode) ? 2 : 1) *
586                                 ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
587                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
588                                                            new_clusters);
589                 *want_clusters += new_clusters;
590         }
591
592         return ret;
593 }
594
595 static int ocfs2_xattr_extend_allocation(struct inode *inode,
596                                          u32 clusters_to_add,
597                                          struct ocfs2_xattr_value_buf *vb,
598                                          struct ocfs2_xattr_set_ctxt *ctxt)
599 {
600         int status = 0;
601         handle_t *handle = ctxt->handle;
602         enum ocfs2_alloc_restarted why;
603         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
604         u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
605         struct ocfs2_extent_tree et;
606
607         mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
608
609         ocfs2_init_xattr_value_extent_tree(&et, inode, vb);
610
611         status = vb->vb_access(handle, inode, vb->vb_bh,
612                               OCFS2_JOURNAL_ACCESS_WRITE);
613         if (status < 0) {
614                 mlog_errno(status);
615                 goto leave;
616         }
617
618         prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
619         status = ocfs2_add_clusters_in_btree(osb,
620                                              inode,
621                                              &logical_start,
622                                              clusters_to_add,
623                                              0,
624                                              &et,
625                                              handle,
626                                              ctxt->data_ac,
627                                              ctxt->meta_ac,
628                                              &why);
629         if (status < 0) {
630                 mlog_errno(status);
631                 goto leave;
632         }
633
634         status = ocfs2_journal_dirty(handle, vb->vb_bh);
635         if (status < 0) {
636                 mlog_errno(status);
637                 goto leave;
638         }
639
640         clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
641
642         /*
643          * We should have already allocated enough space before the transaction,
644          * so no need to restart.
645          */
646         BUG_ON(why != RESTART_NONE || clusters_to_add);
647
648 leave:
649
650         return status;
651 }
652
653 static int __ocfs2_remove_xattr_range(struct inode *inode,
654                                       struct ocfs2_xattr_value_buf *vb,
655                                       u32 cpos, u32 phys_cpos, u32 len,
656                                       struct ocfs2_xattr_set_ctxt *ctxt)
657 {
658         int ret;
659         u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
660         handle_t *handle = ctxt->handle;
661         struct ocfs2_extent_tree et;
662
663         ocfs2_init_xattr_value_extent_tree(&et, inode, vb);
664
665         ret = vb->vb_access(handle, inode, vb->vb_bh,
666                             OCFS2_JOURNAL_ACCESS_WRITE);
667         if (ret) {
668                 mlog_errno(ret);
669                 goto out;
670         }
671
672         ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac,
673                                   &ctxt->dealloc);
674         if (ret) {
675                 mlog_errno(ret);
676                 goto out;
677         }
678
679         le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
680
681         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
682         if (ret) {
683                 mlog_errno(ret);
684                 goto out;
685         }
686
687         ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len);
688         if (ret)
689                 mlog_errno(ret);
690
691 out:
692         return ret;
693 }
694
695 static int ocfs2_xattr_shrink_size(struct inode *inode,
696                                    u32 old_clusters,
697                                    u32 new_clusters,
698                                    struct ocfs2_xattr_value_buf *vb,
699                                    struct ocfs2_xattr_set_ctxt *ctxt)
700 {
701         int ret = 0;
702         u32 trunc_len, cpos, phys_cpos, alloc_size;
703         u64 block;
704
705         if (old_clusters <= new_clusters)
706                 return 0;
707
708         cpos = new_clusters;
709         trunc_len = old_clusters - new_clusters;
710         while (trunc_len) {
711                 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
712                                                &alloc_size,
713                                                &vb->vb_xv->xr_list);
714                 if (ret) {
715                         mlog_errno(ret);
716                         goto out;
717                 }
718
719                 if (alloc_size > trunc_len)
720                         alloc_size = trunc_len;
721
722                 ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
723                                                  phys_cpos, alloc_size,
724                                                  ctxt);
725                 if (ret) {
726                         mlog_errno(ret);
727                         goto out;
728                 }
729
730                 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
731                 ocfs2_remove_xattr_clusters_from_cache(inode, block,
732                                                        alloc_size);
733                 cpos += alloc_size;
734                 trunc_len -= alloc_size;
735         }
736
737 out:
738         return ret;
739 }
740
741 static int ocfs2_xattr_value_truncate(struct inode *inode,
742                                       struct ocfs2_xattr_value_buf *vb,
743                                       int len,
744                                       struct ocfs2_xattr_set_ctxt *ctxt)
745 {
746         int ret;
747         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
748         u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
749
750         if (new_clusters == old_clusters)
751                 return 0;
752
753         if (new_clusters > old_clusters)
754                 ret = ocfs2_xattr_extend_allocation(inode,
755                                                     new_clusters - old_clusters,
756                                                     vb, ctxt);
757         else
758                 ret = ocfs2_xattr_shrink_size(inode,
759                                               old_clusters, new_clusters,
760                                               vb, ctxt);
761
762         return ret;
763 }
764
/*
 * Append "<prefix><name>\0" to the listing in @buffer.
 *
 * @buffer:   destination, @size bytes long
 * @size:     0 means the caller only wants the required length
 * @result:   bytes used so far; always advanced by the length of this
 *            entry (prefix + name + terminating NUL), even on -ERANGE
 * @prefix:   NUL-terminated prefix string
 * @name:     name bytes, not necessarily NUL-terminated
 * @name_len: number of bytes in @name
 *
 * Returns 0 on success (nothing is written when @size is 0), or
 * -ERANGE when the entry does not fit within @size bytes.
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
				  size_t *result, const char *prefix,
				  const char *name, int name_len)
{
	size_t start = *result;
	int prefix_len = strlen(prefix);
	int entry_len = prefix_len + name_len + 1;
	char *dst;

	*result += entry_len;

	/* size == 0: the caller is only measuring the buffer it needs */
	if (size == 0)
		return 0;

	if (*result > size)
		return -ERANGE;

	dst = buffer + start;
	memcpy(dst, prefix, prefix_len);
	dst += prefix_len;
	memcpy(dst, name, name_len);
	dst[name_len] = '\0';

	return 0;
}
788
789 static int ocfs2_xattr_list_entries(struct inode *inode,
790                                     struct ocfs2_xattr_header *header,
791                                     char *buffer, size_t buffer_size)
792 {
793         size_t result = 0;
794         int i, type, ret;
795         const char *prefix, *name;
796
797         for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
798                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
799                 type = ocfs2_xattr_get_type(entry);
800                 prefix = ocfs2_xattr_prefix(type);
801
802                 if (prefix) {
803                         name = (const char *)header +
804                                 le16_to_cpu(entry->xe_name_offset);
805
806                         ret = ocfs2_xattr_list_entry(buffer, buffer_size,
807                                                      &result, prefix, name,
808                                                      entry->xe_name_len);
809                         if (ret)
810                                 return ret;
811                 }
812         }
813
814         return result;
815 }
816
817 static int ocfs2_xattr_ibody_list(struct inode *inode,
818                                   struct ocfs2_dinode *di,
819                                   char *buffer,
820                                   size_t buffer_size)
821 {
822         struct ocfs2_xattr_header *header = NULL;
823         struct ocfs2_inode_info *oi = OCFS2_I(inode);
824         int ret = 0;
825
826         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
827                 return ret;
828
829         header = (struct ocfs2_xattr_header *)
830                  ((void *)di + inode->i_sb->s_blocksize -
831                  le16_to_cpu(di->i_xattr_inline_size));
832
833         ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
834
835         return ret;
836 }
837
838 static int ocfs2_xattr_block_list(struct inode *inode,
839                                   struct ocfs2_dinode *di,
840                                   char *buffer,
841                                   size_t buffer_size)
842 {
843         struct buffer_head *blk_bh = NULL;
844         struct ocfs2_xattr_block *xb;
845         int ret = 0;
846
847         if (!di->i_xattr_loc)
848                 return ret;
849
850         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
851                                      &blk_bh);
852         if (ret < 0) {
853                 mlog_errno(ret);
854                 return ret;
855         }
856
857         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
858         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
859                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
860                 ret = ocfs2_xattr_list_entries(inode, header,
861                                                buffer, buffer_size);
862         } else {
863                 struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
864                 ret = ocfs2_xattr_tree_list_index_block(inode, xt,
865                                                    buffer, buffer_size);
866         }
867
868         brelse(blk_bh);
869
870         return ret;
871 }
872
873 ssize_t ocfs2_listxattr(struct dentry *dentry,
874                         char *buffer,
875                         size_t size)
876 {
877         int ret = 0, i_ret = 0, b_ret = 0;
878         struct buffer_head *di_bh = NULL;
879         struct ocfs2_dinode *di = NULL;
880         struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
881
882         if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
883                 return -EOPNOTSUPP;
884
885         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
886                 return ret;
887
888         ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
889         if (ret < 0) {
890                 mlog_errno(ret);
891                 return ret;
892         }
893
894         di = (struct ocfs2_dinode *)di_bh->b_data;
895
896         down_read(&oi->ip_xattr_sem);
897         i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
898         if (i_ret < 0)
899                 b_ret = 0;
900         else {
901                 if (buffer) {
902                         buffer += i_ret;
903                         size -= i_ret;
904                 }
905                 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
906                                                buffer, size);
907                 if (b_ret < 0)
908                         i_ret = 0;
909         }
910         up_read(&oi->ip_xattr_sem);
911         ocfs2_inode_unlock(dentry->d_inode, 0);
912
913         brelse(di_bh);
914
915         return i_ret + b_ret;
916 }
917
918 static int ocfs2_xattr_find_entry(int name_index,
919                                   const char *name,
920                                   struct ocfs2_xattr_search *xs)
921 {
922         struct ocfs2_xattr_entry *entry;
923         size_t name_len;
924         int i, cmp = 1;
925
926         if (name == NULL)
927                 return -EINVAL;
928
929         name_len = strlen(name);
930         entry = xs->here;
931         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
932                 cmp = name_index - ocfs2_xattr_get_type(entry);
933                 if (!cmp)
934                         cmp = name_len - entry->xe_name_len;
935                 if (!cmp)
936                         cmp = memcmp(name, (xs->base +
937                                      le16_to_cpu(entry->xe_name_offset)),
938                                      name_len);
939                 if (cmp == 0)
940                         break;
941                 entry += 1;
942         }
943         xs->here = entry;
944
945         return cmp ? -ENODATA : 0;
946 }
947
948 static int ocfs2_xattr_get_value_outside(struct inode *inode,
949                                          struct ocfs2_xattr_value_root *xv,
950                                          void *buffer,
951                                          size_t len)
952 {
953         u32 cpos, p_cluster, num_clusters, bpc, clusters;
954         u64 blkno;
955         int i, ret = 0;
956         size_t cplen, blocksize;
957         struct buffer_head *bh = NULL;
958         struct ocfs2_extent_list *el;
959
960         el = &xv->xr_list;
961         clusters = le32_to_cpu(xv->xr_clusters);
962         bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
963         blocksize = inode->i_sb->s_blocksize;
964
965         cpos = 0;
966         while (cpos < clusters) {
967                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
968                                                &num_clusters, el);
969                 if (ret) {
970                         mlog_errno(ret);
971                         goto out;
972                 }
973
974                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
975                 /* Copy ocfs2_xattr_value */
976                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
977                         ret = ocfs2_read_block(inode, blkno, &bh, NULL);
978                         if (ret) {
979                                 mlog_errno(ret);
980                                 goto out;
981                         }
982
983                         cplen = len >= blocksize ? blocksize : len;
984                         memcpy(buffer, bh->b_data, cplen);
985                         len -= cplen;
986                         buffer += cplen;
987
988                         brelse(bh);
989                         bh = NULL;
990                         if (len == 0)
991                                 break;
992                 }
993                 cpos += num_clusters;
994         }
995 out:
996         return ret;
997 }
998
999 static int ocfs2_xattr_ibody_get(struct inode *inode,
1000                                  int name_index,
1001                                  const char *name,
1002                                  void *buffer,
1003                                  size_t buffer_size,
1004                                  struct ocfs2_xattr_search *xs)
1005 {
1006         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1007         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1008         struct ocfs2_xattr_value_root *xv;
1009         size_t size;
1010         int ret = 0;
1011
1012         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
1013                 return -ENODATA;
1014
1015         xs->end = (void *)di + inode->i_sb->s_blocksize;
1016         xs->header = (struct ocfs2_xattr_header *)
1017                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1018         xs->base = (void *)xs->header;
1019         xs->here = xs->header->xh_entries;
1020
1021         ret = ocfs2_xattr_find_entry(name_index, name, xs);
1022         if (ret)
1023                 return ret;
1024         size = le64_to_cpu(xs->here->xe_value_size);
1025         if (buffer) {
1026                 if (size > buffer_size)
1027                         return -ERANGE;
1028                 if (ocfs2_xattr_is_local(xs->here)) {
1029                         memcpy(buffer, (void *)xs->base +
1030                                le16_to_cpu(xs->here->xe_name_offset) +
1031                                OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
1032                 } else {
1033                         xv = (struct ocfs2_xattr_value_root *)
1034                                 (xs->base + le16_to_cpu(
1035                                  xs->here->xe_name_offset) +
1036                                 OCFS2_XATTR_SIZE(xs->here->xe_name_len));
1037                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1038                                                             buffer, size);
1039                         if (ret < 0) {
1040                                 mlog_errno(ret);
1041                                 return ret;
1042                         }
1043                 }
1044         }
1045
1046         return size;
1047 }
1048
1049 static int ocfs2_xattr_block_get(struct inode *inode,
1050                                  int name_index,
1051                                  const char *name,
1052                                  void *buffer,
1053                                  size_t buffer_size,
1054                                  struct ocfs2_xattr_search *xs)
1055 {
1056         struct ocfs2_xattr_block *xb;
1057         struct ocfs2_xattr_value_root *xv;
1058         size_t size;
1059         int ret = -ENODATA, name_offset, name_len, block_off, i;
1060
1061         xs->bucket = ocfs2_xattr_bucket_new(inode);
1062         if (!xs->bucket) {
1063                 ret = -ENOMEM;
1064                 mlog_errno(ret);
1065                 goto cleanup;
1066         }
1067
1068         ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1069         if (ret) {
1070                 mlog_errno(ret);
1071                 goto cleanup;
1072         }
1073
1074         if (xs->not_found) {
1075                 ret = -ENODATA;
1076                 goto cleanup;
1077         }
1078
1079         xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1080         size = le64_to_cpu(xs->here->xe_value_size);
1081         if (buffer) {
1082                 ret = -ERANGE;
1083                 if (size > buffer_size)
1084                         goto cleanup;
1085
1086                 name_offset = le16_to_cpu(xs->here->xe_name_offset);
1087                 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1088                 i = xs->here - xs->header->xh_entries;
1089
1090                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1091                         ret = ocfs2_xattr_bucket_get_name_value(inode,
1092                                                                 bucket_xh(xs->bucket),
1093                                                                 i,
1094                                                                 &block_off,
1095                                                                 &name_offset);
1096                         xs->base = bucket_block(xs->bucket, block_off);
1097                 }
1098                 if (ocfs2_xattr_is_local(xs->here)) {
1099                         memcpy(buffer, (void *)xs->base +
1100                                name_offset + name_len, size);
1101                 } else {
1102                         xv = (struct ocfs2_xattr_value_root *)
1103                                 (xs->base + name_offset + name_len);
1104                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1105                                                             buffer, size);
1106                         if (ret < 0) {
1107                                 mlog_errno(ret);
1108                                 goto cleanup;
1109                         }
1110                 }
1111         }
1112         ret = size;
1113 cleanup:
1114         ocfs2_xattr_bucket_free(xs->bucket);
1115
1116         brelse(xs->xattr_bh);
1117         xs->xattr_bh = NULL;
1118         return ret;
1119 }
1120
1121 int ocfs2_xattr_get_nolock(struct inode *inode,
1122                            struct buffer_head *di_bh,
1123                            int name_index,
1124                            const char *name,
1125                            void *buffer,
1126                            size_t buffer_size)
1127 {
1128         int ret;
1129         struct ocfs2_dinode *di = NULL;
1130         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1131         struct ocfs2_xattr_search xis = {
1132                 .not_found = -ENODATA,
1133         };
1134         struct ocfs2_xattr_search xbs = {
1135                 .not_found = -ENODATA,
1136         };
1137
1138         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1139                 return -EOPNOTSUPP;
1140
1141         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1142                 ret = -ENODATA;
1143
1144         xis.inode_bh = xbs.inode_bh = di_bh;
1145         di = (struct ocfs2_dinode *)di_bh->b_data;
1146
1147         down_read(&oi->ip_xattr_sem);
1148         ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1149                                     buffer_size, &xis);
1150         if (ret == -ENODATA && di->i_xattr_loc)
1151                 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1152                                             buffer_size, &xbs);
1153         up_read(&oi->ip_xattr_sem);
1154
1155         return ret;
1156 }
1157
1158 /* ocfs2_xattr_get()
1159  *
1160  * Copy an extended attribute into the buffer provided.
1161  * Buffer is NULL to compute the size of buffer required.
1162  */
1163 static int ocfs2_xattr_get(struct inode *inode,
1164                            int name_index,
1165                            const char *name,
1166                            void *buffer,
1167                            size_t buffer_size)
1168 {
1169         int ret;
1170         struct buffer_head *di_bh = NULL;
1171
1172         ret = ocfs2_inode_lock(inode, &di_bh, 0);
1173         if (ret < 0) {
1174                 mlog_errno(ret);
1175                 return ret;
1176         }
1177         ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1178                                      name, buffer, buffer_size);
1179
1180         ocfs2_inode_unlock(inode, 0);
1181
1182         brelse(di_bh);
1183
1184         return ret;
1185 }
1186
1187 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1188                                            handle_t *handle,
1189                                            struct ocfs2_xattr_value_root *xv,
1190                                            const void *value,
1191                                            int value_len)
1192 {
1193         int ret = 0, i, cp_len;
1194         u16 blocksize = inode->i_sb->s_blocksize;
1195         u32 p_cluster, num_clusters;
1196         u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1197         u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1198         u64 blkno;
1199         struct buffer_head *bh = NULL;
1200
1201         BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1202
1203         while (cpos < clusters) {
1204                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1205                                                &num_clusters, &xv->xr_list);
1206                 if (ret) {
1207                         mlog_errno(ret);
1208                         goto out;
1209                 }
1210
1211                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1212
1213                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1214                         ret = ocfs2_read_block(inode, blkno, &bh, NULL);
1215                         if (ret) {
1216                                 mlog_errno(ret);
1217                                 goto out;
1218                         }
1219
1220                         ret = ocfs2_journal_access(handle,
1221                                                    inode,
1222                                                    bh,
1223                                                    OCFS2_JOURNAL_ACCESS_WRITE);
1224                         if (ret < 0) {
1225                                 mlog_errno(ret);
1226                                 goto out;
1227                         }
1228
1229                         cp_len = value_len > blocksize ? blocksize : value_len;
1230                         memcpy(bh->b_data, value, cp_len);
1231                         value_len -= cp_len;
1232                         value += cp_len;
1233                         if (cp_len < blocksize)
1234                                 memset(bh->b_data + cp_len, 0,
1235                                        blocksize - cp_len);
1236
1237                         ret = ocfs2_journal_dirty(handle, bh);
1238                         if (ret < 0) {
1239                                 mlog_errno(ret);
1240                                 goto out;
1241                         }
1242                         brelse(bh);
1243                         bh = NULL;
1244
1245                         /*
1246                          * XXX: do we need to empty all the following
1247                          * blocks in this cluster?
1248                          */
1249                         if (!value_len)
1250                                 break;
1251                 }
1252                 cpos += num_clusters;
1253         }
1254 out:
1255         brelse(bh);
1256
1257         return ret;
1258 }
1259
1260 static int ocfs2_xattr_cleanup(struct inode *inode,
1261                                handle_t *handle,
1262                                struct ocfs2_xattr_info *xi,
1263                                struct ocfs2_xattr_search *xs,
1264                                struct ocfs2_xattr_value_buf *vb,
1265                                size_t offs)
1266 {
1267         int ret = 0;
1268         size_t name_len = strlen(xi->name);
1269         void *val = xs->base + offs;
1270         size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1271
1272         ret = vb->vb_access(handle, inode, vb->vb_bh,
1273                             OCFS2_JOURNAL_ACCESS_WRITE);
1274         if (ret) {
1275                 mlog_errno(ret);
1276                 goto out;
1277         }
1278         /* Decrease xattr count */
1279         le16_add_cpu(&xs->header->xh_count, -1);
1280         /* Remove the xattr entry and tree root which has already be set*/
1281         memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
1282         memset(val, 0, size);
1283
1284         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1285         if (ret < 0)
1286                 mlog_errno(ret);
1287 out:
1288         return ret;
1289 }
1290
1291 static int ocfs2_xattr_update_entry(struct inode *inode,
1292                                     handle_t *handle,
1293                                     struct ocfs2_xattr_info *xi,
1294                                     struct ocfs2_xattr_search *xs,
1295                                     struct ocfs2_xattr_value_buf *vb,
1296                                     size_t offs)
1297 {
1298         int ret;
1299
1300         ret = vb->vb_access(handle, inode, vb->vb_bh,
1301                             OCFS2_JOURNAL_ACCESS_WRITE);
1302         if (ret) {
1303                 mlog_errno(ret);
1304                 goto out;
1305         }
1306
1307         xs->here->xe_name_offset = cpu_to_le16(offs);
1308         xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1309         if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE)
1310                 ocfs2_xattr_set_local(xs->here, 1);
1311         else
1312                 ocfs2_xattr_set_local(xs->here, 0);
1313         ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1314
1315         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1316         if (ret < 0)
1317                 mlog_errno(ret);
1318 out:
1319         return ret;
1320 }
1321
1322 /*
1323  * ocfs2_xattr_set_value_outside()
1324  *
1325  * Set large size value in B tree.
1326  */
1327 static int ocfs2_xattr_set_value_outside(struct inode *inode,
1328                                          struct ocfs2_xattr_info *xi,
1329                                          struct ocfs2_xattr_search *xs,
1330                                          struct ocfs2_xattr_set_ctxt *ctxt,
1331                                          struct ocfs2_xattr_value_buf *vb,
1332                                          size_t offs)
1333 {
1334         size_t name_len = strlen(xi->name);
1335         void *val = xs->base + offs;
1336         struct ocfs2_xattr_value_root *xv = NULL;
1337         size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1338         int ret = 0;
1339
1340         memset(val, 0, size);
1341         memcpy(val, xi->name, name_len);
1342         xv = (struct ocfs2_xattr_value_root *)
1343                 (val + OCFS2_XATTR_SIZE(name_len));
1344         xv->xr_clusters = 0;
1345         xv->xr_last_eb_blk = 0;
1346         xv->xr_list.l_tree_depth = 0;
1347         xv->xr_list.l_count = cpu_to_le16(1);
1348         xv->xr_list.l_next_free_rec = 0;
1349         vb->vb_xv = xv;
1350
1351         ret = ocfs2_xattr_value_truncate(inode, vb, xi->value_len, ctxt);
1352         if (ret < 0) {
1353                 mlog_errno(ret);
1354                 return ret;
1355         }
1356         ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, vb, offs);
1357         if (ret < 0) {
1358                 mlog_errno(ret);
1359                 return ret;
1360         }
1361         ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb->vb_xv,
1362                                               xi->value, xi->value_len);
1363         if (ret < 0)
1364                 mlog_errno(ret);
1365
1366         return ret;
1367 }
1368
1369 /*
1370  * ocfs2_xattr_set_entry_local()
1371  *
1372  * Set, replace or remove extended attribute in local.
1373  */
1374 static void ocfs2_xattr_set_entry_local(struct inode *inode,
1375                                         struct ocfs2_xattr_info *xi,
1376                                         struct ocfs2_xattr_search *xs,
1377                                         struct ocfs2_xattr_entry *last,
1378                                         size_t min_offs)
1379 {
1380         size_t name_len = strlen(xi->name);
1381         int i;
1382
1383         if (xi->value && xs->not_found) {
1384                 /* Insert the new xattr entry. */
1385                 le16_add_cpu(&xs->header->xh_count, 1);
1386                 ocfs2_xattr_set_type(last, xi->name_index);
1387                 ocfs2_xattr_set_local(last, 1);
1388                 last->xe_name_len = name_len;
1389         } else {
1390                 void *first_val;
1391                 void *val;
1392                 size_t offs, size;
1393
1394                 first_val = xs->base + min_offs;
1395                 offs = le16_to_cpu(xs->here->xe_name_offset);
1396                 val = xs->base + offs;
1397
1398                 if (le64_to_cpu(xs->here->xe_value_size) >
1399                     OCFS2_XATTR_INLINE_SIZE)
1400                         size = OCFS2_XATTR_SIZE(name_len) +
1401                                 OCFS2_XATTR_ROOT_SIZE;
1402                 else
1403                         size = OCFS2_XATTR_SIZE(name_len) +
1404                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1405
1406                 if (xi->value && size == OCFS2_XATTR_SIZE(name_len) +
1407                                 OCFS2_XATTR_SIZE(xi->value_len)) {
1408                         /* The old and the new value have the
1409                            same size. Just replace the value. */
1410                         ocfs2_xattr_set_local(xs->here, 1);
1411                         xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1412                         /* Clear value bytes. */
1413                         memset(val + OCFS2_XATTR_SIZE(name_len),
1414                                0,
1415                                OCFS2_XATTR_SIZE(xi->value_len));
1416                         memcpy(val + OCFS2_XATTR_SIZE(name_len),
1417                                xi->value,
1418                                xi->value_len);
1419                         return;
1420                 }
1421                 /* Remove the old name+value. */
1422                 memmove(first_val + size, first_val, val - first_val);
1423                 memset(first_val, 0, size);
1424                 xs->here->xe_name_hash = 0;
1425                 xs->here->xe_name_offset = 0;
1426                 ocfs2_xattr_set_local(xs->here, 1);
1427                 xs->here->xe_value_size = 0;
1428
1429                 min_offs += size;
1430
1431                 /* Adjust all value offsets. */
1432                 last = xs->header->xh_entries;
1433                 for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1434                         size_t o = le16_to_cpu(last->xe_name_offset);
1435
1436                         if (o < offs)
1437                                 last->xe_name_offset = cpu_to_le16(o + size);
1438                         last += 1;
1439                 }
1440
1441                 if (!xi->value) {
1442                         /* Remove the old entry. */
1443                         last -= 1;
1444                         memmove(xs->here, xs->here + 1,
1445                                 (void *)last - (void *)xs->here);
1446                         memset(last, 0, sizeof(struct ocfs2_xattr_entry));
1447                         le16_add_cpu(&xs->header->xh_count, -1);
1448                 }
1449         }
1450         if (xi->value) {
1451                 /* Insert the new name+value. */
1452                 size_t size = OCFS2_XATTR_SIZE(name_len) +
1453                                 OCFS2_XATTR_SIZE(xi->value_len);
1454                 void *val = xs->base + min_offs - size;
1455
1456                 xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
1457                 memset(val, 0, size);
1458                 memcpy(val, xi->name, name_len);
1459                 memcpy(val + OCFS2_XATTR_SIZE(name_len),
1460                        xi->value,
1461                        xi->value_len);
1462                 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1463                 ocfs2_xattr_set_local(xs->here, 1);
1464                 ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1465         }
1466
1467         return;
1468 }
1469
1470 /*
1471  * ocfs2_xattr_set_entry()
1472  *
1473  * Set extended attribute entry into inode or block.
1474  *
1475  * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
1476  * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(),
1477  * then set value in B tree with set_value_outside().
1478  */
1479 static int ocfs2_xattr_set_entry(struct inode *inode,
1480                                  struct ocfs2_xattr_info *xi,
1481                                  struct ocfs2_xattr_search *xs,
1482                                  struct ocfs2_xattr_set_ctxt *ctxt,
1483                                  int flag)
1484 {
1485         struct ocfs2_xattr_entry *last;
1486         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1487         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1488         size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
1489         size_t size_l = 0;
1490         handle_t *handle = ctxt->handle;
1491         int free, i, ret;
1492         struct ocfs2_xattr_info xi_l = {
1493                 .name_index = xi->name_index,
1494                 .name = xi->name,
1495                 .value = xi->value,
1496                 .value_len = xi->value_len,
1497         };
1498         struct ocfs2_xattr_value_buf vb = {
1499                 .vb_bh = xs->xattr_bh,
1500                 .vb_access = ocfs2_journal_access_di,
1501         };
1502
1503         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1504                 BUG_ON(xs->xattr_bh == xs->inode_bh);
1505                 vb.vb_access = ocfs2_journal_access_xb;
1506         } else
1507                 BUG_ON(xs->xattr_bh != xs->inode_bh);
1508
1509         /* Compute min_offs, last and free space. */
1510         last = xs->header->xh_entries;
1511
1512         for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1513                 size_t offs = le16_to_cpu(last->xe_name_offset);
1514                 if (offs < min_offs)
1515                         min_offs = offs;
1516                 last += 1;
1517         }
1518
1519         free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
1520         if (free < 0)
1521                 return -EIO;
1522
1523         if (!xs->not_found) {
1524                 size_t size = 0;
1525                 if (ocfs2_xattr_is_local(xs->here))
1526                         size = OCFS2_XATTR_SIZE(name_len) +
1527                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1528                 else
1529                         size = OCFS2_XATTR_SIZE(name_len) +
1530                                 OCFS2_XATTR_ROOT_SIZE;
1531                 free += (size + sizeof(struct ocfs2_xattr_entry));
1532         }
1533         /* Check free space in inode or block */
1534         if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1535                 if (free < sizeof(struct ocfs2_xattr_entry) +
1536                            OCFS2_XATTR_SIZE(name_len) +
1537                            OCFS2_XATTR_ROOT_SIZE) {
1538                         ret = -ENOSPC;
1539                         goto out;
1540                 }
1541                 size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1542                 xi_l.value = (void *)&def_xv;
1543                 xi_l.value_len = OCFS2_XATTR_ROOT_SIZE;
1544         } else if (xi->value) {
1545                 if (free < sizeof(struct ocfs2_xattr_entry) +
1546                            OCFS2_XATTR_SIZE(name_len) +
1547                            OCFS2_XATTR_SIZE(xi->value_len)) {
1548                         ret = -ENOSPC;
1549                         goto out;
1550                 }
1551         }
1552
1553         if (!xs->not_found) {
1554                 /* For existing extended attribute */
1555                 size_t size = OCFS2_XATTR_SIZE(name_len) +
1556                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1557                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1558                 void *val = xs->base + offs;
1559
1560                 if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1561                         /* Replace existing local xattr with tree root */
1562                         ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1563                                                             ctxt, &vb, offs);
1564                         if (ret < 0)
1565                                 mlog_errno(ret);
1566                         goto out;
1567                 } else if (!ocfs2_xattr_is_local(xs->here)) {
1568                         /* For existing xattr which has value outside */
1569                         vb.vb_xv = (struct ocfs2_xattr_value_root *)
1570                                 (val + OCFS2_XATTR_SIZE(name_len));
1571
1572                         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1573                                 /*
1574                                  * If new value need set outside also,
1575                                  * first truncate old value to new value,
1576                                  * then set new value with set_value_outside().
1577                                  */
1578                                 ret = ocfs2_xattr_value_truncate(inode,
1579                                                                  &vb,
1580                                                                  xi->value_len,
1581                                                                  ctxt);
1582                                 if (ret < 0) {
1583                                         mlog_errno(ret);
1584                                         goto out;
1585                                 }
1586
1587                                 ret = ocfs2_xattr_update_entry(inode,
1588                                                                handle,
1589                                                                xi,
1590                                                                xs,
1591                                                                &vb,
1592                                                                offs);
1593                                 if (ret < 0) {
1594                                         mlog_errno(ret);
1595                                         goto out;
1596                                 }
1597
1598                                 ret = __ocfs2_xattr_set_value_outside(inode,
1599                                                                 handle,
1600                                                                 vb.vb_xv,
1601                                                                 xi->value,
1602                                                                 xi->value_len);
1603                                 if (ret < 0)
1604                                         mlog_errno(ret);
1605                                 goto out;
1606                         } else {
1607                                 /*
1608                                  * If new value need set in local,
1609                                  * just trucate old value to zero.
1610                                  */
1611                                  ret = ocfs2_xattr_value_truncate(inode,
1612                                                                   &vb,
1613                                                                   0,
1614                                                                   ctxt);
1615                                 if (ret < 0)
1616                                         mlog_errno(ret);
1617                         }
1618                 }
1619         }
1620
1621         ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh,
1622                                       OCFS2_JOURNAL_ACCESS_WRITE);
1623         if (ret) {
1624                 mlog_errno(ret);
1625                 goto out;
1626         }
1627
1628         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1629                 ret = vb.vb_access(handle, inode, vb.vb_bh,
1630                                    OCFS2_JOURNAL_ACCESS_WRITE);
1631                 if (ret) {
1632                         mlog_errno(ret);
1633                         goto out;
1634                 }
1635         }
1636
1637         /*
1638          * Set value in local, include set tree root in local.
1639          * This is the first step for value size >INLINE_SIZE.
1640          */
1641         ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);
1642
1643         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1644                 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1645                 if (ret < 0) {
1646                         mlog_errno(ret);
1647                         goto out;
1648                 }
1649         }
1650
1651         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
1652             (flag & OCFS2_INLINE_XATTR_FL)) {
1653                 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1654                 unsigned int xattrsize = osb->s_xattr_inline_size;
1655
1656                 /*
1657                  * Adjust extent record count or inline data size
1658                  * to reserve space for extended attribute.
1659                  */
1660                 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1661                         struct ocfs2_inline_data *idata = &di->id2.i_data;
1662                         le16_add_cpu(&idata->id_count, -xattrsize);
1663                 } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
1664                         struct ocfs2_extent_list *el = &di->id2.i_list;
1665                         le16_add_cpu(&el->l_count, -(xattrsize /
1666                                         sizeof(struct ocfs2_extent_rec)));
1667                 }
1668                 di->i_xattr_inline_size = cpu_to_le16(xattrsize);
1669         }
1670         /* Update xattr flag */
1671         spin_lock(&oi->ip_lock);
1672         oi->ip_dyn_features |= flag;
1673         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1674         spin_unlock(&oi->ip_lock);
1675
1676         ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1677         if (ret < 0)
1678                 mlog_errno(ret);
1679
1680         if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1681                 /*
1682                  * Set value outside in B tree.
1683                  * This is the second step for value size > INLINE_SIZE.
1684                  */
1685                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1686                 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt,
1687                                                     &vb, offs);
1688                 if (ret < 0) {
1689                         int ret2;
1690
1691                         mlog_errno(ret);
1692                         /*
1693                          * If set value outside failed, we have to clean
1694                          * the junk tree root we have already set in local.
1695                          */
1696                         ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle,
1697                                                    xi, xs, &vb, offs);
1698                         if (ret2 < 0)
1699                                 mlog_errno(ret2);
1700                 }
1701         }
1702 out:
1703         return ret;
1704 }
1705
1706 static int ocfs2_remove_value_outside(struct inode*inode,
1707                                       struct ocfs2_xattr_value_buf *vb,
1708                                       struct ocfs2_xattr_header *header)
1709 {
1710         int ret = 0, i;
1711         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1712         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
1713
1714         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
1715
1716         ctxt.handle = ocfs2_start_trans(osb,
1717                                         ocfs2_remove_extent_credits(osb->sb));
1718         if (IS_ERR(ctxt.handle)) {
1719                 ret = PTR_ERR(ctxt.handle);
1720                 mlog_errno(ret);
1721                 goto out;
1722         }
1723
1724         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1725                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1726
1727                 if (!ocfs2_xattr_is_local(entry)) {
1728                         void *val;
1729
1730                         val = (void *)header +
1731                                 le16_to_cpu(entry->xe_name_offset);
1732                         vb->vb_xv = (struct ocfs2_xattr_value_root *)
1733                                 (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1734                         ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
1735                         if (ret < 0) {
1736                                 mlog_errno(ret);
1737                                 break;
1738                         }
1739                 }
1740         }
1741
1742         ocfs2_commit_trans(osb, ctxt.handle);
1743         ocfs2_schedule_truncate_log_flush(osb, 1);
1744         ocfs2_run_deallocs(osb, &ctxt.dealloc);
1745 out:
1746         return ret;
1747 }
1748
1749 static int ocfs2_xattr_ibody_remove(struct inode *inode,
1750                                     struct buffer_head *di_bh)
1751 {
1752
1753         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1754         struct ocfs2_xattr_header *header;
1755         int ret;
1756         struct ocfs2_xattr_value_buf vb = {
1757                 .vb_bh = di_bh,
1758                 .vb_access = ocfs2_journal_access_di,
1759         };
1760
1761         header = (struct ocfs2_xattr_header *)
1762                  ((void *)di + inode->i_sb->s_blocksize -
1763                  le16_to_cpu(di->i_xattr_inline_size));
1764
1765         ret = ocfs2_remove_value_outside(inode, &vb, header);
1766
1767         return ret;
1768 }
1769
1770 static int ocfs2_xattr_block_remove(struct inode *inode,
1771                                     struct buffer_head *blk_bh)
1772 {
1773         struct ocfs2_xattr_block *xb;
1774         int ret = 0;
1775         struct ocfs2_xattr_value_buf vb = {
1776                 .vb_bh = blk_bh,
1777                 .vb_access = ocfs2_journal_access_xb,
1778         };
1779
1780         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1781         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1782                 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1783                 ret = ocfs2_remove_value_outside(inode, &vb, header);
1784         } else
1785                 ret = ocfs2_delete_xattr_index_block(inode, blk_bh);
1786
1787         return ret;
1788 }
1789
1790 static int ocfs2_xattr_free_block(struct inode *inode,
1791                                   u64 block)
1792 {
1793         struct inode *xb_alloc_inode;
1794         struct buffer_head *xb_alloc_bh = NULL;
1795         struct buffer_head *blk_bh = NULL;
1796         struct ocfs2_xattr_block *xb;
1797         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1798         handle_t *handle;
1799         int ret = 0;
1800         u64 blk, bg_blkno;
1801         u16 bit;
1802
1803         ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
1804         if (ret < 0) {
1805                 mlog_errno(ret);
1806                 goto out;
1807         }
1808
1809         ret = ocfs2_xattr_block_remove(inode, blk_bh);
1810         if (ret < 0) {
1811                 mlog_errno(ret);
1812                 goto out;
1813         }
1814
1815         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1816         blk = le64_to_cpu(xb->xb_blkno);
1817         bit = le16_to_cpu(xb->xb_suballoc_bit);
1818         bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1819
1820         xb_alloc_inode = ocfs2_get_system_file_inode(osb,
1821                                 EXTENT_ALLOC_SYSTEM_INODE,
1822                                 le16_to_cpu(xb->xb_suballoc_slot));
1823         if (!xb_alloc_inode) {
1824                 ret = -ENOMEM;
1825                 mlog_errno(ret);
1826                 goto out;
1827         }
1828         mutex_lock(&xb_alloc_inode->i_mutex);
1829
1830         ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
1831         if (ret < 0) {
1832                 mlog_errno(ret);
1833                 goto out_mutex;
1834         }
1835
1836         handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
1837         if (IS_ERR(handle)) {
1838                 ret = PTR_ERR(handle);
1839                 mlog_errno(ret);
1840                 goto out_unlock;
1841         }
1842
1843         ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
1844                                        bit, bg_blkno, 1);
1845         if (ret < 0)
1846                 mlog_errno(ret);
1847
1848         ocfs2_commit_trans(osb, handle);
1849 out_unlock:
1850         ocfs2_inode_unlock(xb_alloc_inode, 1);
1851         brelse(xb_alloc_bh);
1852 out_mutex:
1853         mutex_unlock(&xb_alloc_inode->i_mutex);
1854         iput(xb_alloc_inode);
1855 out:
1856         brelse(blk_bh);
1857         return ret;
1858 }
1859
1860 /*
1861  * ocfs2_xattr_remove()
1862  *
1863  * Free extended attribute resources associated with this inode.
1864  */
1865 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1866 {
1867         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1868         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1869         handle_t *handle;
1870         int ret;
1871
1872         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1873                 return 0;
1874
1875         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1876                 return 0;
1877
1878         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1879                 ret = ocfs2_xattr_ibody_remove(inode, di_bh);
1880                 if (ret < 0) {
1881                         mlog_errno(ret);
1882                         goto out;
1883                 }
1884         }
1885
1886         if (di->i_xattr_loc) {
1887                 ret = ocfs2_xattr_free_block(inode,
1888                                              le64_to_cpu(di->i_xattr_loc));
1889                 if (ret < 0) {
1890                         mlog_errno(ret);
1891                         goto out;
1892                 }
1893         }
1894
1895         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1896                                    OCFS2_INODE_UPDATE_CREDITS);
1897         if (IS_ERR(handle)) {
1898                 ret = PTR_ERR(handle);
1899                 mlog_errno(ret);
1900                 goto out;
1901         }
1902         ret = ocfs2_journal_access_di(handle, inode, di_bh,
1903                                       OCFS2_JOURNAL_ACCESS_WRITE);
1904         if (ret) {
1905                 mlog_errno(ret);
1906                 goto out_commit;
1907         }
1908
1909         di->i_xattr_loc = 0;
1910
1911         spin_lock(&oi->ip_lock);
1912         oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
1913         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1914         spin_unlock(&oi->ip_lock);
1915
1916         ret = ocfs2_journal_dirty(handle, di_bh);
1917         if (ret < 0)
1918                 mlog_errno(ret);
1919 out_commit:
1920         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1921 out:
1922         return ret;
1923 }
1924
1925 static int ocfs2_xattr_has_space_inline(struct inode *inode,
1926                                         struct ocfs2_dinode *di)
1927 {
1928         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1929         unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
1930         int free;
1931
1932         if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
1933                 return 0;
1934
1935         if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1936                 struct ocfs2_inline_data *idata = &di->id2.i_data;
1937                 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
1938         } else if (ocfs2_inode_is_fast_symlink(inode)) {
1939                 free = ocfs2_fast_symlink_chars(inode->i_sb) -
1940                         le64_to_cpu(di->i_size);
1941         } else {
1942                 struct ocfs2_extent_list *el = &di->id2.i_list;
1943                 free = (le16_to_cpu(el->l_count) -
1944                         le16_to_cpu(el->l_next_free_rec)) *
1945                         sizeof(struct ocfs2_extent_rec);
1946         }
1947         if (free >= xattrsize)
1948                 return 1;
1949
1950         return 0;
1951 }
1952
1953 /*
1954  * ocfs2_xattr_ibody_find()
1955  *
1956  * Find extended attribute in inode block and
1957  * fill search info into struct ocfs2_xattr_search.
1958  */
1959 static int ocfs2_xattr_ibody_find(struct inode *inode,
1960                                   int name_index,
1961                                   const char *name,
1962                                   struct ocfs2_xattr_search *xs)
1963 {
1964         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1965         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1966         int ret;
1967         int has_space = 0;
1968
1969         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1970                 return 0;
1971
1972         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1973                 down_read(&oi->ip_alloc_sem);
1974                 has_space = ocfs2_xattr_has_space_inline(inode, di);
1975                 up_read(&oi->ip_alloc_sem);
1976                 if (!has_space)
1977                         return 0;
1978         }
1979
1980         xs->xattr_bh = xs->inode_bh;
1981         xs->end = (void *)di + inode->i_sb->s_blocksize;
1982         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
1983                 xs->header = (struct ocfs2_xattr_header *)
1984                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1985         else
1986                 xs->header = (struct ocfs2_xattr_header *)
1987                         (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
1988         xs->base = (void *)xs->header;
1989         xs->here = xs->header->xh_entries;
1990
1991         /* Find the named attribute. */
1992         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1993                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
1994                 if (ret && ret != -ENODATA)
1995                         return ret;
1996                 xs->not_found = ret;
1997         }
1998
1999         return 0;
2000 }
2001
2002 /*
2003  * ocfs2_xattr_ibody_set()
2004  *
2005  * Set, replace or remove an extended attribute into inode block.
2006  *
2007  */
2008 static int ocfs2_xattr_ibody_set(struct inode *inode,
2009                                  struct ocfs2_xattr_info *xi,
2010                                  struct ocfs2_xattr_search *xs,
2011                                  struct ocfs2_xattr_set_ctxt *ctxt)
2012 {
2013         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2014         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2015         int ret;
2016
2017         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2018                 return -ENOSPC;
2019
2020         down_write(&oi->ip_alloc_sem);
2021         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2022                 if (!ocfs2_xattr_has_space_inline(inode, di)) {
2023                         ret = -ENOSPC;
2024                         goto out;
2025                 }
2026         }
2027
2028         ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2029                                 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
2030 out:
2031         up_write(&oi->ip_alloc_sem);
2032
2033         return ret;
2034 }
2035
2036 /*
2037  * ocfs2_xattr_block_find()
2038  *
2039  * Find extended attribute in external block and
2040  * fill search info into struct ocfs2_xattr_search.
2041  */
2042 static int ocfs2_xattr_block_find(struct inode *inode,
2043                                   int name_index,
2044                                   const char *name,
2045                                   struct ocfs2_xattr_search *xs)
2046 {
2047         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2048         struct buffer_head *blk_bh = NULL;
2049         struct ocfs2_xattr_block *xb;
2050         int ret = 0;
2051
2052         if (!di->i_xattr_loc)
2053                 return ret;
2054
2055         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2056                                      &blk_bh);
2057         if (ret < 0) {
2058                 mlog_errno(ret);
2059                 return ret;
2060         }
2061
2062         xs->xattr_bh = blk_bh;
2063         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2064
2065         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2066                 xs->header = &xb->xb_attrs.xb_header;
2067                 xs->base = (void *)xs->header;
2068                 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2069                 xs->here = xs->header->xh_entries;
2070
2071                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2072         } else
2073                 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2074                                                    name_index,
2075                                                    name, xs);
2076
2077         if (ret && ret != -ENODATA) {
2078                 xs->xattr_bh = NULL;
2079                 goto cleanup;
2080         }
2081         xs->not_found = ret;
2082         return 0;
2083 cleanup:
2084         brelse(blk_bh);
2085
2086         return ret;
2087 }
2088
2089 /*
2090  * ocfs2_xattr_block_set()
2091  *
2092  * Set, replace or remove an extended attribute into external block.
2093  *
2094  */
2095 static int ocfs2_xattr_block_set(struct inode *inode,
2096                                  struct ocfs2_xattr_info *xi,
2097                                  struct ocfs2_xattr_search *xs,
2098                                  struct ocfs2_xattr_set_ctxt *ctxt)
2099 {
2100         struct buffer_head *new_bh = NULL;
2101         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2102         struct ocfs2_dinode *di =  (struct ocfs2_dinode *)xs->inode_bh->b_data;
2103         handle_t *handle = ctxt->handle;
2104         struct ocfs2_xattr_block *xblk = NULL;
2105         u16 suballoc_bit_start;
2106         u32 num_got;
2107         u64 first_blkno;
2108         int ret;
2109
2110         if (!xs->xattr_bh) {
2111                 ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh,
2112                                               OCFS2_JOURNAL_ACCESS_CREATE);
2113                 if (ret < 0) {
2114                         mlog_errno(ret);
2115                         goto end;
2116                 }
2117
2118                 ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1,
2119                                            &suballoc_bit_start, &num_got,
2120                                            &first_blkno);
2121                 if (ret < 0) {
2122                         mlog_errno(ret);
2123                         goto end;
2124                 }
2125
2126                 new_bh = sb_getblk(inode->i_sb, first_blkno);
2127                 ocfs2_set_new_buffer_uptodate(inode, new_bh);
2128
2129                 ret = ocfs2_journal_access_xb(handle, inode, new_bh,
2130                                               OCFS2_JOURNAL_ACCESS_CREATE);
2131                 if (ret < 0) {
2132                         mlog_errno(ret);
2133                         goto end;
2134                 }
2135
2136                 /* Initialize ocfs2_xattr_block */
2137                 xs->xattr_bh = new_bh;
2138                 xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2139                 memset(xblk, 0, inode->i_sb->s_blocksize);
2140                 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2141                 xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
2142                 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2143                 xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
2144                 xblk->xb_blkno = cpu_to_le64(first_blkno);
2145
2146                 xs->header = &xblk->xb_attrs.xb_header;
2147                 xs->base = (void *)xs->header;
2148                 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2149                 xs->here = xs->header->xh_entries;
2150
2151                 ret = ocfs2_journal_dirty(handle, new_bh);
2152                 if (ret < 0) {
2153                         mlog_errno(ret);
2154                         goto end;
2155                 }
2156                 di->i_xattr_loc = cpu_to_le64(first_blkno);
2157                 ocfs2_journal_dirty(handle, xs->inode_bh);
2158         } else
2159                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2160
2161         if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2162                 /* Set extended attribute into external block */
2163                 ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2164                                             OCFS2_HAS_XATTR_FL);
2165                 if (!ret || ret != -ENOSPC)
2166                         goto end;
2167
2168                 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2169                 if (ret)
2170                         goto end;
2171         }
2172
2173         ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
2174
2175 end:
2176
2177         return ret;
2178 }
2179
2180 /* Check whether the new xattr can be inserted into the inode. */
2181 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2182                                        struct ocfs2_xattr_info *xi,
2183                                        struct ocfs2_xattr_search *xs)
2184 {
2185         u64 value_size;
2186         struct ocfs2_xattr_entry *last;
2187         int free, i;
2188         size_t min_offs = xs->end - xs->base;
2189
2190         if (!xs->header)
2191                 return 0;
2192
2193         last = xs->header->xh_entries;
2194
2195         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2196                 size_t offs = le16_to_cpu(last->xe_name_offset);
2197                 if (offs < min_offs)
2198                         min_offs = offs;
2199                 last += 1;
2200         }
2201
2202         free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
2203         if (free < 0)
2204                 return 0;
2205
2206         BUG_ON(!xs->not_found);
2207
2208         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2209                 value_size = OCFS2_XATTR_ROOT_SIZE;
2210         else
2211                 value_size = OCFS2_XATTR_SIZE(xi->value_len);
2212
2213         if (free >= sizeof(struct ocfs2_xattr_entry) +
2214                    OCFS2_XATTR_SIZE(strlen(xi->name)) + value_size)
2215                 return 1;
2216
2217         return 0;
2218 }
2219
2220 static int ocfs2_calc_xattr_set_need(struct inode *inode,
2221                                      struct ocfs2_dinode *di,
2222                                      struct ocfs2_xattr_info *xi,
2223                                      struct ocfs2_xattr_search *xis,
2224                                      struct ocfs2_xattr_search *xbs,
2225                                      int *clusters_need,
2226                                      int *meta_need,
2227                                      int *credits_need)
2228 {
2229         int ret = 0, old_in_xb = 0;
2230         int clusters_add = 0, meta_add = 0, credits = 0;
2231         struct buffer_head *bh = NULL;
2232         struct ocfs2_xattr_block *xb = NULL;
2233         struct ocfs2_xattr_entry *xe = NULL;
2234         struct ocfs2_xattr_value_root *xv = NULL;
2235         char *base = NULL;
2236         int name_offset, name_len = 0;
2237         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2238                                                     xi->value_len);
2239         u64 value_size;
2240
2241         /*
2242          * Calculate the clusters we need to write.
2243          * No matter whether we replace an old one or add a new one,
2244          * we need this for writing.
2245          */
2246         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2247                 credits += new_clusters *
2248                            ocfs2_clusters_to_blocks(inode->i_sb, 1);
2249
2250         if (xis->not_found && xbs->not_found) {
2251                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2252
2253                 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2254                         clusters_add += new_clusters;
2255                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2256                                                         &def_xv.xv.xr_list,
2257                                                         new_clusters);
2258                 }
2259
2260                 goto meta_guess;
2261         }
2262
2263         if (!xis->not_found) {
2264                 xe = xis->here;
2265                 name_offset = le16_to_cpu(xe->xe_name_offset);
2266                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2267                 base = xis->base;
2268                 credits += OCFS2_INODE_UPDATE_CREDITS;
2269         } else {
2270                 int i, block_off = 0;
2271                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2272                 xe = xbs->here;
2273                 name_offset = le16_to_cpu(xe->xe_name_offset);
2274                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2275                 i = xbs->here - xbs->header->xh_entries;
2276                 old_in_xb = 1;
2277
2278                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2279                         ret = ocfs2_xattr_bucket_get_name_value(inode,
2280                                                         bucket_xh(xbs->bucket),
2281                                                         i, &block_off,
2282                                                         &name_offset);
2283                         base = bucket_block(xbs->bucket, block_off);
2284                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2285                 } else {
2286                         base = xbs->base;
2287                         credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
2288                 }
2289         }
2290
2291         /*
2292          * delete a xattr doesn't need metadata and cluster allocation.
2293          * so just calculate the credits and return.
2294          *
2295          * The credits for removing the value tree will be extended
2296          * by ocfs2_remove_extent itself.
2297          */
2298         if (!xi->value) {
2299                 if (!ocfs2_xattr_is_local(xe))
2300                         credits += ocfs2_remove_extent_credits(inode->i_sb);
2301
2302                 goto out;
2303         }
2304
2305         /* do cluster allocation guess first. */
2306         value_size = le64_to_cpu(xe->xe_value_size);
2307
2308         if (old_in_xb) {
2309                 /*
2310                  * In xattr set, we always try to set the xe in inode first,
2311                  * so if it can be inserted into inode successfully, the old
2312                  * one will be removed from the xattr block, and this xattr
2313                  * will be inserted into inode as a new xattr in inode.
2314                  */
2315                 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
2316                         clusters_add += new_clusters;
2317                         credits += ocfs2_remove_extent_credits(inode->i_sb) +
2318                                     OCFS2_INODE_UPDATE_CREDITS;
2319                         if (!ocfs2_xattr_is_local(xe))
2320                                 credits += ocfs2_calc_extend_credits(
2321                                                         inode->i_sb,
2322                                                         &def_xv.xv.xr_list,
2323                                                         new_clusters);
2324                         goto out;
2325                 }
2326         }
2327
2328         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2329                 /* the new values will be stored outside. */
2330                 u32 old_clusters = 0;
2331
2332                 if (!ocfs2_xattr_is_local(xe)) {
2333                         old_clusters =  ocfs2_clusters_for_bytes(inode->i_sb,
2334                                                                  value_size);
2335                         xv = (struct ocfs2_xattr_value_root *)
2336                              (base + name_offset + name_len);
2337                         value_size = OCFS2_XATTR_ROOT_SIZE;
2338                 } else
2339                         xv = &def_xv.xv;
2340
2341                 if (old_clusters >= new_clusters) {
2342                         credits += ocfs2_remove_extent_credits(inode->i_sb);
2343                         goto out;
2344                 } else {
2345                         meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
2346                         clusters_add += new_clusters - old_clusters;
2347                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2348                                                              &xv->xr_list,
2349                                                              new_clusters -
2350                                                              old_clusters);
2351                         if (value_size >= OCFS2_XATTR_ROOT_SIZE)
2352                                 goto out;
2353                 }
2354         } else {
2355                 /*
2356                  * Now the new value will be stored inside. So if the new
2357                  * value is smaller than the size of value root or the old
2358                  * value, we don't need any allocation, otherwise we have
2359                  * to guess metadata allocation.
2360                  */
2361                 if ((ocfs2_xattr_is_local(xe) && value_size >= xi->value_len) ||
2362                     (!ocfs2_xattr_is_local(xe) &&
2363                      OCFS2_XATTR_ROOT_SIZE >= xi->value_len))
2364                         goto out;
2365         }
2366
2367 meta_guess:
2368         /* calculate metadata allocation. */
2369         if (di->i_xattr_loc) {
2370                 if (!xbs->xattr_bh) {
2371                         ret = ocfs2_read_xattr_block(inode,
2372                                                      le64_to_cpu(di->i_xattr_loc),
2373                                                      &bh);
2374                         if (ret) {
2375                                 mlog_errno(ret);
2376                                 goto out;
2377                         }
2378
2379                         xb = (struct ocfs2_xattr_block *)bh->b_data;
2380                 } else
2381                         xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2382
2383                 /*
2384                  * If there is already an xattr tree, good, we can calculate
2385                  * like other b-trees. Otherwise we may have the chance of
2386                  * create a tree, the credit calculation is borrowed from
2387                  * ocfs2_calc_extend_credits with root_el = NULL. And the
2388                  * new tree will be cluster based, so no meta is needed.
2389                  */
2390                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2391                         struct ocfs2_extent_list *el =
2392                                  &xb->xb_attrs.xb_root.xt_list;
2393                         meta_add += ocfs2_extend_meta_needed(el);
2394                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2395                                                              el, 1);
2396                 } else
2397                         credits += OCFS2_SUBALLOC_ALLOC + 1;
2398
2399                 /*
2400                  * This cluster will be used either for new bucket or for
2401                  * new xattr block.
2402                  * If the cluster size is the same as the bucket size, one
2403                  * more is needed since we may need to extend the bucket
2404                  * also.
2405                  */
2406                 clusters_add += 1;
2407                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2408                 if (OCFS2_XATTR_BUCKET_SIZE ==
2409                         OCFS2_SB(inode->i_sb)->s_clustersize) {
2410                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2411                         clusters_add += 1;
2412                 }
2413         } else {
2414                 meta_add += 1;
2415                 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
2416         }
2417 out:
2418         if (clusters_need)
2419                 *clusters_need = clusters_add;
2420         if (meta_need)
2421                 *meta_need = meta_add;
2422         if (credits_need)
2423                 *credits_need = credits;
2424         brelse(bh);
2425         return ret;
2426 }
2427
2428 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
2429                                      struct ocfs2_dinode *di,
2430                                      struct ocfs2_xattr_info *xi,
2431                                      struct ocfs2_xattr_search *xis,
2432                                      struct ocfs2_xattr_search *xbs,
2433                                      struct ocfs2_xattr_set_ctxt *ctxt,
2434                                      int *credits)
2435 {
2436         int clusters_add, meta_add, ret;
2437         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2438
2439         memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
2440
2441         ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
2442
2443         ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
2444                                         &clusters_add, &meta_add, credits);
2445         if (ret) {
2446                 mlog_errno(ret);
2447                 return ret;
2448         }
2449
2450         mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
2451              "credits = %d\n", xi->name, meta_add, clusters_add, *credits);
2452
2453         if (meta_add) {
2454                 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
2455                                                         &ctxt->meta_ac);
2456                 if (ret) {
2457                         mlog_errno(ret);
2458                         goto out;
2459                 }
2460         }
2461
2462         if (clusters_add) {
2463                 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
2464                 if (ret)
2465                         mlog_errno(ret);
2466         }
2467 out:
2468         if (ret) {
2469                 if (ctxt->meta_ac) {
2470                         ocfs2_free_alloc_context(ctxt->meta_ac);
2471                         ctxt->meta_ac = NULL;
2472                 }
2473
2474                 /*
2475                  * We cannot have an error and a non null ctxt->data_ac.
2476                  */
2477         }
2478
2479         return ret;
2480 }
2481
2482 static int __ocfs2_xattr_set_handle(struct inode *inode,
2483                                     struct ocfs2_dinode *di,
2484                                     struct ocfs2_xattr_info *xi,
2485                                     struct ocfs2_xattr_search *xis,
2486                                     struct ocfs2_xattr_search *xbs,
2487                                     struct ocfs2_xattr_set_ctxt *ctxt)
2488 {
2489         int ret = 0, credits, old_found;
2490
2491         if (!xi->value) {
2492                 /* Remove existing extended attribute */
2493                 if (!xis->not_found)
2494                         ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2495                 else if (!xbs->not_found)
2496                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2497         } else {
2498                 /* We always try to set extended attribute into inode first*/
2499                 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2500                 if (!ret && !xbs->not_found) {
2501                         /*
2502                          * If succeed and that extended attribute existing in
2503                          * external block, then we will remove it.
2504                          */
2505                         xi->value = NULL;
2506                         xi->value_len = 0;
2507
2508                         old_found = xis->not_found;
2509                         xis->not_found = -ENODATA;
2510                         ret = ocfs2_calc_xattr_set_need(inode,
2511                                                         di,
2512                                                         xi,
2513                                                         xis,
2514                                                         xbs,
2515                                                         NULL,
2516                                                         NULL,
2517                                                         &credits);
2518                         xis->not_found = old_found;
2519                         if (ret) {
2520                                 mlog_errno(ret);
2521                                 goto out;
2522                         }
2523
2524                         ret = ocfs2_extend_trans(ctxt->handle, credits +
2525                                         ctxt->handle->h_buffer_credits);
2526                         if (ret) {
2527                                 mlog_errno(ret);
2528                                 goto out;
2529                         }
2530                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2531                 } else if (ret == -ENOSPC) {
2532                         if (di->i_xattr_loc && !xbs->xattr_bh) {
2533                                 ret = ocfs2_xattr_block_find(inode,
2534                                                              xi->name_index,
2535                                                              xi->name, xbs);
2536                                 if (ret)
2537                                         goto out;
2538
2539                                 old_found = xis->not_found;
2540                                 xis->not_found = -ENODATA;
2541                                 ret = ocfs2_calc_xattr_set_need(inode,
2542                                                                 di,
2543                                                                 xi,
2544                                                                 xis,
2545                                                                 xbs,
2546                                                                 NULL,
2547                                                                 NULL,
2548                                                                 &credits);
2549                                 xis->not_found = old_found;
2550                                 if (ret) {
2551                                         mlog_errno(ret);
2552                                         goto out;
2553                                 }
2554
2555                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
2556                                         ctxt->handle->h_buffer_credits);
2557                                 if (ret) {
2558                                         mlog_errno(ret);
2559                                         goto out;
2560                                 }
2561                         }
2562                         /*
2563                          * If no space in inode, we will set extended attribute
2564                          * into external block.
2565                          */
2566                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2567                         if (ret)
2568                                 goto out;
2569                         if (!xis->not_found) {
2570                                 /*
2571                                  * If succeed and that extended attribute
2572                                  * existing in inode, we will remove it.
2573                                  */
2574                                 xi->value = NULL;
2575                                 xi->value_len = 0;
2576                                 xbs->not_found = -ENODATA;
2577                                 ret = ocfs2_calc_xattr_set_need(inode,
2578                                                                 di,
2579                                                                 xi,
2580                                                                 xis,
2581                                                                 xbs,
2582                                                                 NULL,
2583                                                                 NULL,
2584                                                                 &credits);
2585                                 if (ret) {
2586                                         mlog_errno(ret);
2587                                         goto out;
2588                                 }
2589
2590                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
2591                                                 ctxt->handle->h_buffer_credits);
2592                                 if (ret) {
2593                                         mlog_errno(ret);
2594                                         goto out;
2595                                 }
2596                                 ret = ocfs2_xattr_ibody_set(inode, xi,
2597                                                             xis, ctxt);
2598                         }
2599                 }
2600         }
2601
2602         if (!ret) {
2603                 /* Update inode ctime. */
2604                 ret = ocfs2_journal_access_di(ctxt->handle, inode,
2605                                               xis->inode_bh,
2606                                               OCFS2_JOURNAL_ACCESS_WRITE);
2607                 if (ret) {
2608                         mlog_errno(ret);
2609                         goto out;
2610                 }
2611
2612                 inode->i_ctime = CURRENT_TIME;
2613                 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
2614                 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
2615                 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
2616         }
2617 out:
2618         return ret;
2619 }
2620
2621 /*
2622  * This function only called duing creating inode
2623  * for init security/acl xattrs of the new inode.
2624  * All transanction credits have been reserved in mknod.
2625  */
2626 int ocfs2_xattr_set_handle(handle_t *handle,
2627                            struct inode *inode,
2628                            struct buffer_head *di_bh,
2629                            int name_index,
2630                            const char *name,
2631                            const void *value,
2632                            size_t value_len,
2633                            int flags,
2634                            struct ocfs2_alloc_context *meta_ac,
2635                            struct ocfs2_alloc_context *data_ac)
2636 {
2637         struct ocfs2_dinode *di;
2638         int ret;
2639
2640         struct ocfs2_xattr_info xi = {
2641                 .name_index = name_index,
2642                 .name = name,
2643                 .value = value,
2644                 .value_len = value_len,
2645         };
2646
2647         struct ocfs2_xattr_search xis = {
2648                 .not_found = -ENODATA,
2649         };
2650
2651         struct ocfs2_xattr_search xbs = {
2652                 .not_found = -ENODATA,
2653         };
2654
2655         struct ocfs2_xattr_set_ctxt ctxt = {
2656                 .handle = handle,
2657                 .meta_ac = meta_ac,
2658                 .data_ac = data_ac,
2659         };
2660
2661         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2662                 return -EOPNOTSUPP;
2663
2664         /*
2665          * In extreme situation, may need xattr bucket when
2666          * block size is too small. And we have already reserved
2667          * the credits for bucket in mknod.
2668          */
2669         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
2670                 xbs.bucket = ocfs2_xattr_bucket_new(inode);
2671                 if (!xbs.bucket) {
2672                         mlog_errno(-ENOMEM);
2673                         return -ENOMEM;
2674                 }
2675         }
2676
2677         xis.inode_bh = xbs.inode_bh = di_bh;
2678         di = (struct ocfs2_dinode *)di_bh->b_data;
2679
2680         down_write(&OCFS2_I(inode)->ip_xattr_sem);
2681
2682         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2683         if (ret)
2684                 goto cleanup;
2685         if (xis.not_found) {
2686                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2687                 if (ret)
2688                         goto cleanup;
2689         }
2690
2691         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2692
2693 cleanup:
2694         up_write(&OCFS2_I(inode)->ip_xattr_sem);
2695         brelse(xbs.xattr_bh);
2696         ocfs2_xattr_bucket_free(xbs.bucket);
2697
2698         return ret;
2699 }
2700
2701 /*
2702  * ocfs2_xattr_set()
2703  *
2704  * Set, replace or remove an extended attribute for this inode.
2705  * value is NULL to remove an existing extended attribute, else either
2706  * create or replace an extended attribute.
2707  */
2708 int ocfs2_xattr_set(struct inode *inode,
2709                     int name_index,
2710                     const char *name,
2711                     const void *value,
2712                     size_t value_len,
2713                     int flags)
2714 {
2715         struct buffer_head *di_bh = NULL;
2716         struct ocfs2_dinode *di;
2717         int ret, credits;
2718         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2719         struct inode *tl_inode = osb->osb_tl_inode;
2720         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2721
2722         struct ocfs2_xattr_info xi = {
2723                 .name_index = name_index,
2724                 .name = name,
2725                 .value = value,
2726                 .value_len = value_len,
2727         };
2728
2729         struct ocfs2_xattr_search xis = {
2730                 .not_found = -ENODATA,
2731         };
2732
2733         struct ocfs2_xattr_search xbs = {
2734                 .not_found = -ENODATA,
2735         };
2736
2737         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2738                 return -EOPNOTSUPP;
2739
2740         /*
2741          * Only xbs will be used on indexed trees.  xis doesn't need a
2742          * bucket.
2743          */
2744         xbs.bucket = ocfs2_xattr_bucket_new(inode);
2745         if (!xbs.bucket) {
2746                 mlog_errno(-ENOMEM);
2747                 return -ENOMEM;
2748         }
2749
2750         ret = ocfs2_inode_lock(inode, &di_bh, 1);
2751         if (ret < 0) {
2752                 mlog_errno(ret);
2753                 goto cleanup_nolock;
2754         }
2755         xis.inode_bh = xbs.inode_bh = di_bh;
2756         di = (struct ocfs2_dinode *)di_bh->b_data;
2757
2758         down_write(&OCFS2_I(inode)->ip_xattr_sem);
2759         /*
2760          * Scan inode and external block to find the same name
2761          * extended attribute and collect search infomation.
2762          */
2763         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2764         if (ret)
2765                 goto cleanup;
2766         if (xis.not_found) {
2767                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2768                 if (ret)
2769                         goto cleanup;
2770         }
2771
2772         if (xis.not_found && xbs.not_found) {
2773                 ret = -ENODATA;
2774                 if (flags & XATTR_REPLACE)
2775                         goto cleanup;
2776                 ret = 0;
2777                 if (!value)
2778                         goto cleanup;
2779         } else {
2780                 ret = -EEXIST;
2781                 if (flags & XATTR_CREATE)
2782                         goto cleanup;
2783         }
2784
2785
2786         mutex_lock(&tl_inode->i_mutex);
2787
2788         if (ocfs2_truncate_log_needs_flush(osb)) {
2789                 ret = __ocfs2_flush_truncate_log(osb);
2790                 if (ret < 0) {
2791                         mutex_unlock(&tl_inode->i_mutex);
2792                         mlog_errno(ret);
2793                         goto cleanup;
2794                 }
2795         }
2796         mutex_unlock(&tl_inode->i_mutex);
2797
2798         ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
2799                                         &xbs, &ctxt, &credits);
2800         if (ret) {
2801                 mlog_errno(ret);
2802                 goto cleanup;
2803         }
2804
2805         /* we need to update inode's ctime field, so add credit for it. */
2806         credits += OCFS2_INODE_UPDATE_CREDITS;
2807         ctxt.handle = ocfs2_start_trans(osb, credits);
2808         if (IS_ERR(ctxt.handle)) {
2809                 ret = PTR_ERR(ctxt.handle);
2810                 mlog_errno(ret);
2811                 goto cleanup;
2812         }
2813
2814         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2815
2816         ocfs2_commit_trans(osb, ctxt.handle);
2817
2818         if (ctxt.data_ac)
2819                 ocfs2_free_alloc_context(ctxt.data_ac);
2820         if (ctxt.meta_ac)
2821                 ocfs2_free_alloc_context(ctxt.meta_ac);
2822         if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
2823                 ocfs2_schedule_truncate_log_flush(osb, 1);
2824         ocfs2_run_deallocs(osb, &ctxt.dealloc);
2825 cleanup:
2826         up_write(&OCFS2_I(inode)->ip_xattr_sem);
2827         ocfs2_inode_unlock(inode, 1);
2828 cleanup_nolock:
2829         brelse(di_bh);
2830         brelse(xbs.xattr_bh);
2831         ocfs2_xattr_bucket_free(xbs.bucket);
2832
2833         return ret;
2834 }
2835
2836 /*
2837  * Find the xattr extent rec which may contains name_hash.
2838  * e_cpos will be the first name hash of the xattr rec.
2839  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
2840  */
2841 static int ocfs2_xattr_get_rec(struct inode *inode,
2842                                u32 name_hash,
2843                                u64 *p_blkno,
2844                                u32 *e_cpos,
2845                                u32 *num_clusters,
2846                                struct ocfs2_extent_list *el)
2847 {
2848         int ret = 0, i;
2849         struct buffer_head *eb_bh = NULL;
2850         struct ocfs2_extent_block *eb;
2851         struct ocfs2_extent_rec *rec = NULL;
2852         u64 e_blkno = 0;
2853
2854         if (el->l_tree_depth) {
2855                 ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh);
2856                 if (ret) {
2857                         mlog_errno(ret);
2858                         goto out;
2859                 }
2860
2861                 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2862                 el = &eb->h_list;
2863
2864                 if (el->l_tree_depth) {
2865                         ocfs2_error(inode->i_sb,
2866                                     "Inode %lu has non zero tree depth in "
2867                                     "xattr tree block %llu\n", inode->i_ino,
2868                                     (unsigned long long)eb_bh->b_blocknr);
2869                         ret = -EROFS;
2870                         goto out;
2871                 }
2872         }
2873
2874         for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
2875                 rec = &el->l_recs[i];
2876
2877                 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
2878                         e_blkno = le64_to_cpu(rec->e_blkno);
2879                         break;
2880                 }
2881         }
2882
2883         if (!e_blkno) {
2884                 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
2885                             "record (%u, %u, 0) in xattr", inode->i_ino,
2886                             le32_to_cpu(rec->e_cpos),
2887                             ocfs2_rec_clusters(el, rec));
2888                 ret = -EROFS;
2889                 goto out;
2890         }
2891
2892         *p_blkno = le64_to_cpu(rec->e_blkno);
2893         *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
2894         if (e_cpos)
2895                 *e_cpos = le32_to_cpu(rec->e_cpos);
2896 out:
2897         brelse(eb_bh);
2898         return ret;
2899 }
2900
/*
 * Signature of a per-bucket callback: it is handed the inode, one xattr
 * bucket and an opaque caller-supplied pointer, and returns an int.
 * ocfs2_iterate_xattr_buckets() accepts a pointer to such a function.
 */
typedef int xattr_bucket_func(struct inode *inode,
                              struct ocfs2_xattr_bucket *bucket,
                              void *para);
2904
2905 static int ocfs2_find_xe_in_bucket(struct inode *inode,
2906                                    struct ocfs2_xattr_bucket *bucket,
2907                                    int name_index,
2908                                    const char *name,
2909                                    u32 name_hash,
2910                                    u16 *xe_index,
2911                                    int *found)
2912 {
2913         int i, ret = 0, cmp = 1, block_off, new_offset;
2914         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
2915         size_t name_len = strlen(name);
2916         struct ocfs2_xattr_entry *xe = NULL;
2917         char *xe_name;
2918
2919         /*
2920          * We don't use binary search in the bucket because there
2921          * may be multiple entries with the same name hash.
2922          */
2923         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
2924                 xe = &xh->xh_entries[i];
2925
2926                 if (name_hash > le32_to_cpu(xe->xe_name_hash))
2927                         continue;
2928                 else if (name_hash < le32_to_cpu(xe->xe_name_hash))
2929                         break;
2930
2931                 cmp = name_index - ocfs2_xattr_get_type(xe);
2932                 if (!cmp)
2933                         cmp = name_len - xe->xe_name_len;
2934                 if (cmp)
2935                         continue;
2936
2937                 ret = ocfs2_xattr_bucket_get_name_value(inode,
2938                                                         xh,
2939                                                         i,
2940                                                         &block_off,
2941                                                         &new_offset);
2942                 if (ret) {
2943                         mlog_errno(ret);
2944                         break;
2945                 }
2946
2947
2948                 xe_name = bucket_block(bucket, block_off) + new_offset;
2949                 if (!memcmp(name, xe_name, name_len)) {
2950                         *xe_index = i;
2951                         *found = 1;
2952                         ret = 0;
2953                         break;
2954                 }
2955         }
2956
2957         return ret;
2958 }
2959
2960 /*
2961  * Find the specified xattr entry in a series of buckets.
2962  * This series start from p_blkno and last for num_clusters.
2963  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
2964  * the num of the valid buckets.
2965  *
2966  * Return the buffer_head this xattr should reside in. And if the xattr's
2967  * hash is in the gap of 2 buckets, return the lower bucket.
2968  */
2969 static int ocfs2_xattr_bucket_find(struct inode *inode,
2970                                    int name_index,
2971                                    const char *name,
2972                                    u32 name_hash,
2973                                    u64 p_blkno,
2974                                    u32 first_hash,
2975                                    u32 num_clusters,
2976                                    struct ocfs2_xattr_search *xs)
2977 {
2978         int ret, found = 0;
2979         struct ocfs2_xattr_header *xh = NULL;
2980         struct ocfs2_xattr_entry *xe = NULL;
2981         u16 index = 0;
2982         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2983         int low_bucket = 0, bucket, high_bucket;
2984         struct ocfs2_xattr_bucket *search;
2985         u32 last_hash;
2986         u64 blkno, lower_blkno = 0;
2987
2988         search = ocfs2_xattr_bucket_new(inode);
2989         if (!search) {
2990                 ret = -ENOMEM;
2991                 mlog_errno(ret);
2992                 goto out;
2993         }
2994
2995         ret = ocfs2_read_xattr_bucket(search, p_blkno);
2996         if (ret) {
2997                 mlog_errno(ret);
2998                 goto out;
2999         }
3000
3001         xh = bucket_xh(search);
3002         high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3003         while (low_bucket <= high_bucket) {
3004                 ocfs2_xattr_bucket_relse(search);
3005
3006                 bucket = (low_bucket + high_bucket) / 2;
3007                 blkno = p_blkno + bucket * blk_per_bucket;
3008                 ret = ocfs2_read_xattr_bucket(search, blkno);
3009                 if (ret) {
3010                         mlog_errno(ret);
3011                         goto out;
3012                 }
3013
3014                 xh = bucket_xh(search);
3015                 xe = &xh->xh_entries[0];
3016                 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3017                         high_bucket = bucket - 1;
3018                         continue;
3019                 }
3020
3021                 /*
3022                  * Check whether the hash of the last entry in our
3023                  * bucket is larger than the search one. for an empty
3024                  * bucket, the last one is also the first one.
3025                  */
3026                 if (xh->xh_count)
3027                         xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3028
3029                 last_hash = le32_to_cpu(xe->xe_name_hash);
3030
3031                 /* record lower_blkno which may be the insert place. */
3032                 lower_blkno = blkno;
3033
3034                 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3035                         low_bucket = bucket + 1;
3036                         continue;
3037                 }
3038
3039                 /* the searched xattr should reside in this bucket if exists. */
3040                 ret = ocfs2_find_xe_in_bucket(inode, search,
3041                                               name_index, name, name_hash,
3042                                               &index, &found);
3043                 if (ret) {
3044                         mlog_errno(ret);
3045                         goto out;
3046                 }
3047                 break;
3048         }
3049
3050         /*
3051          * Record the bucket we have found.
3052          * When the xattr's hash value is in the gap of 2 buckets, we will
3053          * always set it to the previous bucket.
3054          */
3055         if (!lower_blkno)
3056                 lower_blkno = p_blkno;
3057
3058         /* This should be in cache - we just read it during the search */
3059         ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3060         if (ret) {
3061                 mlog_errno(ret);
3062                 goto out;
3063         }
3064
3065         xs->header = bucket_xh(xs->bucket);
3066         xs->base = bucket_block(xs->bucket, 0);
3067         xs->end = xs->base + inode->i_sb->s_blocksize;
3068
3069         if (found) {
3070                 xs->here = &xs->header->xh_entries[index];
3071                 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
3072                      (unsigned long long)bucket_blkno(xs->bucket), index);
3073         } else
3074                 ret = -ENODATA;
3075
3076 out:
3077         ocfs2_xattr_bucket_free(search);
3078         return ret;
3079 }
3080
3081 static int ocfs2_xattr_index_block_find(struct inode *inode,
3082                                         struct buffer_head *root_bh,
3083                                         int name_index,
3084                                         const char *name,
3085                                         struct ocfs2_xattr_search *xs)
3086 {
3087         int ret;
3088         struct ocfs2_xattr_block *xb =
3089                         (struct ocfs2_xattr_block *)root_bh->b_data;
3090         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3091         struct ocfs2_extent_list *el = &xb_root->xt_list;
3092         u64 p_blkno = 0;
3093         u32 first_hash, num_clusters = 0;
3094         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3095
3096         if (le16_to_cpu(el->l_next_free_rec) == 0)
3097                 return -ENODATA;
3098
3099         mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
3100              name, name_hash, name_index);
3101
3102         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3103                                   &num_clusters, el);
3104         if (ret) {
3105                 mlog_errno(ret);
3106                 goto out;
3107         }
3108
3109         BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3110
3111         mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
3112              "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
3113              first_hash);
3114
3115         ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3116                                       p_blkno, first_hash, num_clusters, xs);
3117
3118 out:
3119         return ret;
3120 }
3121
3122 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3123                                        u64 blkno,
3124                                        u32 clusters,
3125                                        xattr_bucket_func *func,
3126                                        void *para)
3127 {
3128         int i, ret = 0;
3129         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
3130         u32 num_buckets = clusters * bpc;
3131         struct ocfs2_xattr_bucket *bucket;
3132
3133         bucket = ocfs2_xattr_bucket_new(inode);
3134         if (!bucket) {
3135                 mlog_errno(-ENOMEM);
3136                 return -ENOMEM;
3137         }
3138
3139         mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
3140              clusters, (unsigned long long)blkno);
3141
3142         for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3143                 ret = ocfs2_read_xattr_bucket(bucket, blkno);
3144                 if (ret) {
3145                         mlog_errno(ret);
3146                         break;
3147                 }
3148
3149                 /*
3150                  * The real bucket num in this series of blocks is stored
3151                  * in the 1st bucket.
3152                  */
3153                 if (i == 0)
3154                         num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3155
3156                 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
3157                      (unsigned long long)blkno,
3158                      le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3159                 if (func) {
3160                         ret = func(inode, bucket, para);
3161                         if (ret)
3162                                 mlog_errno(ret);
3163                         /* Fall through to bucket_relse() */
3164                 }
3165
3166                 ocfs2_xattr_bucket_relse(bucket);
3167                 if (ret)
3168                         break;
3169         }
3170
3171         ocfs2_xattr_bucket_free(bucket);
3172         return ret;
3173 }
3174
/*
 * State handed (as the opaque "para" pointer) to ocfs2_list_xattr_bucket()
 * while the buckets of an indexed xattr tree are being listed.
 */
struct ocfs2_xattr_tree_list {
        /* Caller's output buffer, forwarded to ocfs2_xattr_list_entry(). */
        char *buffer;
        /* Forwarded alongside buffer as its size argument. */
        size_t buffer_size;
        /*
         * Running total updated by ocfs2_xattr_list_entry(); it becomes the
         * return value of ocfs2_xattr_tree_list_index_block() on success.
         */
        size_t result;
};
3180
3181 static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
3182                                              struct ocfs2_xattr_header *xh,
3183                                              int index,
3184                                              int *block_off,
3185                                              int *new_offset)
3186 {
3187         u16 name_offset;
3188
3189         if (index < 0 || index >= le16_to_cpu(xh->xh_count))
3190                 return -EINVAL;
3191
3192         name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
3193
3194         *block_off = name_offset >> inode->i_sb->s_blocksize_bits;
3195         *new_offset = name_offset % inode->i_sb->s_blocksize;
3196
3197         return 0;
3198 }
3199
3200 static int ocfs2_list_xattr_bucket(struct inode *inode,
3201                                    struct ocfs2_xattr_bucket *bucket,
3202                                    void *para)
3203 {
3204         int ret = 0, type;
3205         struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
3206         int i, block_off, new_offset;
3207         const char *prefix, *name;
3208
3209         for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
3210                 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
3211                 type = ocfs2_xattr_get_type(entry);
3212                 prefix = ocfs2_xattr_prefix(type);
3213
3214                 if (prefix) {
3215                         ret = ocfs2_xattr_bucket_get_name_value(inode,
3216                                                                 bucket_xh(bucket),
3217                                                                 i,
3218                                                                 &block_off,
3219                                                                 &new_offset);
3220                         if (ret)
3221                                 break;
3222
3223                         name = (const char *)bucket_block(bucket, block_off) +
3224                                 new_offset;
3225                         ret = ocfs2_xattr_list_entry(xl->buffer,
3226                                                      xl->buffer_size,
3227                                                      &xl->result,
3228                                                      prefix, name,
3229                                                      entry->xe_name_len);
3230                         if (ret)
3231                                 break;
3232                 }
3233         }
3234
3235         return ret;
3236 }
3237
3238 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
3239                                              struct ocfs2_xattr_tree_root *xt,
3240                                              char *buffer,
3241                                              size_t buffer_size)
3242 {
3243         struct ocfs2_extent_list *el = &xt->xt_list;
3244         int ret = 0;
3245         u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
3246         u64 p_blkno = 0;
3247         struct ocfs2_xattr_tree_list xl = {
3248                 .buffer = buffer,
3249                 .buffer_size = buffer_size,
3250                 .result = 0,
3251         };
3252
3253         if (le16_to_cpu(el->l_next_free_rec) == 0)
3254                 return 0;
3255
3256         while (name_hash > 0) {
3257                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
3258                                           &e_cpos, &num_clusters, el);
3259                 if (ret) {
3260                         mlog_errno(ret);
3261                         goto out;
3262                 }
3263
3264                 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
3265                                                   ocfs2_list_xattr_bucket,
3266                                                   &xl);
3267                 if (ret) {
3268                         mlog_errno(ret);
3269                         goto out;
3270                 }
3271
3272                 if (e_cpos == 0)
3273                         break;
3274
3275                 name_hash = e_cpos - 1;
3276         }
3277
3278         ret = xl.result;
3279 out:
3280         return ret;
3281 }
3282
3283 static int cmp_xe(const void *a, const void *b)
3284 {
3285         const struct ocfs2_xattr_entry *l = a, *r = b;
3286         u32 l_hash = le32_to_cpu(l->xe_name_hash);
3287         u32 r_hash = le32_to_cpu(r->xe_name_hash);
3288
3289         if (l_hash > r_hash)
3290                 return 1;
3291         if (l_hash < r_hash)
3292                 return -1;
3293         return 0;
3294 }
3295
3296 static void swap_xe(void *a, void *b, int size)
3297 {
3298         struct ocfs2_xattr_entry *l = a, *r = b, tmp;
3299
3300         tmp = *l;
3301         memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
3302         memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
3303 }
3304
/*
 * When the ocfs2_xattr_block is filled up, a new bucket is created
 * and all the xattr entries are moved to the new bucket.
 * The header goes at the start of the bucket, and the names+values are
 * filled from the end.  This is why *target starts as the last buffer.
 * Note: we need to sort the entries since they are not saved in order
 * in the ocfs2_xattr_block.
 *
 * @inode:  inode owning the xattrs (supplies block size and bucket geometry).
 * @xb_bh:  buffer holding the source ocfs2_xattr_block.
 * @bucket: destination bucket; every block of it is zeroed first.
 *
 * The function reads xh_entries[count - 1] unconditionally, so it
 * assumes the source block holds at least one entry.
 */
static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
                                           struct buffer_head *xb_bh,
                                           struct ocfs2_xattr_bucket *bucket)
{
        int i, blocksize = inode->i_sb->s_blocksize;
        int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
        u16 offset, size, off_change;
        struct ocfs2_xattr_entry *xe;
        struct ocfs2_xattr_block *xb =
                                (struct ocfs2_xattr_block *)xb_bh->b_data;
        struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
        u16 count = le16_to_cpu(xb_xh->xh_count);
        char *src = xb_bh->b_data;
        /* Names/values land in the last block of the bucket. */
        char *target = bucket_block(bucket, blks - 1);

        mlog(0, "cp xattr from block %llu to bucket %llu\n",
             (unsigned long long)xb_bh->b_blocknr,
             (unsigned long long)bucket_blkno(bucket));

        /* Start from an all-zero bucket. */
        for (i = 0; i < blks; i++)
                memset(bucket_block(bucket, i), 0, blocksize);

        /*
         * Since the xe_name_offset is based on ocfs2_xattr_header,
         * there is an offset change corresponding to the change of
         * ocfs2_xattr_header's position.
         *
         * The name offset of the last entry is taken as the start of the
         * name/value area (presumably the last entry has the lowest
         * offset -- TODO confirm); everything from there to the end of
         * the block is copied.
         */
        off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
        xe = &xb_xh->xh_entries[count - 1];
        offset = le16_to_cpu(xe->xe_name_offset) + off_change;
        size = blocksize - offset;

        /* copy all the names and values. */
        memcpy(target + offset, src + offset, size);

        /* Init new header now. */
        xh->xh_count = xb_xh->xh_count;
        xh->xh_num_buckets = cpu_to_le16(1);
        xh->xh_name_value_len = cpu_to_le16(size);
        xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);

        /* copy all the entries. */
        target = bucket_block(bucket, 0);
        offset = offsetof(struct ocfs2_xattr_header, xh_entries);
        size = count * sizeof(struct ocfs2_xattr_entry);
        memcpy(target + offset, (char *)xb_xh + offset, size);

        /*
         * Change the xe offset for all the xe because of the move: the
         * data now sits in the bucket's last block and offsets are
         * relative to the bucket start rather than to the header inside
         * the xattr block.
         */
        off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
                 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
        for (i = 0; i < count; i++)
                le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);

        mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
             offset, size, off_change);

        /* Entries in a bucket are kept ordered by name hash. */
        sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
             cmp_xe, swap_xe);
}
3373
3374 /*
3375  * After we move xattr from block to index btree, we have to
3376  * update ocfs2_xattr_search to the new xe and base.
3377  *
3378  * When the entry is in xattr block, xattr_bh indicates the storage place.
3379  * While if the entry is in index b-tree, "bucket" indicates the
3380  * real place of the xattr.
3381  */
3382 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
3383                                             struct ocfs2_xattr_search *xs,
3384                                             struct buffer_head *old_bh)
3385 {
3386         char *buf = old_bh->b_data;
3387         struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
3388         struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
3389         int i;
3390
3391         xs->header = bucket_xh(xs->bucket);
3392         xs->base = bucket_block(xs->bucket, 0);
3393         xs->end = xs->base + inode->i_sb->s_blocksize;
3394
3395         if (xs->not_found)
3396                 return;
3397
3398         i = xs->here - old_xh->xh_entries;
3399         xs->here = &xs->header->xh_entries[i];
3400 }
3401
/*
 * Convert a plain (non-indexed) xattr block into the root of an xattr
 * index tree.
 *
 * One cluster is claimed from ctxt->data_ac, the existing entries of the
 * block in xs->xattr_bh are copied into a bucket at the start of that
 * cluster (xs->bucket), xs is re-pointed at the bucket, and the block's
 * xb_attrs area is rewritten as an ocfs2_xattr_tree_root with a single
 * extent record covering the new cluster.  OCFS2_XATTR_INDEXED is then
 * set in xb_flags.
 *
 * The whole operation runs under oi->ip_alloc_sem held for write and
 * inside the transaction ctxt->handle.  The block must not already be
 * indexed and xs->bucket must be allocated (both enforced by BUG_ON).
 *
 * Returns 0 on success or the first error from the journal access,
 * cluster claim or bucket initialisation calls.
 *
 * NOTE(review): on a failure after __ocfs2_claim_clusters() succeeded,
 * the claimed cluster is not released here -- presumably the caller
 * deals with that; confirm.
 */
static int ocfs2_xattr_create_index_block(struct inode *inode,
                                          struct ocfs2_xattr_search *xs,
                                          struct ocfs2_xattr_set_ctxt *ctxt)
{
        int ret;
        u32 bit_off, len;
        u64 blkno;
        handle_t *handle = ctxt->handle;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
        struct buffer_head *xb_bh = xs->xattr_bh;
        struct ocfs2_xattr_block *xb =
                        (struct ocfs2_xattr_block *)xb_bh->b_data;
        struct ocfs2_xattr_tree_root *xr;
        u16 xb_flags = le16_to_cpu(xb->xb_flags);

        mlog(0, "create xattr index block for %llu\n",
             (unsigned long long)xb_bh->b_blocknr);

        BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
        BUG_ON(!xs->bucket);

        /*
         * XXX:
         * We can use this lock for now, and maybe move to a dedicated mutex
         * if performance becomes a problem later.
         */
        down_write(&oi->ip_alloc_sem);

        /* The xattr block itself is rewritten below. */
        ret = ocfs2_journal_access_xb(handle, inode, xb_bh,
                                      OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        /* Claim exactly one cluster to hold the first bucket. */
        ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
                                     1, 1, &bit_off, &len);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        /*
         * The bucket may spread in many blocks, and
         * we will only touch the 1st block and the last block
         * in the whole bucket(one for entry and one for data).
         */
        blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);

        mlog(0, "allocate 1 cluster from %llu to xattr block\n",
             (unsigned long long)blkno);

        ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
                                                OCFS2_JOURNAL_ACCESS_CREATE);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        /* Move the entries and their names/values into the new bucket. */
        ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
        ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);

        /*
         * Must run before the memset below: it still reads the old
         * header in xb_bh to work out the index of xs->here.
         */
        ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);

        /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
        memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
               offsetof(struct ocfs2_xattr_block, xb_attrs));

        /* Depth-0 tree with one record: cpos 0 -> the new cluster. */
        xr = &xb->xb_attrs.xb_root;
        xr->xt_clusters = cpu_to_le32(1);
        xr->xt_last_eb_blk = 0;
        xr->xt_list.l_tree_depth = 0;
        xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
        xr->xt_list.l_next_free_rec = cpu_to_le16(1);

        xr->xt_list.l_recs[0].e_cpos = 0;
        xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
        xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);

        xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);

        ocfs2_journal_dirty(handle, xb_bh);

out:
        up_write(&oi->ip_alloc_sem);

        return ret;
}
3497
3498 static int cmp_xe_offset(const void *a, const void *b)
3499 {
3500         const struct ocfs2_xattr_entry *l = a, *r = b;
3501         u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
3502         u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
3503
3504         if (l_name_offset < r_name_offset)
3505                 return 1;
3506         if (l_name_offset > r_name_offset)
3507                 return -1;
3508         return 0;
3509 }
3510
3511 /*
3512  * defrag a xattr bucket if we find that the bucket has some
3513  * holes beteen name/value pairs.
3514  * We will move all the name/value pairs to the end of the bucket
3515  * so that we can spare some space for insertion.
3516  */
3517 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
3518                                      handle_t *handle,
3519                                      struct ocfs2_xattr_bucket *bucket)
3520 {
3521         int ret, i;
3522         size_t end, offset, len, value_len;
3523         struct ocfs2_xattr_header *xh;
3524         char *entries, *buf, *bucket_buf = NULL;
3525         u64 blkno = bucket_blkno(bucket);
3526         u16 xh_free_start;
3527         size_t blocksize = inode->i_sb->s_blocksize;
3528         struct ocfs2_xattr_entry *xe;
3529
3530         /*
3531          * In order to make the operation more efficient and generic,
3532          * we copy all the blocks into a contiguous memory and do the
3533          * defragment there, so if anything is error, we will not touch
3534          * the real block.
3535          */
3536         bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
3537         if (!bucket_buf) {
3538                 ret = -EIO;
3539                 goto out;
3540         }
3541
3542         buf = bucket_buf;
3543         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3544                 memcpy(buf, bucket_block(bucket, i), blocksize);
3545
3546         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
3547                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3548         if (ret < 0) {
3549                 mlog_errno(ret);
3550                 goto out;
3551         }
3552
3553         xh = (struct ocfs2_xattr_header *)bucket_buf;
3554         entries = (char *)xh->xh_entries;
3555         xh_free_start = le16_to_cpu(xh->xh_free_start);
3556
3557         mlog(0, "adjust xattr bucket in %llu, count = %u, "
3558              "xh_free_start = %u, xh_name_value_len = %u.\n",
3559              (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
3560              xh_free_start, le16_to_cpu(xh->xh_name_value_len));
3561
3562         /*
3563          * sort all the entries by their offset.
3564          * the largest will be the first, so that we can
3565          * move them to the end one by one.
3566          */
3567         sort(entries, le16_to_cpu(xh->xh_count),
3568              sizeof(struct ocfs2_xattr_entry),
3569              cmp_xe_offset, swap_xe);
3570
3571         /* Move all name/values to the end of the bucket. */
3572         xe = xh->xh_entries;
3573         end = OCFS2_XATTR_BUCKET_SIZE;
3574         for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
3575                 offset = le16_to_cpu(xe->xe_name_offset);
3576                 if (ocfs2_xattr_is_local(xe))
3577                         value_len = OCFS2_XATTR_SIZE(
3578                                         le64_to_cpu(xe->xe_value_size));
3579                 else
3580                         value_len = OCFS2_XATTR_ROOT_SIZE;
3581                 len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;
3582
3583                 /*
3584                  * We must make sure that the name/value pair
3585                  * exist in the same block. So adjust end to
3586                  * the previous block end if needed.
3587                  */
3588                 if (((end - len) / blocksize !=
3589                         (end - 1) / blocksize))
3590                         end = end - end % blocksize;
3591
3592                 if (end > offset + len) {
3593                         memmove(bucket_buf + end - len,
3594                                 bucket_buf + offset, len);
3595                         xe->xe_name_offset = cpu_to_le16(end - len);
3596                 }
3597
3598                 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
3599                                 "bucket %llu\n", (unsigned long long)blkno);
3600
3601                 end -= len;
3602         }
3603
3604         mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
3605                         "bucket %llu\n", (unsigned long long)blkno);
3606
3607         if (xh_free_start == end)
3608                 goto out;
3609
3610         memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
3611         xh->xh_free_start = cpu_to_le16(end);
3612
3613         /* sort the entries by their name_hash. */
3614         sort(entries, le16_to_cpu(xh->xh_count),
3615              sizeof(struct ocfs2_xattr_entry),
3616              cmp_xe, swap_xe);
3617
3618         buf = bucket_buf;
3619         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3620                 memcpy(bucket_block(bucket, i), buf, blocksize);
3621         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
3622
3623 out:
3624         kfree(bucket_buf);
3625         return ret;
3626 }
3627
/*
 * "first" is the bucket at the start of an existing extent.  new_blkno
 * points to a newly allocated extent.  Because we know each of our
 * clusters contains more than one bucket, we can easily split one
 * cluster at a bucket boundary.  So we take the last cluster of the
 * existing extent and split it down the middle.  We move the last half
 * of the buckets in the last cluster of the existing extent over to the
 * new extent.
 *
 * "target" is the bucket where we were hoping to insert our xattr.  If
 * the bucket move places the target in the new extent, both "first" and
 * "target" are released and re-read from the new extent: "first" from
 * new_blkno and "target" from its corresponding position there.
 *
 * num_clusters is the length of the existing extent in clusters.
 *
 * first_hash is passed through to ocfs2_mv_xattr_buckets(); it will be
 * set as the 1st xe's name_hash in the new extent.
 *
 * Returns 0 on success or the error from ocfs2_mv_xattr_buckets() /
 * ocfs2_read_xattr_bucket().
 */
static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
                                               handle_t *handle,
                                               struct ocfs2_xattr_bucket *first,
                                               struct ocfs2_xattr_bucket *target,
                                               u64 new_blkno,
                                               u32 num_clusters,
                                               u32 *first_hash)
{
        int ret;
        struct super_block *sb = inode->i_sb;
        int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
        int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
        /* Half of one cluster's worth of buckets moves to the new extent. */
        int to_move = num_buckets / 2;
        u64 src_blkno;
        /* First block of the last cluster of the existing extent. */
        u64 last_cluster_blkno = bucket_blkno(first) +
                ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));

        /* The extent must hold at least one full cluster of buckets ... */
        BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
        /* ... and a cluster must not be a single bucket. */
        BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);

        mlog(0, "move half of xattrs in cluster %llu to %llu\n",
             (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno);

        ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
                                     last_cluster_blkno, new_blkno,
                                     to_move, first_hash);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        /* This is the first bucket that got moved */
        src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);

        /*
         * If the target bucket was part of the moved buckets, we need to
         * update first and target.
         */
        if (bucket_blkno(target) >= src_blkno) {
                /*
                 * Find the block for the new target bucket: same distance
                 * from new_blkno as the old one was from the first moved
                 * bucket.  src_blkno is reused to hold that new location.
                 */
                src_blkno = new_blkno +
                        (bucket_blkno(target) - src_blkno);

                ocfs2_xattr_bucket_relse(first);
                ocfs2_xattr_bucket_relse(target);

                /*
                 * These shouldn't fail - the buffers are in the
                 * journal from ocfs2_cp_xattr_bucket().
                 */
                ret = ocfs2_read_xattr_bucket(first, new_blkno);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }
                ret = ocfs2_read_xattr_bucket(target, src_blkno);
                if (ret)
                        mlog_errno(ret);

        }

out:
        return ret;
}
3709
3710 /*
3711  * Find the suitable pos when we divide a bucket into 2.
3712  * We have to make sure the xattrs with the same hash value exist
3713  * in the same bucket.
3714  *
3715  * If this ocfs2_xattr_header covers more than one hash value, find a
3716  * place where the hash value changes.  Try to find the most even split.
3717  * The most common case is that all entries have different hash values,
3718  * and the first check we make will find a place to split.
3719  */
3720 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
3721 {
3722         struct ocfs2_xattr_entry *entries = xh->xh_entries;
3723         int count = le16_to_cpu(xh->xh_count);
3724         int delta, middle = count / 2;
3725
3726         /*
3727          * We start at the middle.  Each step gets farther away in both
3728          * directions.  We therefore hit the change in hash value
3729          * nearest to the middle.  Note that this loop does not execute for
3730          * count < 2.
3731          */
3732         for (delta = 0; delta < middle; delta++) {
3733                 /* Let's check delta earlier than middle */
3734                 if (cmp_xe(&entries[middle - delta - 1],
3735                            &entries[middle - delta]))
3736                         return middle - delta;
3737
3738                 /* For even counts, don't walk off the end */
3739                 if ((middle + delta + 1) == count)
3740                         continue;
3741
3742                 /* Now try delta past middle */
3743                 if (cmp_xe(&entries[middle + delta],
3744                            &entries[middle + delta + 1]))
3745                         return middle + delta + 1;
3746         }
3747
3748         /* Every entry had the same hash */
3749         return count;
3750 }
3751
3752 /*
3753  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
3754  * first_hash will record the 1st hash of the new bucket.
3755  *
3756  * Normally half of the xattrs will be moved.  But we have to make
3757  * sure that the xattrs with the same hash value are stored in the
3758  * same bucket. If all the xattrs in this bucket have the same hash
3759  * value, the new bucket will be initialized as an empty one and the
3760  * first_hash will be initialized as (hash_value+1).
3761  */
3762 static int ocfs2_divide_xattr_bucket(struct inode *inode,
3763                                     handle_t *handle,
3764                                     u64 blk,
3765                                     u64 new_blk,
3766                                     u32 *first_hash,
3767                                     int new_bucket_head)
3768 {
3769         int ret, i;
3770         int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
3771         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
3772         struct ocfs2_xattr_header *xh;
3773         struct ocfs2_xattr_entry *xe;
3774         int blocksize = inode->i_sb->s_blocksize;
3775
3776         mlog(0, "move some of xattrs from bucket %llu to %llu\n",
3777              (unsigned long long)blk, (unsigned long long)new_blk);
3778
3779         s_bucket = ocfs2_xattr_bucket_new(inode);
3780         t_bucket = ocfs2_xattr_bucket_new(inode);
3781         if (!s_bucket || !t_bucket) {
3782                 ret = -ENOMEM;
3783                 mlog_errno(ret);
3784                 goto out;
3785         }
3786
3787         ret = ocfs2_read_xattr_bucket(s_bucket, blk);
3788         if (ret) {
3789                 mlog_errno(ret);
3790                 goto out;
3791         }
3792
3793         ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
3794                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3795         if (ret) {
3796                 mlog_errno(ret);
3797                 goto out;
3798         }
3799
3800         /*
3801          * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
3802          * there's no need to read it.
3803          */
3804         ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
3805         if (ret) {
3806                 mlog_errno(ret);
3807                 goto out;
3808         }
3809
3810         /*
3811          * Hey, if we're overwriting t_bucket, what difference does
3812          * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
3813          * same part of ocfs2_cp_xattr_bucket().
3814          */
3815         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
3816                                                 new_bucket_head ?
3817                                                 OCFS2_JOURNAL_ACCESS_CREATE :
3818                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3819         if (ret) {
3820                 mlog_errno(ret);
3821                 goto out;
3822         }
3823
3824         xh = bucket_xh(s_bucket);
3825         count = le16_to_cpu(xh->xh_count);
3826         start = ocfs2_xattr_find_divide_pos(xh);
3827
3828         if (start == count) {
3829                 xe = &xh->xh_entries[start-1];
3830
3831                 /*
3832                  * initialized a new empty bucket here.
3833                  * The hash value is set as one larger than
3834                  * that of the last entry in the previous bucket.
3835                  */
3836                 for (i = 0; i < t_bucket->bu_blocks; i++)
3837                         memset(bucket_block(t_bucket, i), 0, blocksize);
3838
3839                 xh = bucket_xh(t_bucket);
3840                 xh->xh_free_start = cpu_to_le16(blocksize);
3841                 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
3842                 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
3843
3844                 goto set_num_buckets;
3845         }
3846
3847         /* copy the whole bucket to the new first. */
3848         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
3849
3850         /* update the new bucket. */
3851         xh = bucket_xh(t_bucket);
3852
3853         /*
3854          * Calculate the total name/value len and xh_free_start for
3855          * the old bucket first.
3856          */
3857         name_offset = OCFS2_XATTR_BUCKET_SIZE;
3858         name_value_len = 0;
3859         for (i = 0; i < start; i++) {
3860                 xe = &xh->xh_entries[i];
3861                 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3862                 if (ocfs2_xattr_is_local(xe))
3863                         xe_len +=
3864                            OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3865                 else
3866                         xe_len += OCFS2_XATTR_ROOT_SIZE;
3867                 name_value_len += xe_len;
3868                 if (le16_to_cpu(xe->xe_name_offset) < name_offset)
3869                         name_offset = le16_to_cpu(xe->xe_name_offset);
3870         }
3871
3872         /*
3873          * Now begin the modification to the new bucket.
3874          *
3875          * In the new bucket, We just move the xattr entry to the beginning
3876          * and don't touch the name/value. So there will be some holes in the
3877          * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
3878          * called.
3879          */
3880         xe = &xh->xh_entries[start];
3881         len = sizeof(struct ocfs2_xattr_entry) * (count - start);
3882         mlog(0, "mv xattr entry len %d from %d to %d\n", len,
3883              (int)((char *)xe - (char *)xh),
3884              (int)((char *)xh->xh_entries - (char *)xh));
3885         memmove((char *)xh->xh_entries, (char *)xe, len);
3886         xe = &xh->xh_entries[count - start];
3887         len = sizeof(struct ocfs2_xattr_entry) * start;
3888         memset((char *)xe, 0, len);
3889
3890         le16_add_cpu(&xh->xh_count, -start);
3891         le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
3892
3893         /* Calculate xh_free_start for the new bucket. */
3894         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
3895         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3896                 xe = &xh->xh_entries[i];
3897                 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3898                 if (ocfs2_xattr_is_local(xe))
3899                         xe_len +=
3900                            OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3901                 else
3902                         xe_len += OCFS2_XATTR_ROOT_SIZE;
3903                 if (le16_to_cpu(xe->xe_name_offset) <
3904                     le16_to_cpu(xh->xh_free_start))
3905                         xh->xh_free_start = xe->xe_name_offset;
3906         }
3907
3908 set_num_buckets:
3909         /* set xh->xh_num_buckets for the new xh. */
3910         if (new_bucket_head)
3911                 xh->xh_num_buckets = cpu_to_le16(1);
3912         else
3913                 xh->xh_num_buckets = 0;
3914
3915         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
3916
3917         /* store the first_hash of the new bucket. */
3918         if (first_hash)
3919                 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3920
3921         /*
3922          * Now only update the 1st block of the old bucket.  If we
3923          * just added a new empty bucket, there is no need to modify
3924          * it.
3925          */
3926         if (start == count)
3927                 goto out;
3928
3929         xh = bucket_xh(s_bucket);
3930         memset(&xh->xh_entries[start], 0,
3931                sizeof(struct ocfs2_xattr_entry) * (count - start));
3932         xh->xh_count = cpu_to_le16(start);
3933         xh->xh_free_start = cpu_to_le16(name_offset);
3934         xh->xh_name_value_len = cpu_to_le16(name_value_len);
3935
3936         ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
3937
3938 out:
3939         ocfs2_xattr_bucket_free(s_bucket);
3940         ocfs2_xattr_bucket_free(t_bucket);
3941
3942         return ret;
3943 }
3944
3945 /*
3946  * Copy xattr from one bucket to another bucket.
3947  *
3948  * The caller must make sure that the journal transaction
3949  * has enough space for journaling.
3950  */
3951 static int ocfs2_cp_xattr_bucket(struct inode *inode,
3952                                  handle_t *handle,
3953                                  u64 s_blkno,
3954                                  u64 t_blkno,
3955                                  int t_is_new)
3956 {
3957         int ret;
3958         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
3959
3960         BUG_ON(s_blkno == t_blkno);
3961
3962         mlog(0, "cp bucket %llu to %llu, target is %d\n",
3963              (unsigned long long)s_blkno, (unsigned long long)t_blkno,
3964              t_is_new);
3965
3966         s_bucket = ocfs2_xattr_bucket_new(inode);
3967         t_bucket = ocfs2_xattr_bucket_new(inode);
3968         if (!s_bucket || !t_bucket) {
3969                 ret = -ENOMEM;
3970                 mlog_errno(ret);
3971                 goto out;
3972         }
3973
3974         ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
3975         if (ret)
3976                 goto out;
3977
3978         /*
3979          * Even if !t_is_new, we're overwriting t_bucket.  Thus,
3980          * there's no need to read it.
3981          */
3982         ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
3983         if (ret)
3984                 goto out;
3985
3986         /*
3987          * Hey, if we're overwriting t_bucket, what difference does
3988          * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
3989          * cluster to fill, we came here from
3990          * ocfs2_mv_xattr_buckets(), and it is really new -
3991          * ACCESS_CREATE is required.  But we also might have moved data
3992          * out of t_bucket before extending back into it.
3993          * ocfs2_add_new_xattr_bucket() can do this - its call to
3994          * ocfs2_add_new_xattr_cluster() may have created a new extent
3995          * and copied out the end of the old extent.  Then it re-extends
3996          * the old extent back to create space for new xattrs.  That's
3997          * how we get here, and the bucket isn't really new.
3998          */
3999         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4000                                                 t_is_new ?
4001                                                 OCFS2_JOURNAL_ACCESS_CREATE :
4002                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4003         if (ret)
4004                 goto out;
4005
4006         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4007         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4008
4009 out:
4010         ocfs2_xattr_bucket_free(t_bucket);
4011         ocfs2_xattr_bucket_free(s_bucket);
4012
4013         return ret;
4014 }
4015
4016 /*
4017  * src_blk points to the start of an existing extent.  last_blk points to
4018  * last cluster in that extent.  to_blk points to a newly allocated
4019  * extent.  We copy the buckets from the cluster at last_blk to the new
4020  * extent.  If start_bucket is non-zero, we skip that many buckets before
4021  * we start copying.  The new extent's xh_num_buckets gets set to the
4022  * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4023  * by the same amount.
4024  */
4025 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4026                                   u64 src_blk, u64 last_blk, u64 to_blk,
4027                                   unsigned int start_bucket,
4028                                   u32 *first_hash)
4029 {
4030         int i, ret, credits;
4031         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4032         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4033         int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4034         struct ocfs2_xattr_bucket *old_first, *new_first;
4035
4036         mlog(0, "mv xattrs from cluster %llu to %llu\n",
4037              (unsigned long long)last_blk, (unsigned long long)to_blk);
4038
4039         BUG_ON(start_bucket >= num_buckets);
4040         if (start_bucket) {
4041                 num_buckets -= start_bucket;
4042                 last_blk += (start_bucket * blks_per_bucket);
4043         }
4044
4045         /* The first bucket of the original extent */
4046         old_first = ocfs2_xattr_bucket_new(inode);
4047         /* The first bucket of the new extent */
4048         new_first = ocfs2_xattr_bucket_new(inode);
4049         if (!old_first || !new_first) {
4050                 ret = -ENOMEM;
4051                 mlog_errno(ret);
4052                 goto out;
4053         }
4054
4055         ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4056         if (ret) {
4057                 mlog_errno(ret);
4058                 goto out;
4059         }
4060
4061         /*
4062          * We need to update the first bucket of the old extent and all
4063          * the buckets going to the new extent.
4064          */
4065         credits = ((num_buckets + 1) * blks_per_bucket) +
4066                 handle->h_buffer_credits;
4067         ret = ocfs2_extend_trans(handle, credits);
4068         if (ret) {
4069                 mlog_errno(ret);
4070                 goto out;
4071         }
4072
4073         ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4074                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4075         if (ret) {
4076                 mlog_errno(ret);
4077                 goto out;
4078         }
4079
4080         for (i = 0; i < num_buckets; i++) {
4081                 ret = ocfs2_cp_xattr_bucket(inode, handle,
4082                                             last_blk + (i * blks_per_bucket),
4083                                             to_blk + (i * blks_per_bucket),
4084                                             1);
4085                 if (ret) {
4086                         mlog_errno(ret);
4087                         goto out;
4088                 }
4089         }
4090
4091         /*
4092          * Get the new bucket ready before we dirty anything
4093          * (This actually shouldn't fail, because we already dirtied
4094          * it once in ocfs2_cp_xattr_bucket()).
4095          */
4096         ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4097         if (ret) {
4098                 mlog_errno(ret);
4099                 goto out;
4100         }
4101         ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4102                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4103         if (ret) {
4104                 mlog_errno(ret);
4105                 goto out;
4106         }
4107
4108         /* Now update the headers */
4109         le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4110         ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4111
4112         bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4113         ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4114
4115         if (first_hash)
4116                 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4117
4118 out:
4119         ocfs2_xattr_bucket_free(new_first);
4120         ocfs2_xattr_bucket_free(old_first);
4121         return ret;
4122 }
4123
4124 /*
4125  * Move some xattrs in this cluster to the new cluster.
4126  * This function should only be called when bucket size == cluster size.
4127  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4128  */
4129 static int ocfs2_divide_xattr_cluster(struct inode *inode,
4130                                       handle_t *handle,
4131                                       u64 prev_blk,
4132                                       u64 new_blk,
4133                                       u32 *first_hash)
4134 {
4135         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4136         int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
4137
4138         BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4139
4140         ret = ocfs2_extend_trans(handle, credits);
4141         if (ret) {
4142                 mlog_errno(ret);
4143                 return ret;
4144         }
4145
4146         /* Move half of the xattr in start_blk to the next bucket. */
4147         return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
4148                                           new_blk, first_hash, 1);
4149 }
4150
4151 /*
4152  * Move some xattrs from the old cluster to the new one since they are not
4153  * contiguous in ocfs2 xattr tree.
4154  *
4155  * new_blk starts a new separate cluster, and we will move some xattrs from
4156  * prev_blk to it. v_start will be set as the first name hash value in this
4157  * new cluster so that it can be used as e_cpos during tree insertion and
4158  * don't collide with our original b-tree operations. first_bh and header_bh
4159  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
4160  * to extend the insert bucket.
4161  *
4162  * The problem is how much xattr should we move to the new one and when should
4163  * we update first_bh and header_bh?
4164  * 1. If cluster size > bucket size, that means the previous cluster has more
4165  *    than 1 bucket, so just move half nums of bucket into the new cluster and
4166  *    update the first_bh and header_bh if the insert bucket has been moved
4167  *    to the new cluster.
4168  * 2. If cluster_size == bucket_size:
4169  *    a) If the previous extent rec has more than one cluster and the insert
4170  *       place isn't in the last cluster, copy the entire last cluster to the
4171  *       new one. This time, we don't need to upate the first_bh and header_bh
4172  *       since they will not be moved into the new cluster.
4173  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
4174  *       the new one. And we set the extend flag to zero if the insert place is
4175  *       moved into the new allocated cluster since no extend is needed.
4176  */
4177 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
4178                                             handle_t *handle,
4179                                             struct ocfs2_xattr_bucket *first,
4180                                             struct ocfs2_xattr_bucket *target,
4181                                             u64 new_blk,
4182                                             u32 prev_clusters,
4183                                             u32 *v_start,
4184                                             int *extend)
4185 {
4186         int ret;
4187
4188         mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
4189              (unsigned long long)bucket_blkno(first), prev_clusters,
4190              (unsigned long long)new_blk);
4191
4192         if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
4193                 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
4194                                                           handle,
4195                                                           first, target,
4196                                                           new_blk,
4197                                                           prev_clusters,
4198                                                           v_start);
4199                 if (ret)
4200                         mlog_errno(ret);
4201         } else {
4202                 /* The start of the last cluster in the first extent */
4203                 u64 last_blk = bucket_blkno(first) +
4204                         ((prev_clusters - 1) *
4205                          ocfs2_clusters_to_blocks(inode->i_sb, 1));
4206
4207                 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
4208                         ret = ocfs2_mv_xattr_buckets(inode, handle,
4209                                                      bucket_blkno(first),
4210                                                      last_blk, new_blk, 0,
4211                                                      v_start);
4212                         if (ret)
4213                                 mlog_errno(ret);
4214                 } else {
4215                         ret = ocfs2_divide_xattr_cluster(inode, handle,
4216                                                          last_blk, new_blk,
4217                                                          v_start);
4218                         if (ret)
4219                                 mlog_errno(ret);
4220
4221                         if ((bucket_blkno(target) == last_blk) && extend)
4222                                 *extend = 0;
4223                 }
4224         }
4225
4226         return ret;
4227 }
4228
4229 /*
4230  * Add a new cluster for xattr storage.
4231  *
4232  * If the new cluster is contiguous with the previous one, it will be
4233  * appended to the same extent record, and num_clusters will be updated.
4234  * If not, we will insert a new extent for it and move some xattrs in
4235  * the last cluster into the new allocated one.
4236  * We also need to limit the maximum size of a btree leaf, otherwise we'll
4237  * lose the benefits of hashing because we'll have to search large leaves.
4238  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
4239  * if it's bigger).
4240  *
4241  * first_bh is the first block of the previous extent rec and header_bh
4242  * indicates the bucket we will insert the new xattrs. They will be updated
4243  * when the header_bh is moved into the new cluster.
4244  */
4245 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
4246                                        struct buffer_head *root_bh,
4247                                        struct ocfs2_xattr_bucket *first,
4248                                        struct ocfs2_xattr_bucket *target,
4249                                        u32 *num_clusters,
4250                                        u32 prev_cpos,
4251                                        int *extend,
4252                                        struct ocfs2_xattr_set_ctxt *ctxt)
4253 {
4254         int ret;
4255         u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
4256         u32 prev_clusters = *num_clusters;
4257         u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
4258         u64 block;
4259         handle_t *handle = ctxt->handle;
4260         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4261         struct ocfs2_extent_tree et;
4262
4263         mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
4264              "previous xattr blkno = %llu\n",
4265              (unsigned long long)OCFS2_I(inode)->ip_blkno,
4266              prev_cpos, (unsigned long long)bucket_blkno(first));
4267
4268         ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
4269
4270         ret = ocfs2_journal_access_xb(handle, inode, root_bh,
4271                                       OCFS2_JOURNAL_ACCESS_WRITE);
4272         if (ret < 0) {
4273                 mlog_errno(ret);
4274                 goto leave;
4275         }
4276
4277         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
4278                                      clusters_to_add, &bit_off, &num_bits);
4279         if (ret < 0) {
4280                 if (ret != -ENOSPC)
4281                         mlog_errno(ret);
4282                 goto leave;
4283         }
4284
4285         BUG_ON(num_bits > clusters_to_add);
4286
4287         block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
4288         mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
4289              num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
4290
4291         if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
4292             (prev_clusters + num_bits) << osb->s_clustersize_bits <=
4293              OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
4294                 /*
4295                  * If this cluster is contiguous with the old one and
4296                  * adding this new cluster, we don't surpass the limit of
4297                  * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
4298                  * initialized and used like other buckets in the previous
4299                  * cluster.
4300                  * So add it as a contiguous one. The caller will handle
4301                  * its init process.
4302                  */
4303                 v_start = prev_cpos + prev_clusters;
4304                 *num_clusters = prev_clusters + num_bits;
4305                 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
4306                      num_bits);
4307         } else {
4308                 ret = ocfs2_adjust_xattr_cross_cluster(inode,
4309                                                        handle,
4310                                                        first,
4311                                                        target,
4312                                                        block,
4313                                                        prev_clusters,
4314                                                        &v_start,
4315                                                        extend);
4316                 if (ret) {
4317                         mlog_errno(ret);
4318                         goto leave;
4319                 }
4320         }
4321
4322         mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
4323              num_bits, (unsigned long long)block, v_start);
4324         ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
4325                                   num_bits, 0, ctxt->meta_ac);
4326         if (ret < 0) {
4327                 mlog_errno(ret);
4328                 goto leave;
4329         }
4330
4331         ret = ocfs2_journal_dirty(handle, root_bh);
4332         if (ret < 0)
4333                 mlog_errno(ret);
4334
4335 leave:
4336         return ret;
4337 }
4338
4339 /*
4340  * We are given an extent.  'first' is the bucket at the very front of
4341  * the extent.  The extent has space for an additional bucket past
4342  * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
4343  * of the target bucket.  We wish to shift every bucket past the target
4344  * down one, filling in that additional space.  When we get back to the
4345  * target, we split the target between itself and the now-empty bucket
4346  * at target+1 (aka, target_blkno + blks_per_bucket).
4347  */
4348 static int ocfs2_extend_xattr_bucket(struct inode *inode,
4349                                      handle_t *handle,
4350                                      struct ocfs2_xattr_bucket *first,
4351                                      u64 target_blk,
4352                                      u32 num_clusters)
4353 {
4354         int ret, credits;
4355         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4356         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4357         u64 end_blk;
4358         u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
4359
4360         mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
4361              "from %llu, len = %u\n", (unsigned long long)target_blk,
4362              (unsigned long long)bucket_blkno(first), num_clusters);
4363
4364         /* The extent must have room for an additional bucket */
4365         BUG_ON(new_bucket >=
4366                (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
4367
4368         /* end_blk points to the last existing bucket */
4369         end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
4370
4371         /*
4372          * end_blk is the start of the last existing bucket.
4373          * Thus, (end_blk - target_blk) covers the target bucket and
4374          * every bucket after it up to, but not including, the last
4375          * existing bucket.  Then we add the last existing bucket, the
4376          * new bucket, and the first bucket (3 * blk_per_bucket).
4377          */
4378         credits = (end_blk - target_blk) + (3 * blk_per_bucket) +
4379                   handle->h_buffer_credits;
4380         ret = ocfs2_extend_trans(handle, credits);
4381         if (ret) {
4382                 mlog_errno(ret);
4383                 goto out;
4384         }
4385
4386         ret = ocfs2_xattr_bucket_journal_access(handle, first,
4387                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4388         if (ret) {
4389                 mlog_errno(ret);
4390                 goto out;
4391         }
4392
4393         while (end_blk != target_blk) {
4394                 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
4395                                             end_blk + blk_per_bucket, 0);
4396                 if (ret)
4397                         goto out;
4398                 end_blk -= blk_per_bucket;
4399         }
4400
4401         /* Move half of the xattr in target_blkno to the next bucket. */
4402         ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
4403                                         target_blk + blk_per_bucket, NULL, 0);
4404
4405         le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
4406         ocfs2_xattr_bucket_journal_dirty(handle, first);
4407
4408 out:
4409         return ret;
4410 }
4411
4412 /*
4413  * Add new xattr bucket in an extent record and adjust the buckets
4414  * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
4415  * bucket we want to insert into.
4416  *
4417  * In the easy case, we will move all the buckets after target down by
4418  * one. Half of target's xattrs will be moved to the next bucket.
4419  *
4420  * If current cluster is full, we'll allocate a new one.  This may not
4421  * be contiguous.  The underlying calls will make sure that there is
4422  * space for the insert, shifting buckets around if necessary.
4423  * 'target' may be moved by those calls.
4424  */
4425 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
4426                                       struct buffer_head *xb_bh,
4427                                       struct ocfs2_xattr_bucket *target,
4428                                       struct ocfs2_xattr_set_ctxt *ctxt)
4429 {
4430         struct ocfs2_xattr_block *xb =
4431                         (struct ocfs2_xattr_block *)xb_bh->b_data;
4432         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
4433         struct ocfs2_extent_list *el = &xb_root->xt_list;
4434         u32 name_hash =
4435                 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
4436         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4437         int ret, num_buckets, extend = 1;
4438         u64 p_blkno;
4439         u32 e_cpos, num_clusters;
4440         /* The bucket at the front of the extent */
4441         struct ocfs2_xattr_bucket *first;
4442
4443         mlog(0, "Add new xattr bucket starting from %llu\n",
4444              (unsigned long long)bucket_blkno(target));
4445
4446         /* The first bucket of the original extent */
4447         first = ocfs2_xattr_bucket_new(inode);
4448         if (!first) {
4449                 ret = -ENOMEM;
4450                 mlog_errno(ret);
4451                 goto out;
4452         }
4453
4454         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
4455                                   &num_clusters, el);
4456         if (ret) {
4457                 mlog_errno(ret);
4458                 goto out;
4459         }
4460
4461         ret = ocfs2_read_xattr_bucket(first, p_blkno);
4462         if (ret) {
4463                 mlog_errno(ret);
4464                 goto out;
4465         }
4466
4467         num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
4468         if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
4469                 /*
4470                  * This can move first+target if the target bucket moves
4471                  * to the new extent.
4472                  */
4473                 ret = ocfs2_add_new_xattr_cluster(inode,
4474                                                   xb_bh,
4475                                                   first,
4476                                                   target,
4477                                                   &num_clusters,
4478                                                   e_cpos,
4479                                                   &extend,
4480                                                   ctxt);
4481                 if (ret) {
4482                         mlog_errno(ret);
4483                         goto out;
4484                 }
4485         }
4486
4487         if (extend) {
4488                 ret = ocfs2_extend_xattr_bucket(inode,
4489                                                 ctxt->handle,
4490                                                 first,
4491                                                 bucket_blkno(target),
4492                                                 num_clusters);
4493                 if (ret)
4494                         mlog_errno(ret);
4495         }
4496
4497 out:
4498         ocfs2_xattr_bucket_free(first);
4499
4500         return ret;
4501 }
4502
4503 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
4504                                         struct ocfs2_xattr_bucket *bucket,
4505                                         int offs)
4506 {
4507         int block_off = offs >> inode->i_sb->s_blocksize_bits;
4508
4509         offs = offs % inode->i_sb->s_blocksize;
4510         return bucket_block(bucket, block_off) + offs;
4511 }
4512
4513 /*
4514  * Handle the normal xattr set, including replace, delete and new.
4515  *
 * Note: "local" indicates the real data's locality. So we can't
 * just judge its bucket locality by its length.
4518  */
4519 static void ocfs2_xattr_set_entry_normal(struct inode *inode,
4520                                          struct ocfs2_xattr_info *xi,
4521                                          struct ocfs2_xattr_search *xs,
4522                                          u32 name_hash,
4523                                          int local)
4524 {
4525         struct ocfs2_xattr_entry *last, *xe;
4526         int name_len = strlen(xi->name);
4527         struct ocfs2_xattr_header *xh = xs->header;
4528         u16 count = le16_to_cpu(xh->xh_count), start;
4529         size_t blocksize = inode->i_sb->s_blocksize;
4530         char *val;
4531         size_t offs, size, new_size;
4532
4533         last = &xh->xh_entries[count];
4534         if (!xs->not_found) {
4535                 xe = xs->here;
4536                 offs = le16_to_cpu(xe->xe_name_offset);
4537                 if (ocfs2_xattr_is_local(xe))
4538                         size = OCFS2_XATTR_SIZE(name_len) +
4539                         OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4540                 else
4541                         size = OCFS2_XATTR_SIZE(name_len) +
4542                         OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4543
4544                 /*
4545                  * If the new value will be stored outside, xi->value has been
4546                  * initalized as an empty ocfs2_xattr_value_root, and the same
4547                  * goes with xi->value_len, so we can set new_size safely here.
4548                  * See ocfs2_xattr_set_in_bucket.
4549                  */
4550                 new_size = OCFS2_XATTR_SIZE(name_len) +
4551                            OCFS2_XATTR_SIZE(xi->value_len);
4552
4553                 le16_add_cpu(&xh->xh_name_value_len, -size);
4554                 if (xi->value) {
4555                         if (new_size > size)
4556                                 goto set_new_name_value;
4557
4558                         /* Now replace the old value with new one. */
4559                         if (local)
4560                                 xe->xe_value_size = cpu_to_le64(xi->value_len);
4561                         else
4562                                 xe->xe_value_size = 0;
4563
4564                         val = ocfs2_xattr_bucket_get_val(inode,
4565                                                          xs->bucket, offs);
4566                         memset(val + OCFS2_XATTR_SIZE(name_len), 0,
4567                                size - OCFS2_XATTR_SIZE(name_len));
4568                         if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
4569                                 memcpy(val + OCFS2_XATTR_SIZE(name_len),
4570                                        xi->value, xi->value_len);
4571
4572                         le16_add_cpu(&xh->xh_name_value_len, new_size);
4573                         ocfs2_xattr_set_local(xe, local);
4574                         return;
4575                 } else {
4576                         /*
4577                          * Remove the old entry if there is more than one.
4578                          * We don't remove the last entry so that we can
4579                          * use it to indicate the hash value of the empty
4580                          * bucket.
4581                          */
4582                         last -= 1;
4583                         le16_add_cpu(&xh->xh_count, -1);
4584                         if (xh->xh_count) {
4585                                 memmove(xe, xe + 1,
4586                                         (void *)last - (void *)xe);
4587                                 memset(last, 0,
4588                                        sizeof(struct ocfs2_xattr_entry));
4589                         } else
4590                                 xh->xh_free_start =
4591                                         cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4592
4593                         return;
4594                 }
4595         } else {
4596                 /* find a new entry for insert. */
4597                 int low = 0, high = count - 1, tmp;
4598                 struct ocfs2_xattr_entry *tmp_xe;
4599
4600                 while (low <= high && count) {
4601                         tmp = (low + high) / 2;
4602                         tmp_xe = &xh->xh_entries[tmp];
4603
4604                         if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
4605                                 low = tmp + 1;
4606                         else if (name_hash <
4607                                  le32_to_cpu(tmp_xe->xe_name_hash))
4608                                 high = tmp - 1;
4609                         else {
4610                                 low = tmp;
4611                                 break;
4612                         }
4613                 }
4614
4615                 xe = &xh->xh_entries[low];
4616                 if (low != count)
4617                         memmove(xe + 1, xe, (void *)last - (void *)xe);
4618
4619                 le16_add_cpu(&xh->xh_count, 1);
4620                 memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
4621                 xe->xe_name_hash = cpu_to_le32(name_hash);
4622                 xe->xe_name_len = name_len;
4623                 ocfs2_xattr_set_type(xe, xi->name_index);
4624         }
4625
4626 set_new_name_value:
4627         /* Insert the new name+value. */
4628         size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);
4629
4630         /*
4631          * We must make sure that the name/value pair
4632          * exists in the same block.
4633          */
4634         offs = le16_to_cpu(xh->xh_free_start);
4635         start = offs - size;
4636
4637         if (start >> inode->i_sb->s_blocksize_bits !=
4638             (offs - 1) >> inode->i_sb->s_blocksize_bits) {
4639                 offs = offs - offs % blocksize;
4640                 xh->xh_free_start = cpu_to_le16(offs);
4641         }
4642
4643         val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size);
4644         xe->xe_name_offset = cpu_to_le16(offs - size);
4645
4646         memset(val, 0, size);
4647         memcpy(val, xi->name, name_len);
4648         memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);
4649
4650         xe->xe_value_size = cpu_to_le64(xi->value_len);
4651         ocfs2_xattr_set_local(xe, local);
4652         xs->here = xe;
4653         le16_add_cpu(&xh->xh_free_start, -size);
4654         le16_add_cpu(&xh->xh_name_value_len, size);
4655
4656         return;
4657 }
4658
4659 /*
4660  * Set the xattr entry in the specified bucket.
4661  * The bucket is indicated by xs->bucket and it should have the enough
4662  * space for the xattr insertion.
4663  */
4664 static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4665                                            handle_t *handle,
4666                                            struct ocfs2_xattr_info *xi,
4667                                            struct ocfs2_xattr_search *xs,
4668                                            u32 name_hash,
4669                                            int local)
4670 {
4671         int ret;
4672         u64 blkno;
4673
4674         mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
4675              (unsigned long)xi->value_len, xi->name_index,
4676              (unsigned long long)bucket_blkno(xs->bucket));
4677
4678         if (!xs->bucket->bu_bhs[1]) {
4679                 blkno = bucket_blkno(xs->bucket);
4680                 ocfs2_xattr_bucket_relse(xs->bucket);
4681                 ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
4682                 if (ret) {
4683                         mlog_errno(ret);
4684                         goto out;
4685                 }
4686         }
4687
4688         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4689                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4690         if (ret < 0) {
4691                 mlog_errno(ret);
4692                 goto out;
4693         }
4694
4695         ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
4696         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4697
4698 out:
4699         return ret;
4700 }
4701
4702 /*
4703  * Truncate the specified xe_off entry in xattr bucket.
4704  * bucket is indicated by header_bh and len is the new length.
4705  * Both the ocfs2_xattr_value_root and the entry will be updated here.
4706  *
4707  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4708  */
4709 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4710                                              struct ocfs2_xattr_bucket *bucket,
4711                                              int xe_off,
4712                                              int len,
4713                                              struct ocfs2_xattr_set_ctxt *ctxt)
4714 {
4715         int ret, offset;
4716         u64 value_blk;
4717         struct ocfs2_xattr_entry *xe;
4718         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4719         size_t blocksize = inode->i_sb->s_blocksize;
4720         struct ocfs2_xattr_value_buf vb = {
4721                 .vb_access = ocfs2_journal_access,
4722         };
4723
4724         xe = &xh->xh_entries[xe_off];
4725
4726         BUG_ON(!xe || ocfs2_xattr_is_local(xe));
4727
4728         offset = le16_to_cpu(xe->xe_name_offset) +
4729                  OCFS2_XATTR_SIZE(xe->xe_name_len);
4730
4731         value_blk = offset / blocksize;
4732
4733         /* We don't allow ocfs2_xattr_value to be stored in different block. */
4734         BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4735
4736         vb.vb_bh = bucket->bu_bhs[value_blk];
4737         BUG_ON(!vb.vb_bh);
4738
4739         vb.vb_xv = (struct ocfs2_xattr_value_root *)
4740                 (vb.vb_bh->b_data + offset % blocksize);
4741
4742         /*
4743          * From here on out we have to dirty the bucket.  The generic
4744          * value calls only modify one of the bucket's bhs, but we need
4745          * to send the bucket at once.  So if they error, they *could* have
4746          * modified something.  We have to assume they did, and dirty
4747          * the whole bucket.  This leaves us in a consistent state.
4748          */
4749         mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4750              xe_off, (unsigned long long)bucket_blkno(bucket), len);
4751         ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
4752         if (ret) {
4753                 mlog_errno(ret);
4754                 goto out;
4755         }
4756
4757         ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
4758                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4759         if (ret) {
4760                 mlog_errno(ret);
4761                 goto out;
4762         }
4763
4764         xe->xe_value_size = cpu_to_le64(len);
4765
4766         ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
4767
4768 out:
4769         return ret;
4770 }
4771
4772 static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4773                                         struct ocfs2_xattr_search *xs,
4774                                         int len,
4775                                         struct ocfs2_xattr_set_ctxt *ctxt)
4776 {
4777         int ret, offset;
4778         struct ocfs2_xattr_entry *xe = xs->here;
4779         struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
4780
4781         BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
4782
4783         offset = xe - xh->xh_entries;
4784         ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket,
4785                                                 offset, len, ctxt);
4786         if (ret)
4787                 mlog_errno(ret);
4788
4789         return ret;
4790 }
4791
4792 static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4793                                                 handle_t *handle,
4794                                                 struct ocfs2_xattr_search *xs,
4795                                                 char *val,
4796                                                 int value_len)
4797 {
4798         int ret, offset, block_off;
4799         struct ocfs2_xattr_value_root *xv;
4800         struct ocfs2_xattr_entry *xe = xs->here;
4801         struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
4802         void *base;
4803
4804         BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
4805
4806         ret = ocfs2_xattr_bucket_get_name_value(inode, xh,
4807                                                 xe - xh->xh_entries,
4808                                                 &block_off,
4809                                                 &offset);
4810         if (ret) {
4811                 mlog_errno(ret);
4812                 goto out;
4813         }
4814
4815         base = bucket_block(xs->bucket, block_off);
4816         xv = (struct ocfs2_xattr_value_root *)(base + offset +
4817                  OCFS2_XATTR_SIZE(xe->xe_name_len));
4818
4819         ret = __ocfs2_xattr_set_value_outside(inode, handle,
4820                                               xv, val, value_len);
4821         if (ret)
4822                 mlog_errno(ret);
4823 out:
4824         return ret;
4825 }
4826
4827 static int ocfs2_rm_xattr_cluster(struct inode *inode,
4828                                   struct buffer_head *root_bh,
4829                                   u64 blkno,
4830                                   u32 cpos,
4831                                   u32 len)
4832 {
4833         int ret;
4834         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4835         struct inode *tl_inode = osb->osb_tl_inode;
4836         handle_t *handle;
4837         struct ocfs2_xattr_block *xb =
4838                         (struct ocfs2_xattr_block *)root_bh->b_data;
4839         struct ocfs2_alloc_context *meta_ac = NULL;
4840         struct ocfs2_cached_dealloc_ctxt dealloc;
4841         struct ocfs2_extent_tree et;
4842
4843         ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
4844
4845         ocfs2_init_dealloc_ctxt(&dealloc);
4846
4847         mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
4848              cpos, len, (unsigned long long)blkno);
4849
4850         ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len);
4851
4852         ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
4853         if (ret) {
4854                 mlog_errno(ret);
4855                 return ret;
4856         }
4857
4858         mutex_lock(&tl_inode->i_mutex);
4859
4860         if (ocfs2_truncate_log_needs_flush(osb)) {
4861                 ret = __ocfs2_flush_truncate_log(osb);
4862                 if (ret < 0) {
4863                         mlog_errno(ret);
4864                         goto out;
4865                 }
4866         }
4867
4868         handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
4869         if (IS_ERR(handle)) {
4870                 ret = -ENOMEM;
4871                 mlog_errno(ret);
4872                 goto out;
4873         }
4874
4875         ret = ocfs2_journal_access_xb(handle, inode, root_bh,
4876                                       OCFS2_JOURNAL_ACCESS_WRITE);
4877         if (ret) {
4878                 mlog_errno(ret);
4879                 goto out_commit;
4880         }
4881
4882         ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
4883                                   &dealloc);
4884         if (ret) {
4885                 mlog_errno(ret);
4886                 goto out_commit;
4887         }
4888
4889         le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
4890
4891         ret = ocfs2_journal_dirty(handle, root_bh);
4892         if (ret) {
4893                 mlog_errno(ret);
4894                 goto out_commit;
4895         }
4896
4897         ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
4898         if (ret)
4899                 mlog_errno(ret);
4900
4901 out_commit:
4902         ocfs2_commit_trans(osb, handle);
4903 out:
4904         ocfs2_schedule_truncate_log_flush(osb, 1);
4905
4906         mutex_unlock(&tl_inode->i_mutex);
4907
4908         if (meta_ac)
4909                 ocfs2_free_alloc_context(meta_ac);
4910
4911         ocfs2_run_deallocs(osb, &dealloc);
4912
4913         return ret;
4914 }
4915
4916 static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
4917                                          handle_t *handle,
4918                                          struct ocfs2_xattr_search *xs)
4919 {
4920         struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
4921         struct ocfs2_xattr_entry *last = &xh->xh_entries[
4922                                                 le16_to_cpu(xh->xh_count) - 1];
4923         int ret = 0;
4924
4925         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4926                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4927         if (ret) {
4928                 mlog_errno(ret);
4929                 return;
4930         }
4931
4932         /* Remove the old entry. */
4933         memmove(xs->here, xs->here + 1,
4934                 (void *)last - (void *)xs->here);
4935         memset(last, 0, sizeof(struct ocfs2_xattr_entry));
4936         le16_add_cpu(&xh->xh_count, -1);
4937
4938         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4939 }
4940
4941 /*
4942  * Set the xattr name/value in the bucket specified in xs.
4943  *
4944  * As the new value in xi may be stored in the bucket or in an outside cluster,
4945  * we divide the whole process into 3 steps:
4946  * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
4947  * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
4948  * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
4949  * 4. If the clusters for the new outside value can't be allocated, we need
4950  *    to free the xattr we allocated in set.
4951  */
4952 static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4953                                      struct ocfs2_xattr_info *xi,
4954                                      struct ocfs2_xattr_search *xs,
4955                                      struct ocfs2_xattr_set_ctxt *ctxt)
4956 {
4957         int ret, local = 1;
4958         size_t value_len;
4959         char *val = (char *)xi->value;
4960         struct ocfs2_xattr_entry *xe = xs->here;
4961         u32 name_hash = ocfs2_xattr_name_hash(inode, xi->name,
4962                                               strlen(xi->name));
4963
4964         if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
4965                 /*
4966                  * We need to truncate the xattr storage first.
4967                  *
4968                  * If both the old and new value are stored to
4969                  * outside block, we only need to truncate
4970                  * the storage and then set the value outside.
4971                  *
4972                  * If the new value should be stored within block,
4973                  * we should free all the outside block first and
4974                  * the modification to the xattr block will be done
4975                  * by following steps.
4976                  */
4977                 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4978                         value_len = xi->value_len;
4979                 else
4980                         value_len = 0;
4981
4982                 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4983                                                            value_len,
4984                                                            ctxt);
4985                 if (ret)
4986                         goto out;
4987
4988                 if (value_len)
4989                         goto set_value_outside;
4990         }
4991
4992         value_len = xi->value_len;
4993         /* So we have to handle the inside block change now. */
4994         if (value_len > OCFS2_XATTR_INLINE_SIZE) {
4995                 /*
4996                  * If the new value will be stored outside of block,
4997                  * initalize a new empty value root and insert it first.
4998                  */
4999                 local = 0;
5000                 xi->value = &def_xv;
5001                 xi->value_len = OCFS2_XATTR_ROOT_SIZE;
5002         }
5003
5004         ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs,
5005                                               name_hash, local);
5006         if (ret) {
5007                 mlog_errno(ret);
5008                 goto out;
5009         }
5010
5011         if (value_len <= OCFS2_XATTR_INLINE_SIZE)
5012                 goto out;
5013
5014         /* allocate the space now for the outside block storage. */
5015         ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
5016                                                    value_len, ctxt);
5017         if (ret) {
5018                 mlog_errno(ret);
5019
5020                 if (xs->not_found) {
5021                         /*
5022                          * We can't allocate enough clusters for outside
5023                          * storage and we have allocated xattr already,
5024                          * so need to remove it.
5025                          */
5026                         ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs);
5027                 }
5028                 goto out;
5029         }
5030
5031 set_value_outside:
5032         ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle,
5033                                                    xs, val, value_len);
5034 out:
5035         return ret;
5036 }
5037
5038 /*
5039  * check whether the xattr bucket is filled up with the same hash value.
5040  * If we want to insert the xattr with the same hash, return -ENOSPC.
5041  * If we want to insert a xattr with different hash value, go ahead
5042  * and ocfs2_divide_xattr_bucket will handle this.
5043  */
5044 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5045                                               struct ocfs2_xattr_bucket *bucket,
5046                                               const char *name)
5047 {
5048         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5049         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5050
5051         if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5052                 return 0;
5053
5054         if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5055             xh->xh_entries[0].xe_name_hash) {
5056                 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5057                      "hash = %u\n",
5058                      (unsigned long long)bucket_blkno(bucket),
5059                      le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5060                 return -ENOSPC;
5061         }
5062
5063         return 0;
5064 }
5065
5066 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5067                                              struct ocfs2_xattr_info *xi,
5068                                              struct ocfs2_xattr_search *xs,
5069                                              struct ocfs2_xattr_set_ctxt *ctxt)
5070 {
5071         struct ocfs2_xattr_header *xh;
5072         struct ocfs2_xattr_entry *xe;
5073         u16 count, header_size, xh_free_start;
5074         int free, max_free, need, old;
5075         size_t value_size = 0, name_len = strlen(xi->name);
5076         size_t blocksize = inode->i_sb->s_blocksize;
5077         int ret, allocation = 0;
5078
5079         mlog_entry("Set xattr %s in xattr index block\n", xi->name);
5080
5081 try_again:
5082         xh = xs->header;
5083         count = le16_to_cpu(xh->xh_count);
5084         xh_free_start = le16_to_cpu(xh->xh_free_start);
5085         header_size = sizeof(struct ocfs2_xattr_header) +
5086                         count * sizeof(struct ocfs2_xattr_entry);
5087         max_free = OCFS2_XATTR_BUCKET_SIZE - header_size -
5088                 le16_to_cpu(xh->xh_name_value_len) - OCFS2_XATTR_HEADER_GAP;
5089
5090         mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
5091                         "of %u which exceed block size\n",
5092                         (unsigned long long)bucket_blkno(xs->bucket),
5093                         header_size);
5094
5095         if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
5096                 value_size = OCFS2_XATTR_ROOT_SIZE;
5097         else if (xi->value)
5098                 value_size = OCFS2_XATTR_SIZE(xi->value_len);
5099
5100         if (xs->not_found)
5101                 need = sizeof(struct ocfs2_xattr_entry) +
5102                         OCFS2_XATTR_SIZE(name_len) + value_size;
5103         else {
5104                 need = value_size + OCFS2_XATTR_SIZE(name_len);
5105
5106                 /*
5107                  * We only replace the old value if the new length is smaller
5108                  * than the old one. Otherwise we will allocate new space in the
5109                  * bucket to store it.
5110                  */
5111                 xe = xs->here;
5112                 if (ocfs2_xattr_is_local(xe))
5113                         old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
5114                 else
5115                         old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
5116
5117                 if (old >= value_size)
5118                         need = 0;
5119         }
5120
5121         free = xh_free_start - header_size - OCFS2_XATTR_HEADER_GAP;
5122         /*
5123          * We need to make sure the new name/value pair
5124          * can exist in the same block.
5125          */
5126         if (xh_free_start % blocksize < need)
5127                 free -= xh_free_start % blocksize;
5128
5129         mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
5130              "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
5131              " %u\n", xs->not_found,
5132              (unsigned long long)bucket_blkno(xs->bucket),
5133              free, need, max_free, le16_to_cpu(xh->xh_free_start),
5134              le16_to_cpu(xh->xh_name_value_len));
5135
5136         if (free < need ||
5137             (xs->not_found &&
5138              count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
5139                 if (need <= max_free &&
5140                     count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
5141                         /*
5142                          * We can create the space by defragment. Since only the
5143                          * name/value will be moved, the xe shouldn't be changed
5144                          * in xs.
5145                          */
5146                         ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5147                                                         xs->bucket);
5148                         if (ret) {
5149                                 mlog_errno(ret);
5150                                 goto out;
5151                         }
5152
5153                         xh_free_start = le16_to_cpu(xh->xh_free_start);
5154                         free = xh_free_start - header_size
5155                                 - OCFS2_XATTR_HEADER_GAP;
5156                         if (xh_free_start % blocksize < need)
5157                                 free -= xh_free_start % blocksize;
5158
5159                         if (free >= need)
5160                                 goto xattr_set;
5161
5162                         mlog(0, "Can't get enough space for xattr insert by "
5163                              "defragment. Need %u bytes, but we have %d, so "
5164                              "allocate new bucket for it.\n", need, free);
5165                 }
5166
5167                 /*
5168                  * We have to add new buckets or clusters and one
5169                  * allocation should leave us enough space for insert.
5170                  */
5171                 BUG_ON(allocation);
5172
5173                 /*
5174                  * We do not allow for overlapping ranges between buckets. And
5175                  * the maximum number of collisions we will allow for then is
5176                  * one bucket's worth, so check it here whether we need to
5177                  * add a new bucket for the insert.
5178                  */
5179                 ret = ocfs2_check_xattr_bucket_collision(inode,
5180                                                          xs->bucket,
5181                                                          xi->name);
5182                 if (ret) {
5183                         mlog_errno(ret);
5184                         goto out;
5185                 }
5186
5187                 ret = ocfs2_add_new_xattr_bucket(inode,
5188                                                  xs->xattr_bh,
5189                                                  xs->bucket,
5190                                                  ctxt);
5191                 if (ret) {
5192                         mlog_errno(ret);
5193                         goto out;
5194                 }
5195
5196                 /*
5197                  * ocfs2_add_new_xattr_bucket() will have updated
5198                  * xs->bucket if it moved, but it will not have updated
5199                  * any of the other search fields.  Thus, we drop it and
5200                  * re-search.  Everything should be cached, so it'll be
5201                  * quick.
5202                  */
5203                 ocfs2_xattr_bucket_relse(xs->bucket);
5204                 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5205                                                    xi->name_index,
5206                                                    xi->name, xs);
5207                 if (ret && ret != -ENODATA)
5208                         goto out;
5209                 xs->not_found = ret;
5210                 allocation = 1;
5211                 goto try_again;
5212         }
5213
5214 xattr_set:
5215         ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
5216 out:
5217         mlog_exit(ret);
5218         return ret;
5219 }
5220
5221 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5222                                         struct ocfs2_xattr_bucket *bucket,
5223                                         void *para)
5224 {
5225         int ret = 0;
5226         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5227         u16 i;
5228         struct ocfs2_xattr_entry *xe;
5229         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5230         struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5231         int credits = ocfs2_remove_extent_credits(osb->sb) +
5232                 ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5233
5234
5235         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5236
5237         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5238                 xe = &xh->xh_entries[i];
5239                 if (ocfs2_xattr_is_local(xe))
5240                         continue;
5241
5242                 ctxt.handle = ocfs2_start_trans(osb, credits);
5243                 if (IS_ERR(ctxt.handle)) {
5244                         ret = PTR_ERR(ctxt.handle);
5245                         mlog_errno(ret);
5246                         break;
5247                 }
5248
5249                 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5250                                                         i, 0, &ctxt);
5251
5252                 ocfs2_commit_trans(osb, ctxt.handle);
5253                 if (ret) {
5254                         mlog_errno(ret);
5255                         break;
5256                 }
5257         }
5258
5259         ocfs2_schedule_truncate_log_flush(osb, 1);
5260         ocfs2_run_deallocs(osb, &ctxt.dealloc);
5261         return ret;
5262 }
5263
5264 static int ocfs2_delete_xattr_index_block(struct inode *inode,
5265                                           struct buffer_head *xb_bh)
5266 {
5267         struct ocfs2_xattr_block *xb =
5268                         (struct ocfs2_xattr_block *)xb_bh->b_data;
5269         struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
5270         int ret = 0;
5271         u32 name_hash = UINT_MAX, e_cpos, num_clusters;
5272         u64 p_blkno;
5273
5274         if (le16_to_cpu(el->l_next_free_rec) == 0)
5275                 return 0;
5276
5277         while (name_hash > 0) {
5278                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
5279                                           &e_cpos, &num_clusters, el);
5280                 if (ret) {
5281                         mlog_errno(ret);
5282                         goto out;
5283                 }
5284
5285                 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
5286                                                   ocfs2_delete_xattr_in_bucket,
5287                                                   NULL);
5288                 if (ret) {
5289                         mlog_errno(ret);
5290                         goto out;
5291                 }
5292
5293                 ret = ocfs2_rm_xattr_cluster(inode, xb_bh,
5294                                              p_blkno, e_cpos, num_clusters);
5295                 if (ret) {
5296                         mlog_errno(ret);
5297                         break;
5298                 }
5299
5300                 if (e_cpos == 0)
5301                         break;
5302
5303                 name_hash = e_cpos - 1;
5304         }
5305
5306 out:
5307         return ret;
5308 }
5309
5310 /*
5311  * 'security' attributes support
5312  */
5313 static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
5314                                         size_t list_size, const char *name,
5315                                         size_t name_len)
5316 {
5317         const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
5318         const size_t total_len = prefix_len + name_len + 1;
5319
5320         if (list && total_len <= list_size) {
5321                 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
5322                 memcpy(list + prefix_len, name, name_len);
5323                 list[prefix_len + name_len] = '\0';
5324         }
5325         return total_len;
5326 }
5327
5328 static int ocfs2_xattr_security_get(struct inode *inode, const char *name,
5329                                     void *buffer, size_t size)
5330 {
5331         if (strcmp(name, "") == 0)
5332                 return -EINVAL;
5333         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name,
5334                                buffer, size);
5335 }
5336
5337 static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
5338                                     const void *value, size_t size, int flags)
5339 {
5340         if (strcmp(name, "") == 0)
5341                 return -EINVAL;
5342
5343         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value,
5344                                size, flags);
5345 }
5346
5347 int ocfs2_init_security_get(struct inode *inode,
5348                             struct inode *dir,
5349                             struct ocfs2_security_xattr_info *si)
5350 {
5351         /* check whether ocfs2 support feature xattr */
5352         if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
5353                 return -EOPNOTSUPP;
5354         return security_inode_init_security(inode, dir, &si->name, &si->value,
5355                                             &si->value_len);
5356 }
5357
5358 int ocfs2_init_security_set(handle_t *handle,
5359                             struct inode *inode,
5360                             struct buffer_head *di_bh,
5361                             struct ocfs2_security_xattr_info *si,
5362                             struct ocfs2_alloc_context *xattr_ac,
5363                             struct ocfs2_alloc_context *data_ac)
5364 {
5365         return ocfs2_xattr_set_handle(handle, inode, di_bh,
5366                                      OCFS2_XATTR_INDEX_SECURITY,
5367                                      si->name, si->value, si->value_len, 0,
5368                                      xattr_ac, data_ac);
5369 }
5370
5371 struct xattr_handler ocfs2_xattr_security_handler = {
5372         .prefix = XATTR_SECURITY_PREFIX,
5373         .list   = ocfs2_xattr_security_list,
5374         .get    = ocfs2_xattr_security_get,
5375         .set    = ocfs2_xattr_security_set,
5376 };
5377
5378 /*
5379  * 'trusted' attributes support
5380  */
5381 static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
5382                                        size_t list_size, const char *name,
5383                                        size_t name_len)
5384 {
5385         const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
5386         const size_t total_len = prefix_len + name_len + 1;
5387
5388         if (list && total_len <= list_size) {
5389                 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
5390                 memcpy(list + prefix_len, name, name_len);
5391                 list[prefix_len + name_len] = '\0';
5392         }
5393         return total_len;
5394 }
5395
5396 static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name,
5397                                    void *buffer, size_t size)
5398 {
5399         if (strcmp(name, "") == 0)
5400                 return -EINVAL;
5401         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name,
5402                                buffer, size);
5403 }
5404
5405 static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name,
5406                                    const void *value, size_t size, int flags)
5407 {
5408         if (strcmp(name, "") == 0)
5409                 return -EINVAL;
5410
5411         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value,
5412                                size, flags);
5413 }
5414
5415 struct xattr_handler ocfs2_xattr_trusted_handler = {
5416         .prefix = XATTR_TRUSTED_PREFIX,
5417         .list   = ocfs2_xattr_trusted_list,
5418         .get    = ocfs2_xattr_trusted_get,
5419         .set    = ocfs2_xattr_trusted_set,
5420 };
5421
5422 /*
5423  * 'user' attributes support
5424  */
5425 static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
5426                                     size_t list_size, const char *name,
5427                                     size_t name_len)
5428 {
5429         const size_t prefix_len = XATTR_USER_PREFIX_LEN;
5430         const size_t total_len = prefix_len + name_len + 1;
5431         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5432
5433         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5434                 return 0;
5435
5436         if (list && total_len <= list_size) {
5437                 memcpy(list, XATTR_USER_PREFIX, prefix_len);
5438                 memcpy(list + prefix_len, name, name_len);
5439                 list[prefix_len + name_len] = '\0';
5440         }
5441         return total_len;
5442 }
5443
5444 static int ocfs2_xattr_user_get(struct inode *inode, const char *name,
5445                                 void *buffer, size_t size)
5446 {
5447         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5448
5449         if (strcmp(name, "") == 0)
5450                 return -EINVAL;
5451         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5452                 return -EOPNOTSUPP;
5453         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
5454                                buffer, size);
5455 }
5456
5457 static int ocfs2_xattr_user_set(struct inode *inode, const char *name,
5458                                 const void *value, size_t size, int flags)
5459 {
5460         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5461
5462         if (strcmp(name, "") == 0)
5463                 return -EINVAL;
5464         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5465                 return -EOPNOTSUPP;
5466
5467         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value,
5468                                size, flags);
5469 }
5470
5471 struct xattr_handler ocfs2_xattr_user_handler = {
5472         .prefix = XATTR_USER_PREFIX,
5473         .list   = ocfs2_xattr_user_list,
5474         .get    = ocfs2_xattr_user_get,
5475         .set    = ocfs2_xattr_user_set,
5476 };