ocfs2: add ocfs2_init_security in during file create
[linux-2.6.git] / fs / ocfs2 / xattr.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
9  * Lots of code in this file is copied from linux/fs/ext3/xattr.c.
10  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public
14  * License version 2 as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  */
21
22 #include <linux/capability.h>
23 #include <linux/fs.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>
28 #include <linux/uio.h>
29 #include <linux/sched.h>
30 #include <linux/splice.h>
31 #include <linux/mount.h>
32 #include <linux/writeback.h>
33 #include <linux/falloc.h>
34 #include <linux/sort.h>
35 #include <linux/init.h>
36 #include <linux/module.h>
37 #include <linux/string.h>
38 #include <linux/security.h>
39
40 #define MLOG_MASK_PREFIX ML_XATTR
41 #include <cluster/masklog.h>
42
43 #include "ocfs2.h"
44 #include "alloc.h"
45 #include "dlmglue.h"
46 #include "file.h"
47 #include "symlink.h"
48 #include "sysfile.h"
49 #include "inode.h"
50 #include "journal.h"
51 #include "ocfs2_fs.h"
52 #include "suballoc.h"
53 #include "uptodate.h"
54 #include "buffer_head_io.h"
55 #include "super.h"
56 #include "xattr.h"
57
58
59 struct ocfs2_xattr_def_value_root {
60         struct ocfs2_xattr_value_root   xv;
61         struct ocfs2_extent_rec         er;
62 };
63
64 struct ocfs2_xattr_bucket {
65         /* The inode these xattrs are associated with */
66         struct inode *bu_inode;
67
68         /* The actual buffers that make up the bucket */
69         struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
70
71         /* How many blocks make up one bucket for this filesystem */
72         int bu_blocks;
73 };
74
75 struct ocfs2_xattr_set_ctxt {
76         handle_t *handle;
77         struct ocfs2_alloc_context *meta_ac;
78         struct ocfs2_alloc_context *data_ac;
79         struct ocfs2_cached_dealloc_ctxt dealloc;
80 };
81
/* Size of a value root together with its single extent record. */
#define OCFS2_XATTR_ROOT_SIZE   (sizeof(struct ocfs2_xattr_def_value_root))

/*
 * Values no longer than this many bytes are sized as sitting next to their
 * name; longer ones are sized as an OCFS2_XATTR_ROOT_SIZE value root.
 */
#define OCFS2_XATTR_INLINE_SIZE 80

/* OCFS2_MIN_XATTR_INLINE_SIZE less one xattr header and one __u32. */
#define OCFS2_XATTR_FREE_IN_IBODY       (OCFS2_MIN_XATTR_INLINE_SIZE \
                                         - sizeof(struct ocfs2_xattr_header) \
                                         - sizeof(__u32))
87
88 static struct ocfs2_xattr_def_value_root def_xv = {
89         .xv.xr_list.l_count = cpu_to_le16(1),
90 };
91
92 struct xattr_handler *ocfs2_xattr_handlers[] = {
93         &ocfs2_xattr_user_handler,
94         &ocfs2_xattr_trusted_handler,
95         &ocfs2_xattr_security_handler,
96         NULL
97 };
98
99 static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
100         [OCFS2_XATTR_INDEX_USER]        = &ocfs2_xattr_user_handler,
101         [OCFS2_XATTR_INDEX_TRUSTED]     = &ocfs2_xattr_trusted_handler,
102         [OCFS2_XATTR_INDEX_SECURITY]    = &ocfs2_xattr_security_handler,
103 };
104
/* One extended attribute: its name index, name, value and value length. */
struct ocfs2_xattr_info {
        int name_index;         /* index into the name-index handler table */
        const char *name;       /* attribute name */
        const void *value;      /* attribute value bytes */
        size_t value_len;       /* length of value in bytes */
};
111
/* Cursor and result state shared by the xattr lookup helpers. */
struct ocfs2_xattr_search {
        /* Buffer head whose data is the inode's on-disk dinode */
        struct buffer_head *inode_bh;
        /*
         * xattr_bh points to the block buffer head that holds the extended
         * attribute.  When the extended attribute is stored in the inode,
         * xattr_bh is equal to inode_bh.
         */
        struct buffer_head *xattr_bh;
        /* Header of the xattr area being searched */
        struct ocfs2_xattr_header *header;
        /* Bucket used by the block lookup path */
        struct ocfs2_xattr_bucket *bucket;
        /* Address that entry name offsets are relative to */
        void *base;
        /* End of the area being searched */
        void *end;
        /* Entry the search stopped at */
        struct ocfs2_xattr_entry *here;
        /* Non-zero when the attribute was not found */
        int not_found;
};
126
127 static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
128                                              struct ocfs2_xattr_header *xh,
129                                              int index,
130                                              int *block_off,
131                                              int *new_offset);
132
133 static int ocfs2_xattr_block_find(struct inode *inode,
134                                   int name_index,
135                                   const char *name,
136                                   struct ocfs2_xattr_search *xs);
137 static int ocfs2_xattr_index_block_find(struct inode *inode,
138                                         struct buffer_head *root_bh,
139                                         int name_index,
140                                         const char *name,
141                                         struct ocfs2_xattr_search *xs);
142
143 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
144                                         struct ocfs2_xattr_tree_root *xt,
145                                         char *buffer,
146                                         size_t buffer_size);
147
148 static int ocfs2_xattr_create_index_block(struct inode *inode,
149                                           struct ocfs2_xattr_search *xs,
150                                           struct ocfs2_xattr_set_ctxt *ctxt);
151
152 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
153                                              struct ocfs2_xattr_info *xi,
154                                              struct ocfs2_xattr_search *xs,
155                                              struct ocfs2_xattr_set_ctxt *ctxt);
156
157 static int ocfs2_delete_xattr_index_block(struct inode *inode,
158                                           struct buffer_head *xb_bh);
159
160 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
161 {
162         return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
163 }
164
165 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
166 {
167         return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
168 }
169
170 static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
171 {
172         u16 len = sb->s_blocksize -
173                  offsetof(struct ocfs2_xattr_header, xh_entries);
174
175         return len / sizeof(struct ocfs2_xattr_entry);
176 }
177
/* Block number of the first block of bucket _b. */
#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
/* Data pointer of block _n of bucket _b. */
#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
/* The xattr header, which sits at the start of the bucket's first block. */
#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
181
182 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
183 {
184         struct ocfs2_xattr_bucket *bucket;
185         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
186
187         BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
188
189         bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
190         if (bucket) {
191                 bucket->bu_inode = inode;
192                 bucket->bu_blocks = blks;
193         }
194
195         return bucket;
196 }
197
198 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
199 {
200         int i;
201
202         for (i = 0; i < bucket->bu_blocks; i++) {
203                 brelse(bucket->bu_bhs[i]);
204                 bucket->bu_bhs[i] = NULL;
205         }
206 }
207
208 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
209 {
210         if (bucket) {
211                 ocfs2_xattr_bucket_relse(bucket);
212                 bucket->bu_inode = NULL;
213                 kfree(bucket);
214         }
215 }
216
217 /*
218  * A bucket that has never been written to disk doesn't need to be
219  * read.  We just need the buffer_heads.  Don't call this for
220  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
221  * them fully.
222  */
223 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
224                                    u64 xb_blkno)
225 {
226         int i, rc = 0;
227
228         for (i = 0; i < bucket->bu_blocks; i++) {
229                 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
230                                               xb_blkno + i);
231                 if (!bucket->bu_bhs[i]) {
232                         rc = -EIO;
233                         mlog_errno(rc);
234                         break;
235                 }
236
237                 if (!ocfs2_buffer_uptodate(bucket->bu_inode,
238                                            bucket->bu_bhs[i]))
239                         ocfs2_set_new_buffer_uptodate(bucket->bu_inode,
240                                                       bucket->bu_bhs[i]);
241         }
242
243         if (rc)
244                 ocfs2_xattr_bucket_relse(bucket);
245         return rc;
246 }
247
248 /* Read the xattr bucket at xb_blkno */
249 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
250                                    u64 xb_blkno)
251 {
252         int rc;
253
254         rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno,
255                                bucket->bu_blocks, bucket->bu_bhs, 0);
256         if (rc)
257                 ocfs2_xattr_bucket_relse(bucket);
258         return rc;
259 }
260
261 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
262                                              struct ocfs2_xattr_bucket *bucket,
263                                              int type)
264 {
265         int i, rc = 0;
266
267         for (i = 0; i < bucket->bu_blocks; i++) {
268                 rc = ocfs2_journal_access(handle, bucket->bu_inode,
269                                           bucket->bu_bhs[i], type);
270                 if (rc) {
271                         mlog_errno(rc);
272                         break;
273                 }
274         }
275
276         return rc;
277 }
278
279 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
280                                              struct ocfs2_xattr_bucket *bucket)
281 {
282         int i;
283
284         for (i = 0; i < bucket->bu_blocks; i++)
285                 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
286 }
287
288 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
289                                          struct ocfs2_xattr_bucket *src)
290 {
291         int i;
292         int blocksize = src->bu_inode->i_sb->s_blocksize;
293
294         BUG_ON(dest->bu_blocks != src->bu_blocks);
295         BUG_ON(dest->bu_inode != src->bu_inode);
296
297         for (i = 0; i < src->bu_blocks; i++) {
298                 memcpy(bucket_block(dest, i), bucket_block(src, i),
299                        blocksize);
300         }
301 }
302
303 static inline const char *ocfs2_xattr_prefix(int name_index)
304 {
305         struct xattr_handler *handler = NULL;
306
307         if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
308                 handler = ocfs2_xattr_handler_map[name_index];
309
310         return handler ? handler->prefix : NULL;
311 }
312
313 static u32 ocfs2_xattr_name_hash(struct inode *inode,
314                                  const char *name,
315                                  int name_len)
316 {
317         /* Get hash value of uuid from super block */
318         u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
319         int i;
320
321         /* hash extended attribute name */
322         for (i = 0; i < name_len; i++) {
323                 hash = (hash << OCFS2_HASH_SHIFT) ^
324                        (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
325                        *name++;
326         }
327
328         return hash;
329 }
330
331 /*
332  * ocfs2_xattr_hash_entry()
333  *
334  * Compute the hash of an extended attribute.
335  */
336 static void ocfs2_xattr_hash_entry(struct inode *inode,
337                                    struct ocfs2_xattr_header *header,
338                                    struct ocfs2_xattr_entry *entry)
339 {
340         u32 hash = 0;
341         char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
342
343         hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len);
344         entry->xe_name_hash = cpu_to_le32(hash);
345
346         return;
347 }
348
349 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
350 {
351         int size = 0;
352
353         if (value_len <= OCFS2_XATTR_INLINE_SIZE)
354                 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
355         else
356                 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
357         size += sizeof(struct ocfs2_xattr_entry);
358
359         return size;
360 }
361
362 int ocfs2_calc_security_init(struct inode *dir,
363                              struct ocfs2_security_xattr_info *si,
364                              int *want_clusters,
365                              int *xattr_credits,
366                              struct ocfs2_alloc_context **xattr_ac)
367 {
368         int ret = 0;
369         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
370         int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
371                                                  si->value_len);
372
373         /*
374          * The max space of security xattr taken inline is
375          * 256(name) + 80(value) + 16(entry) = 352 bytes,
376          * So reserve one metadata block for it is ok.
377          */
378         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
379             s_size > OCFS2_XATTR_FREE_IN_IBODY) {
380                 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
381                 if (ret) {
382                         mlog_errno(ret);
383                         return ret;
384                 }
385                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
386         }
387
388         /* reserve clusters for xattr value which will be set in B tree*/
389         if (si->value_len > OCFS2_XATTR_INLINE_SIZE)
390                 *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
391                                                            si->value_len);
392         return ret;
393 }
394
395 static int ocfs2_xattr_extend_allocation(struct inode *inode,
396                                          u32 clusters_to_add,
397                                          struct buffer_head *xattr_bh,
398                                          struct ocfs2_xattr_value_root *xv,
399                                          struct ocfs2_xattr_set_ctxt *ctxt)
400 {
401         int status = 0;
402         handle_t *handle = ctxt->handle;
403         enum ocfs2_alloc_restarted why;
404         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
405         u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters);
406         struct ocfs2_extent_tree et;
407
408         mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
409
410         ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv);
411
412         status = ocfs2_journal_access(handle, inode, xattr_bh,
413                                       OCFS2_JOURNAL_ACCESS_WRITE);
414         if (status < 0) {
415                 mlog_errno(status);
416                 goto leave;
417         }
418
419         prev_clusters = le32_to_cpu(xv->xr_clusters);
420         status = ocfs2_add_clusters_in_btree(osb,
421                                              inode,
422                                              &logical_start,
423                                              clusters_to_add,
424                                              0,
425                                              &et,
426                                              handle,
427                                              ctxt->data_ac,
428                                              ctxt->meta_ac,
429                                              &why);
430         if (status < 0) {
431                 mlog_errno(status);
432                 goto leave;
433         }
434
435         status = ocfs2_journal_dirty(handle, xattr_bh);
436         if (status < 0) {
437                 mlog_errno(status);
438                 goto leave;
439         }
440
441         clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters;
442
443         /*
444          * We should have already allocated enough space before the transaction,
445          * so no need to restart.
446          */
447         BUG_ON(why != RESTART_NONE || clusters_to_add);
448
449 leave:
450
451         return status;
452 }
453
454 static int __ocfs2_remove_xattr_range(struct inode *inode,
455                                       struct buffer_head *root_bh,
456                                       struct ocfs2_xattr_value_root *xv,
457                                       u32 cpos, u32 phys_cpos, u32 len,
458                                       struct ocfs2_xattr_set_ctxt *ctxt)
459 {
460         int ret;
461         u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
462         handle_t *handle = ctxt->handle;
463         struct ocfs2_extent_tree et;
464
465         ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv);
466
467         ret = ocfs2_journal_access(handle, inode, root_bh,
468                                    OCFS2_JOURNAL_ACCESS_WRITE);
469         if (ret) {
470                 mlog_errno(ret);
471                 goto out;
472         }
473
474         ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac,
475                                   &ctxt->dealloc);
476         if (ret) {
477                 mlog_errno(ret);
478                 goto out;
479         }
480
481         le32_add_cpu(&xv->xr_clusters, -len);
482
483         ret = ocfs2_journal_dirty(handle, root_bh);
484         if (ret) {
485                 mlog_errno(ret);
486                 goto out;
487         }
488
489         ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len);
490         if (ret)
491                 mlog_errno(ret);
492
493 out:
494         return ret;
495 }
496
497 static int ocfs2_xattr_shrink_size(struct inode *inode,
498                                    u32 old_clusters,
499                                    u32 new_clusters,
500                                    struct buffer_head *root_bh,
501                                    struct ocfs2_xattr_value_root *xv,
502                                    struct ocfs2_xattr_set_ctxt *ctxt)
503 {
504         int ret = 0;
505         u32 trunc_len, cpos, phys_cpos, alloc_size;
506         u64 block;
507
508         if (old_clusters <= new_clusters)
509                 return 0;
510
511         cpos = new_clusters;
512         trunc_len = old_clusters - new_clusters;
513         while (trunc_len) {
514                 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
515                                                &alloc_size, &xv->xr_list);
516                 if (ret) {
517                         mlog_errno(ret);
518                         goto out;
519                 }
520
521                 if (alloc_size > trunc_len)
522                         alloc_size = trunc_len;
523
524                 ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos,
525                                                  phys_cpos, alloc_size,
526                                                  ctxt);
527                 if (ret) {
528                         mlog_errno(ret);
529                         goto out;
530                 }
531
532                 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
533                 ocfs2_remove_xattr_clusters_from_cache(inode, block,
534                                                        alloc_size);
535                 cpos += alloc_size;
536                 trunc_len -= alloc_size;
537         }
538
539 out:
540         return ret;
541 }
542
543 static int ocfs2_xattr_value_truncate(struct inode *inode,
544                                       struct buffer_head *root_bh,
545                                       struct ocfs2_xattr_value_root *xv,
546                                       int len,
547                                       struct ocfs2_xattr_set_ctxt *ctxt)
548 {
549         int ret;
550         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
551         u32 old_clusters = le32_to_cpu(xv->xr_clusters);
552
553         if (new_clusters == old_clusters)
554                 return 0;
555
556         if (new_clusters > old_clusters)
557                 ret = ocfs2_xattr_extend_allocation(inode,
558                                                     new_clusters - old_clusters,
559                                                     root_bh, xv, ctxt);
560         else
561                 ret = ocfs2_xattr_shrink_size(inode,
562                                               old_clusters, new_clusters,
563                                               root_bh, xv, ctxt);
564
565         return ret;
566 }
567
/*
 * Append "<prefix><name>\0" to a name list.
 *
 * *@result is the number of bytes accounted for so far and is always
 * advanced by the length of this entry, including its terminating NUL.
 * With @size == 0 the caller is only measuring, so nothing is written.
 * Otherwise the entry is written at @buffer + the old *@result, provided
 * the new total still fits in @size.
 *
 * Returns 0 on success or -ERANGE when the entry does not fit.
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
                                  size_t *result, const char *prefix,
                                  const char *name, int name_len)
{
        size_t start = *result;
        int prefix_len = strlen(prefix);
        int entry_len = prefix_len + name_len + 1;
        char *dst;

        *result = start + entry_len;

        /* Size query only: report the space needed, copy nothing. */
        if (size == 0)
                return 0;

        if (*result > size)
                return -ERANGE;

        dst = buffer + start;
        memcpy(dst, prefix, prefix_len);
        dst += prefix_len;
        memcpy(dst, name, name_len);
        dst[name_len] = '\0';

        return 0;
}
591
592 static int ocfs2_xattr_list_entries(struct inode *inode,
593                                     struct ocfs2_xattr_header *header,
594                                     char *buffer, size_t buffer_size)
595 {
596         size_t result = 0;
597         int i, type, ret;
598         const char *prefix, *name;
599
600         for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
601                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
602                 type = ocfs2_xattr_get_type(entry);
603                 prefix = ocfs2_xattr_prefix(type);
604
605                 if (prefix) {
606                         name = (const char *)header +
607                                 le16_to_cpu(entry->xe_name_offset);
608
609                         ret = ocfs2_xattr_list_entry(buffer, buffer_size,
610                                                      &result, prefix, name,
611                                                      entry->xe_name_len);
612                         if (ret)
613                                 return ret;
614                 }
615         }
616
617         return result;
618 }
619
620 static int ocfs2_xattr_ibody_list(struct inode *inode,
621                                   struct ocfs2_dinode *di,
622                                   char *buffer,
623                                   size_t buffer_size)
624 {
625         struct ocfs2_xattr_header *header = NULL;
626         struct ocfs2_inode_info *oi = OCFS2_I(inode);
627         int ret = 0;
628
629         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
630                 return ret;
631
632         header = (struct ocfs2_xattr_header *)
633                  ((void *)di + inode->i_sb->s_blocksize -
634                  le16_to_cpu(di->i_xattr_inline_size));
635
636         ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
637
638         return ret;
639 }
640
641 static int ocfs2_xattr_block_list(struct inode *inode,
642                                   struct ocfs2_dinode *di,
643                                   char *buffer,
644                                   size_t buffer_size)
645 {
646         struct buffer_head *blk_bh = NULL;
647         struct ocfs2_xattr_block *xb;
648         int ret = 0;
649
650         if (!di->i_xattr_loc)
651                 return ret;
652
653         ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
654         if (ret < 0) {
655                 mlog_errno(ret);
656                 return ret;
657         }
658
659         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
660         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
661                 ret = -EIO;
662                 goto cleanup;
663         }
664
665         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
666                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
667                 ret = ocfs2_xattr_list_entries(inode, header,
668                                                buffer, buffer_size);
669         } else {
670                 struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
671                 ret = ocfs2_xattr_tree_list_index_block(inode, xt,
672                                                    buffer, buffer_size);
673         }
674 cleanup:
675         brelse(blk_bh);
676
677         return ret;
678 }
679
680 ssize_t ocfs2_listxattr(struct dentry *dentry,
681                         char *buffer,
682                         size_t size)
683 {
684         int ret = 0, i_ret = 0, b_ret = 0;
685         struct buffer_head *di_bh = NULL;
686         struct ocfs2_dinode *di = NULL;
687         struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
688
689         if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
690                 return -EOPNOTSUPP;
691
692         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
693                 return ret;
694
695         ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
696         if (ret < 0) {
697                 mlog_errno(ret);
698                 return ret;
699         }
700
701         di = (struct ocfs2_dinode *)di_bh->b_data;
702
703         down_read(&oi->ip_xattr_sem);
704         i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
705         if (i_ret < 0)
706                 b_ret = 0;
707         else {
708                 if (buffer) {
709                         buffer += i_ret;
710                         size -= i_ret;
711                 }
712                 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
713                                                buffer, size);
714                 if (b_ret < 0)
715                         i_ret = 0;
716         }
717         up_read(&oi->ip_xattr_sem);
718         ocfs2_inode_unlock(dentry->d_inode, 0);
719
720         brelse(di_bh);
721
722         return i_ret + b_ret;
723 }
724
725 static int ocfs2_xattr_find_entry(int name_index,
726                                   const char *name,
727                                   struct ocfs2_xattr_search *xs)
728 {
729         struct ocfs2_xattr_entry *entry;
730         size_t name_len;
731         int i, cmp = 1;
732
733         if (name == NULL)
734                 return -EINVAL;
735
736         name_len = strlen(name);
737         entry = xs->here;
738         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
739                 cmp = name_index - ocfs2_xattr_get_type(entry);
740                 if (!cmp)
741                         cmp = name_len - entry->xe_name_len;
742                 if (!cmp)
743                         cmp = memcmp(name, (xs->base +
744                                      le16_to_cpu(entry->xe_name_offset)),
745                                      name_len);
746                 if (cmp == 0)
747                         break;
748                 entry += 1;
749         }
750         xs->here = entry;
751
752         return cmp ? -ENODATA : 0;
753 }
754
755 static int ocfs2_xattr_get_value_outside(struct inode *inode,
756                                          struct ocfs2_xattr_value_root *xv,
757                                          void *buffer,
758                                          size_t len)
759 {
760         u32 cpos, p_cluster, num_clusters, bpc, clusters;
761         u64 blkno;
762         int i, ret = 0;
763         size_t cplen, blocksize;
764         struct buffer_head *bh = NULL;
765         struct ocfs2_extent_list *el;
766
767         el = &xv->xr_list;
768         clusters = le32_to_cpu(xv->xr_clusters);
769         bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
770         blocksize = inode->i_sb->s_blocksize;
771
772         cpos = 0;
773         while (cpos < clusters) {
774                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
775                                                &num_clusters, el);
776                 if (ret) {
777                         mlog_errno(ret);
778                         goto out;
779                 }
780
781                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
782                 /* Copy ocfs2_xattr_value */
783                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
784                         ret = ocfs2_read_block(inode, blkno, &bh);
785                         if (ret) {
786                                 mlog_errno(ret);
787                                 goto out;
788                         }
789
790                         cplen = len >= blocksize ? blocksize : len;
791                         memcpy(buffer, bh->b_data, cplen);
792                         len -= cplen;
793                         buffer += cplen;
794
795                         brelse(bh);
796                         bh = NULL;
797                         if (len == 0)
798                                 break;
799                 }
800                 cpos += num_clusters;
801         }
802 out:
803         return ret;
804 }
805
806 static int ocfs2_xattr_ibody_get(struct inode *inode,
807                                  int name_index,
808                                  const char *name,
809                                  void *buffer,
810                                  size_t buffer_size,
811                                  struct ocfs2_xattr_search *xs)
812 {
813         struct ocfs2_inode_info *oi = OCFS2_I(inode);
814         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
815         struct ocfs2_xattr_value_root *xv;
816         size_t size;
817         int ret = 0;
818
819         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
820                 return -ENODATA;
821
822         xs->end = (void *)di + inode->i_sb->s_blocksize;
823         xs->header = (struct ocfs2_xattr_header *)
824                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
825         xs->base = (void *)xs->header;
826         xs->here = xs->header->xh_entries;
827
828         ret = ocfs2_xattr_find_entry(name_index, name, xs);
829         if (ret)
830                 return ret;
831         size = le64_to_cpu(xs->here->xe_value_size);
832         if (buffer) {
833                 if (size > buffer_size)
834                         return -ERANGE;
835                 if (ocfs2_xattr_is_local(xs->here)) {
836                         memcpy(buffer, (void *)xs->base +
837                                le16_to_cpu(xs->here->xe_name_offset) +
838                                OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
839                 } else {
840                         xv = (struct ocfs2_xattr_value_root *)
841                                 (xs->base + le16_to_cpu(
842                                  xs->here->xe_name_offset) +
843                                 OCFS2_XATTR_SIZE(xs->here->xe_name_len));
844                         ret = ocfs2_xattr_get_value_outside(inode, xv,
845                                                             buffer, size);
846                         if (ret < 0) {
847                                 mlog_errno(ret);
848                                 return ret;
849                         }
850                 }
851         }
852
853         return size;
854 }
855
856 static int ocfs2_xattr_block_get(struct inode *inode,
857                                  int name_index,
858                                  const char *name,
859                                  void *buffer,
860                                  size_t buffer_size,
861                                  struct ocfs2_xattr_search *xs)
862 {
863         struct ocfs2_xattr_block *xb;
864         struct ocfs2_xattr_value_root *xv;
865         size_t size;
866         int ret = -ENODATA, name_offset, name_len, block_off, i;
867
868         xs->bucket = ocfs2_xattr_bucket_new(inode);
869         if (!xs->bucket) {
870                 ret = -ENOMEM;
871                 mlog_errno(ret);
872                 goto cleanup;
873         }
874
875         ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
876         if (ret) {
877                 mlog_errno(ret);
878                 goto cleanup;
879         }
880
881         if (xs->not_found) {
882                 ret = -ENODATA;
883                 goto cleanup;
884         }
885
886         xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
887         size = le64_to_cpu(xs->here->xe_value_size);
888         if (buffer) {
889                 ret = -ERANGE;
890                 if (size > buffer_size)
891                         goto cleanup;
892
893                 name_offset = le16_to_cpu(xs->here->xe_name_offset);
894                 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
895                 i = xs->here - xs->header->xh_entries;
896
897                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
898                         ret = ocfs2_xattr_bucket_get_name_value(inode,
899                                                                 bucket_xh(xs->bucket),
900                                                                 i,
901                                                                 &block_off,
902                                                                 &name_offset);
903                         xs->base = bucket_block(xs->bucket, block_off);
904                 }
905                 if (ocfs2_xattr_is_local(xs->here)) {
906                         memcpy(buffer, (void *)xs->base +
907                                name_offset + name_len, size);
908                 } else {
909                         xv = (struct ocfs2_xattr_value_root *)
910                                 (xs->base + name_offset + name_len);
911                         ret = ocfs2_xattr_get_value_outside(inode, xv,
912                                                             buffer, size);
913                         if (ret < 0) {
914                                 mlog_errno(ret);
915                                 goto cleanup;
916                         }
917                 }
918         }
919         ret = size;
920 cleanup:
921         ocfs2_xattr_bucket_free(xs->bucket);
922
923         brelse(xs->xattr_bh);
924         xs->xattr_bh = NULL;
925         return ret;
926 }
927
928 /* ocfs2_xattr_get()
929  *
930  * Copy an extended attribute into the buffer provided.
931  * Buffer is NULL to compute the size of buffer required.
932  */
933 static int ocfs2_xattr_get(struct inode *inode,
934                            int name_index,
935                            const char *name,
936                            void *buffer,
937                            size_t buffer_size)
938 {
939         int ret;
940         struct ocfs2_dinode *di = NULL;
941         struct buffer_head *di_bh = NULL;
942         struct ocfs2_inode_info *oi = OCFS2_I(inode);
943         struct ocfs2_xattr_search xis = {
944                 .not_found = -ENODATA,
945         };
946         struct ocfs2_xattr_search xbs = {
947                 .not_found = -ENODATA,
948         };
949
950         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
951                 return -EOPNOTSUPP;
952
953         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
954                 ret = -ENODATA;
955
956         ret = ocfs2_inode_lock(inode, &di_bh, 0);
957         if (ret < 0) {
958                 mlog_errno(ret);
959                 return ret;
960         }
961         xis.inode_bh = xbs.inode_bh = di_bh;
962         di = (struct ocfs2_dinode *)di_bh->b_data;
963
964         down_read(&oi->ip_xattr_sem);
965         ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
966                                     buffer_size, &xis);
967         if (ret == -ENODATA && di->i_xattr_loc)
968                 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
969                                             buffer_size, &xbs);
970         up_read(&oi->ip_xattr_sem);
971         ocfs2_inode_unlock(inode, 0);
972
973         brelse(di_bh);
974
975         return ret;
976 }
977
978 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
979                                            handle_t *handle,
980                                            struct ocfs2_xattr_value_root *xv,
981                                            const void *value,
982                                            int value_len)
983 {
984         int ret = 0, i, cp_len, credits;
985         u16 blocksize = inode->i_sb->s_blocksize;
986         u32 p_cluster, num_clusters;
987         u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
988         u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
989         u64 blkno;
990         struct buffer_head *bh = NULL;
991
992         BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
993
994         /*
995          * In __ocfs2_xattr_set_value_outside has already been dirtied,
996          * so we don't need to worry about whether ocfs2_extend_trans
997          * will create a new transactio for us or not.
998          */
999         credits = clusters * bpc;
1000         ret = ocfs2_extend_trans(handle, credits);
1001         if (ret) {
1002                 mlog_errno(ret);
1003                 goto out;
1004         }
1005
1006         while (cpos < clusters) {
1007                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1008                                                &num_clusters, &xv->xr_list);
1009                 if (ret) {
1010                         mlog_errno(ret);
1011                         goto out;
1012                 }
1013
1014                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1015
1016                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1017                         ret = ocfs2_read_block(inode, blkno, &bh);
1018                         if (ret) {
1019                                 mlog_errno(ret);
1020                                 goto out;
1021                         }
1022
1023                         ret = ocfs2_journal_access(handle,
1024                                                    inode,
1025                                                    bh,
1026                                                    OCFS2_JOURNAL_ACCESS_WRITE);
1027                         if (ret < 0) {
1028                                 mlog_errno(ret);
1029                                 goto out;
1030                         }
1031
1032                         cp_len = value_len > blocksize ? blocksize : value_len;
1033                         memcpy(bh->b_data, value, cp_len);
1034                         value_len -= cp_len;
1035                         value += cp_len;
1036                         if (cp_len < blocksize)
1037                                 memset(bh->b_data + cp_len, 0,
1038                                        blocksize - cp_len);
1039
1040                         ret = ocfs2_journal_dirty(handle, bh);
1041                         if (ret < 0) {
1042                                 mlog_errno(ret);
1043                                 goto out;
1044                         }
1045                         brelse(bh);
1046                         bh = NULL;
1047
1048                         /*
1049                          * XXX: do we need to empty all the following
1050                          * blocks in this cluster?
1051                          */
1052                         if (!value_len)
1053                                 break;
1054                 }
1055                 cpos += num_clusters;
1056         }
1057 out:
1058         brelse(bh);
1059
1060         return ret;
1061 }
1062
1063 static int ocfs2_xattr_cleanup(struct inode *inode,
1064                                handle_t *handle,
1065                                struct ocfs2_xattr_info *xi,
1066                                struct ocfs2_xattr_search *xs,
1067                                size_t offs)
1068 {
1069         int ret = 0;
1070         size_t name_len = strlen(xi->name);
1071         void *val = xs->base + offs;
1072         size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1073
1074         ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1075                                    OCFS2_JOURNAL_ACCESS_WRITE);
1076         if (ret) {
1077                 mlog_errno(ret);
1078                 goto out;
1079         }
1080         /* Decrease xattr count */
1081         le16_add_cpu(&xs->header->xh_count, -1);
1082         /* Remove the xattr entry and tree root which has already be set*/
1083         memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
1084         memset(val, 0, size);
1085
1086         ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1087         if (ret < 0)
1088                 mlog_errno(ret);
1089 out:
1090         return ret;
1091 }
1092
1093 static int ocfs2_xattr_update_entry(struct inode *inode,
1094                                     handle_t *handle,
1095                                     struct ocfs2_xattr_info *xi,
1096                                     struct ocfs2_xattr_search *xs,
1097                                     size_t offs)
1098 {
1099         int ret;
1100
1101         ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1102                                    OCFS2_JOURNAL_ACCESS_WRITE);
1103         if (ret) {
1104                 mlog_errno(ret);
1105                 goto out;
1106         }
1107
1108         xs->here->xe_name_offset = cpu_to_le16(offs);
1109         xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1110         if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE)
1111                 ocfs2_xattr_set_local(xs->here, 1);
1112         else
1113                 ocfs2_xattr_set_local(xs->here, 0);
1114         ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1115
1116         ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1117         if (ret < 0)
1118                 mlog_errno(ret);
1119 out:
1120         return ret;
1121 }
1122
1123 /*
1124  * ocfs2_xattr_set_value_outside()
1125  *
1126  * Set large size value in B tree.
1127  */
1128 static int ocfs2_xattr_set_value_outside(struct inode *inode,
1129                                          struct ocfs2_xattr_info *xi,
1130                                          struct ocfs2_xattr_search *xs,
1131                                          struct ocfs2_xattr_set_ctxt *ctxt,
1132                                          size_t offs)
1133 {
1134         size_t name_len = strlen(xi->name);
1135         void *val = xs->base + offs;
1136         struct ocfs2_xattr_value_root *xv = NULL;
1137         size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1138         int ret = 0;
1139
1140         memset(val, 0, size);
1141         memcpy(val, xi->name, name_len);
1142         xv = (struct ocfs2_xattr_value_root *)
1143                 (val + OCFS2_XATTR_SIZE(name_len));
1144         xv->xr_clusters = 0;
1145         xv->xr_last_eb_blk = 0;
1146         xv->xr_list.l_tree_depth = 0;
1147         xv->xr_list.l_count = cpu_to_le16(1);
1148         xv->xr_list.l_next_free_rec = 0;
1149
1150         ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv,
1151                                          xi->value_len, ctxt);
1152         if (ret < 0) {
1153                 mlog_errno(ret);
1154                 return ret;
1155         }
1156         ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, offs);
1157         if (ret < 0) {
1158                 mlog_errno(ret);
1159                 return ret;
1160         }
1161         ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, xv,
1162                                               xi->value, xi->value_len);
1163         if (ret < 0)
1164                 mlog_errno(ret);
1165
1166         return ret;
1167 }
1168
1169 /*
1170  * ocfs2_xattr_set_entry_local()
1171  *
1172  * Set, replace or remove extended attribute in local.
1173  */
1174 static void ocfs2_xattr_set_entry_local(struct inode *inode,
1175                                         struct ocfs2_xattr_info *xi,
1176                                         struct ocfs2_xattr_search *xs,
1177                                         struct ocfs2_xattr_entry *last,
1178                                         size_t min_offs)
1179 {
1180         size_t name_len = strlen(xi->name);
1181         int i;
1182
1183         if (xi->value && xs->not_found) {
1184                 /* Insert the new xattr entry. */
1185                 le16_add_cpu(&xs->header->xh_count, 1);
1186                 ocfs2_xattr_set_type(last, xi->name_index);
1187                 ocfs2_xattr_set_local(last, 1);
1188                 last->xe_name_len = name_len;
1189         } else {
1190                 void *first_val;
1191                 void *val;
1192                 size_t offs, size;
1193
1194                 first_val = xs->base + min_offs;
1195                 offs = le16_to_cpu(xs->here->xe_name_offset);
1196                 val = xs->base + offs;
1197
1198                 if (le64_to_cpu(xs->here->xe_value_size) >
1199                     OCFS2_XATTR_INLINE_SIZE)
1200                         size = OCFS2_XATTR_SIZE(name_len) +
1201                                 OCFS2_XATTR_ROOT_SIZE;
1202                 else
1203                         size = OCFS2_XATTR_SIZE(name_len) +
1204                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1205
1206                 if (xi->value && size == OCFS2_XATTR_SIZE(name_len) +
1207                                 OCFS2_XATTR_SIZE(xi->value_len)) {
1208                         /* The old and the new value have the
1209                            same size. Just replace the value. */
1210                         ocfs2_xattr_set_local(xs->here, 1);
1211                         xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1212                         /* Clear value bytes. */
1213                         memset(val + OCFS2_XATTR_SIZE(name_len),
1214                                0,
1215                                OCFS2_XATTR_SIZE(xi->value_len));
1216                         memcpy(val + OCFS2_XATTR_SIZE(name_len),
1217                                xi->value,
1218                                xi->value_len);
1219                         return;
1220                 }
1221                 /* Remove the old name+value. */
1222                 memmove(first_val + size, first_val, val - first_val);
1223                 memset(first_val, 0, size);
1224                 xs->here->xe_name_hash = 0;
1225                 xs->here->xe_name_offset = 0;
1226                 ocfs2_xattr_set_local(xs->here, 1);
1227                 xs->here->xe_value_size = 0;
1228
1229                 min_offs += size;
1230
1231                 /* Adjust all value offsets. */
1232                 last = xs->header->xh_entries;
1233                 for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1234                         size_t o = le16_to_cpu(last->xe_name_offset);
1235
1236                         if (o < offs)
1237                                 last->xe_name_offset = cpu_to_le16(o + size);
1238                         last += 1;
1239                 }
1240
1241                 if (!xi->value) {
1242                         /* Remove the old entry. */
1243                         last -= 1;
1244                         memmove(xs->here, xs->here + 1,
1245                                 (void *)last - (void *)xs->here);
1246                         memset(last, 0, sizeof(struct ocfs2_xattr_entry));
1247                         le16_add_cpu(&xs->header->xh_count, -1);
1248                 }
1249         }
1250         if (xi->value) {
1251                 /* Insert the new name+value. */
1252                 size_t size = OCFS2_XATTR_SIZE(name_len) +
1253                                 OCFS2_XATTR_SIZE(xi->value_len);
1254                 void *val = xs->base + min_offs - size;
1255
1256                 xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
1257                 memset(val, 0, size);
1258                 memcpy(val, xi->name, name_len);
1259                 memcpy(val + OCFS2_XATTR_SIZE(name_len),
1260                        xi->value,
1261                        xi->value_len);
1262                 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1263                 ocfs2_xattr_set_local(xs->here, 1);
1264                 ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1265         }
1266
1267         return;
1268 }
1269
1270 /*
1271  * ocfs2_xattr_set_entry()
1272  *
1273  * Set extended attribute entry into inode or block.
1274  *
1275  * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
1276  * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(),
1277  * then set value in B tree with set_value_outside().
1278  */
1279 static int ocfs2_xattr_set_entry(struct inode *inode,
1280                                  struct ocfs2_xattr_info *xi,
1281                                  struct ocfs2_xattr_search *xs,
1282                                  struct ocfs2_xattr_set_ctxt *ctxt,
1283                                  int flag)
1284 {
1285         struct ocfs2_xattr_entry *last;
1286         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1287         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1288         size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
1289         size_t size_l = 0;
1290         handle_t *handle = ctxt->handle;
1291         int free, i, ret;
1292         struct ocfs2_xattr_info xi_l = {
1293                 .name_index = xi->name_index,
1294                 .name = xi->name,
1295                 .value = xi->value,
1296                 .value_len = xi->value_len,
1297         };
1298
1299         /* Compute min_offs, last and free space. */
1300         last = xs->header->xh_entries;
1301
1302         for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1303                 size_t offs = le16_to_cpu(last->xe_name_offset);
1304                 if (offs < min_offs)
1305                         min_offs = offs;
1306                 last += 1;
1307         }
1308
1309         free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
1310         if (free < 0)
1311                 return -EIO;
1312
1313         if (!xs->not_found) {
1314                 size_t size = 0;
1315                 if (ocfs2_xattr_is_local(xs->here))
1316                         size = OCFS2_XATTR_SIZE(name_len) +
1317                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1318                 else
1319                         size = OCFS2_XATTR_SIZE(name_len) +
1320                                 OCFS2_XATTR_ROOT_SIZE;
1321                 free += (size + sizeof(struct ocfs2_xattr_entry));
1322         }
1323         /* Check free space in inode or block */
1324         if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1325                 if (free < sizeof(struct ocfs2_xattr_entry) +
1326                            OCFS2_XATTR_SIZE(name_len) +
1327                            OCFS2_XATTR_ROOT_SIZE) {
1328                         ret = -ENOSPC;
1329                         goto out;
1330                 }
1331                 size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1332                 xi_l.value = (void *)&def_xv;
1333                 xi_l.value_len = OCFS2_XATTR_ROOT_SIZE;
1334         } else if (xi->value) {
1335                 if (free < sizeof(struct ocfs2_xattr_entry) +
1336                            OCFS2_XATTR_SIZE(name_len) +
1337                            OCFS2_XATTR_SIZE(xi->value_len)) {
1338                         ret = -ENOSPC;
1339                         goto out;
1340                 }
1341         }
1342
1343         if (!xs->not_found) {
1344                 /* For existing extended attribute */
1345                 size_t size = OCFS2_XATTR_SIZE(name_len) +
1346                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1347                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1348                 void *val = xs->base + offs;
1349
1350                 if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1351                         /* Replace existing local xattr with tree root */
1352                         ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1353                                                             ctxt, offs);
1354                         if (ret < 0)
1355                                 mlog_errno(ret);
1356                         goto out;
1357                 } else if (!ocfs2_xattr_is_local(xs->here)) {
1358                         /* For existing xattr which has value outside */
1359                         struct ocfs2_xattr_value_root *xv = NULL;
1360                         xv = (struct ocfs2_xattr_value_root *)(val +
1361                                 OCFS2_XATTR_SIZE(name_len));
1362
1363                         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1364                                 /*
1365                                  * If new value need set outside also,
1366                                  * first truncate old value to new value,
1367                                  * then set new value with set_value_outside().
1368                                  */
1369                                 ret = ocfs2_xattr_value_truncate(inode,
1370                                                                  xs->xattr_bh,
1371                                                                  xv,
1372                                                                  xi->value_len,
1373                                                                  ctxt);
1374                                 if (ret < 0) {
1375                                         mlog_errno(ret);
1376                                         goto out;
1377                                 }
1378
1379                                 ret = ocfs2_xattr_update_entry(inode,
1380                                                                handle,
1381                                                                xi,
1382                                                                xs,
1383                                                                offs);
1384                                 if (ret < 0) {
1385                                         mlog_errno(ret);
1386                                         goto out;
1387                                 }
1388
1389                                 ret = __ocfs2_xattr_set_value_outside(inode,
1390                                                                 handle,
1391                                                                 xv,
1392                                                                 xi->value,
1393                                                                 xi->value_len);
1394                                 if (ret < 0)
1395                                         mlog_errno(ret);
1396                                 goto out;
1397                         } else {
1398                                 /*
1399                                  * If new value need set in local,
1400                                  * just trucate old value to zero.
1401                                  */
1402                                  ret = ocfs2_xattr_value_truncate(inode,
1403                                                                   xs->xattr_bh,
1404                                                                   xv,
1405                                                                   0,
1406                                                                   ctxt);
1407                                 if (ret < 0)
1408                                         mlog_errno(ret);
1409                         }
1410                 }
1411         }
1412
1413         ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1414                                    OCFS2_JOURNAL_ACCESS_WRITE);
1415         if (ret) {
1416                 mlog_errno(ret);
1417                 goto out;
1418         }
1419
1420         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1421                 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1422                                            OCFS2_JOURNAL_ACCESS_WRITE);
1423                 if (ret) {
1424                         mlog_errno(ret);
1425                         goto out;
1426                 }
1427         }
1428
1429         /*
1430          * Set value in local, include set tree root in local.
1431          * This is the first step for value size >INLINE_SIZE.
1432          */
1433         ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);
1434
1435         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1436                 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1437                 if (ret < 0) {
1438                         mlog_errno(ret);
1439                         goto out;
1440                 }
1441         }
1442
1443         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
1444             (flag & OCFS2_INLINE_XATTR_FL)) {
1445                 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1446                 unsigned int xattrsize = osb->s_xattr_inline_size;
1447
1448                 /*
1449                  * Adjust extent record count or inline data size
1450                  * to reserve space for extended attribute.
1451                  */
1452                 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1453                         struct ocfs2_inline_data *idata = &di->id2.i_data;
1454                         le16_add_cpu(&idata->id_count, -xattrsize);
1455                 } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
1456                         struct ocfs2_extent_list *el = &di->id2.i_list;
1457                         le16_add_cpu(&el->l_count, -(xattrsize /
1458                                         sizeof(struct ocfs2_extent_rec)));
1459                 }
1460                 di->i_xattr_inline_size = cpu_to_le16(xattrsize);
1461         }
1462         /* Update xattr flag */
1463         spin_lock(&oi->ip_lock);
1464         oi->ip_dyn_features |= flag;
1465         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1466         spin_unlock(&oi->ip_lock);
1467         /* Update inode ctime */
1468         inode->i_ctime = CURRENT_TIME;
1469         di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
1470         di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
1471
1472         ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1473         if (ret < 0)
1474                 mlog_errno(ret);
1475
1476         if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1477                 /*
1478                  * Set value outside in B tree.
1479                  * This is the second step for value size > INLINE_SIZE.
1480                  */
1481                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1482                 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt, offs);
1483                 if (ret < 0) {
1484                         int ret2;
1485
1486                         mlog_errno(ret);
1487                         /*
1488                          * If set value outside failed, we have to clean
1489                          * the junk tree root we have already set in local.
1490                          */
1491                         ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle,
1492                                                    xi, xs, offs);
1493                         if (ret2 < 0)
1494                                 mlog_errno(ret2);
1495                 }
1496         }
1497 out:
1498         return ret;
1499 }
1500
1501 static int ocfs2_remove_value_outside(struct inode*inode,
1502                                       struct buffer_head *bh,
1503                                       struct ocfs2_xattr_header *header)
1504 {
1505         int ret = 0, i;
1506         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1507         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
1508
1509         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
1510
1511         ctxt.handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
1512         if (IS_ERR(ctxt.handle)) {
1513                 ret = PTR_ERR(ctxt.handle);
1514                 mlog_errno(ret);
1515                 goto out;
1516         }
1517
1518         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1519                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1520
1521                 if (!ocfs2_xattr_is_local(entry)) {
1522                         struct ocfs2_xattr_value_root *xv;
1523                         void *val;
1524
1525                         val = (void *)header +
1526                                 le16_to_cpu(entry->xe_name_offset);
1527                         xv = (struct ocfs2_xattr_value_root *)
1528                                 (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1529                         ret = ocfs2_xattr_value_truncate(inode, bh, xv,
1530                                                          0, &ctxt);
1531                         if (ret < 0) {
1532                                 mlog_errno(ret);
1533                                 break;
1534                         }
1535                 }
1536         }
1537
1538         ocfs2_commit_trans(osb, ctxt.handle);
1539         ocfs2_schedule_truncate_log_flush(osb, 1);
1540         ocfs2_run_deallocs(osb, &ctxt.dealloc);
1541 out:
1542         return ret;
1543 }
1544
1545 static int ocfs2_xattr_ibody_remove(struct inode *inode,
1546                                     struct buffer_head *di_bh)
1547 {
1548
1549         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1550         struct ocfs2_xattr_header *header;
1551         int ret;
1552
1553         header = (struct ocfs2_xattr_header *)
1554                  ((void *)di + inode->i_sb->s_blocksize -
1555                  le16_to_cpu(di->i_xattr_inline_size));
1556
1557         ret = ocfs2_remove_value_outside(inode, di_bh, header);
1558
1559         return ret;
1560 }
1561
1562 static int ocfs2_xattr_block_remove(struct inode *inode,
1563                                     struct buffer_head *blk_bh)
1564 {
1565         struct ocfs2_xattr_block *xb;
1566         int ret = 0;
1567
1568         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1569         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1570                 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1571                 ret = ocfs2_remove_value_outside(inode, blk_bh, header);
1572         } else
1573                 ret = ocfs2_delete_xattr_index_block(inode, blk_bh);
1574
1575         return ret;
1576 }
1577
1578 static int ocfs2_xattr_free_block(struct inode *inode,
1579                                   u64 block)
1580 {
1581         struct inode *xb_alloc_inode;
1582         struct buffer_head *xb_alloc_bh = NULL;
1583         struct buffer_head *blk_bh = NULL;
1584         struct ocfs2_xattr_block *xb;
1585         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1586         handle_t *handle;
1587         int ret = 0;
1588         u64 blk, bg_blkno;
1589         u16 bit;
1590
1591         ret = ocfs2_read_block(inode, block, &blk_bh);
1592         if (ret < 0) {
1593                 mlog_errno(ret);
1594                 goto out;
1595         }
1596
1597         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1598         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
1599                 ret = -EIO;
1600                 goto out;
1601         }
1602
1603         ret = ocfs2_xattr_block_remove(inode, blk_bh);
1604         if (ret < 0) {
1605                 mlog_errno(ret);
1606                 goto out;
1607         }
1608
1609         blk = le64_to_cpu(xb->xb_blkno);
1610         bit = le16_to_cpu(xb->xb_suballoc_bit);
1611         bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1612
1613         xb_alloc_inode = ocfs2_get_system_file_inode(osb,
1614                                 EXTENT_ALLOC_SYSTEM_INODE,
1615                                 le16_to_cpu(xb->xb_suballoc_slot));
1616         if (!xb_alloc_inode) {
1617                 ret = -ENOMEM;
1618                 mlog_errno(ret);
1619                 goto out;
1620         }
1621         mutex_lock(&xb_alloc_inode->i_mutex);
1622
1623         ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
1624         if (ret < 0) {
1625                 mlog_errno(ret);
1626                 goto out_mutex;
1627         }
1628
1629         handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
1630         if (IS_ERR(handle)) {
1631                 ret = PTR_ERR(handle);
1632                 mlog_errno(ret);
1633                 goto out_unlock;
1634         }
1635
1636         ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
1637                                        bit, bg_blkno, 1);
1638         if (ret < 0)
1639                 mlog_errno(ret);
1640
1641         ocfs2_commit_trans(osb, handle);
1642 out_unlock:
1643         ocfs2_inode_unlock(xb_alloc_inode, 1);
1644         brelse(xb_alloc_bh);
1645 out_mutex:
1646         mutex_unlock(&xb_alloc_inode->i_mutex);
1647         iput(xb_alloc_inode);
1648 out:
1649         brelse(blk_bh);
1650         return ret;
1651 }
1652
1653 /*
1654  * ocfs2_xattr_remove()
1655  *
1656  * Free extended attribute resources associated with this inode.
1657  */
1658 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1659 {
1660         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1661         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1662         handle_t *handle;
1663         int ret;
1664
1665         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1666                 return 0;
1667
1668         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1669                 return 0;
1670
1671         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1672                 ret = ocfs2_xattr_ibody_remove(inode, di_bh);
1673                 if (ret < 0) {
1674                         mlog_errno(ret);
1675                         goto out;
1676                 }
1677         }
1678
1679         if (di->i_xattr_loc) {
1680                 ret = ocfs2_xattr_free_block(inode,
1681                                              le64_to_cpu(di->i_xattr_loc));
1682                 if (ret < 0) {
1683                         mlog_errno(ret);
1684                         goto out;
1685                 }
1686         }
1687
1688         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1689                                    OCFS2_INODE_UPDATE_CREDITS);
1690         if (IS_ERR(handle)) {
1691                 ret = PTR_ERR(handle);
1692                 mlog_errno(ret);
1693                 goto out;
1694         }
1695         ret = ocfs2_journal_access(handle, inode, di_bh,
1696                                    OCFS2_JOURNAL_ACCESS_WRITE);
1697         if (ret) {
1698                 mlog_errno(ret);
1699                 goto out_commit;
1700         }
1701
1702         di->i_xattr_loc = 0;
1703
1704         spin_lock(&oi->ip_lock);
1705         oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
1706         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1707         spin_unlock(&oi->ip_lock);
1708
1709         ret = ocfs2_journal_dirty(handle, di_bh);
1710         if (ret < 0)
1711                 mlog_errno(ret);
1712 out_commit:
1713         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1714 out:
1715         return ret;
1716 }
1717
1718 static int ocfs2_xattr_has_space_inline(struct inode *inode,
1719                                         struct ocfs2_dinode *di)
1720 {
1721         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1722         unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
1723         int free;
1724
1725         if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
1726                 return 0;
1727
1728         if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1729                 struct ocfs2_inline_data *idata = &di->id2.i_data;
1730                 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
1731         } else if (ocfs2_inode_is_fast_symlink(inode)) {
1732                 free = ocfs2_fast_symlink_chars(inode->i_sb) -
1733                         le64_to_cpu(di->i_size);
1734         } else {
1735                 struct ocfs2_extent_list *el = &di->id2.i_list;
1736                 free = (le16_to_cpu(el->l_count) -
1737                         le16_to_cpu(el->l_next_free_rec)) *
1738                         sizeof(struct ocfs2_extent_rec);
1739         }
1740         if (free >= xattrsize)
1741                 return 1;
1742
1743         return 0;
1744 }
1745
1746 /*
1747  * ocfs2_xattr_ibody_find()
1748  *
1749  * Find extended attribute in inode block and
1750  * fill search info into struct ocfs2_xattr_search.
1751  */
1752 static int ocfs2_xattr_ibody_find(struct inode *inode,
1753                                   int name_index,
1754                                   const char *name,
1755                                   struct ocfs2_xattr_search *xs)
1756 {
1757         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1758         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1759         int ret;
1760         int has_space = 0;
1761
1762         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1763                 return 0;
1764
1765         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1766                 down_read(&oi->ip_alloc_sem);
1767                 has_space = ocfs2_xattr_has_space_inline(inode, di);
1768                 up_read(&oi->ip_alloc_sem);
1769                 if (!has_space)
1770                         return 0;
1771         }
1772
1773         xs->xattr_bh = xs->inode_bh;
1774         xs->end = (void *)di + inode->i_sb->s_blocksize;
1775         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
1776                 xs->header = (struct ocfs2_xattr_header *)
1777                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1778         else
1779                 xs->header = (struct ocfs2_xattr_header *)
1780                         (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
1781         xs->base = (void *)xs->header;
1782         xs->here = xs->header->xh_entries;
1783
1784         /* Find the named attribute. */
1785         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1786                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
1787                 if (ret && ret != -ENODATA)
1788                         return ret;
1789                 xs->not_found = ret;
1790         }
1791
1792         return 0;
1793 }
1794
1795 /*
1796  * ocfs2_xattr_ibody_set()
1797  *
1798  * Set, replace or remove an extended attribute into inode block.
1799  *
1800  */
1801 static int ocfs2_xattr_ibody_set(struct inode *inode,
1802                                  struct ocfs2_xattr_info *xi,
1803                                  struct ocfs2_xattr_search *xs,
1804                                  struct ocfs2_xattr_set_ctxt *ctxt)
1805 {
1806         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1807         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1808         int ret;
1809
1810         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1811                 return -ENOSPC;
1812
1813         down_write(&oi->ip_alloc_sem);
1814         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1815                 if (!ocfs2_xattr_has_space_inline(inode, di)) {
1816                         ret = -ENOSPC;
1817                         goto out;
1818                 }
1819         }
1820
1821         ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
1822                                 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
1823 out:
1824         up_write(&oi->ip_alloc_sem);
1825
1826         return ret;
1827 }
1828
1829 /*
1830  * ocfs2_xattr_block_find()
1831  *
1832  * Find extended attribute in external block and
1833  * fill search info into struct ocfs2_xattr_search.
1834  */
1835 static int ocfs2_xattr_block_find(struct inode *inode,
1836                                   int name_index,
1837                                   const char *name,
1838                                   struct ocfs2_xattr_search *xs)
1839 {
1840         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1841         struct buffer_head *blk_bh = NULL;
1842         struct ocfs2_xattr_block *xb;
1843         int ret = 0;
1844
1845         if (!di->i_xattr_loc)
1846                 return ret;
1847
1848         ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
1849         if (ret < 0) {
1850                 mlog_errno(ret);
1851                 return ret;
1852         }
1853
1854         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1855         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
1856                 ret = -EIO;
1857                 goto cleanup;
1858         }
1859
1860         xs->xattr_bh = blk_bh;
1861
1862         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1863                 xs->header = &xb->xb_attrs.xb_header;
1864                 xs->base = (void *)xs->header;
1865                 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
1866                 xs->here = xs->header->xh_entries;
1867
1868                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
1869         } else
1870                 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
1871                                                    name_index,
1872                                                    name, xs);
1873
1874         if (ret && ret != -ENODATA) {
1875                 xs->xattr_bh = NULL;
1876                 goto cleanup;
1877         }
1878         xs->not_found = ret;
1879         return 0;
1880 cleanup:
1881         brelse(blk_bh);
1882
1883         return ret;
1884 }
1885
1886 /*
1887  * ocfs2_xattr_block_set()
1888  *
1889  * Set, replace or remove an extended attribute into external block.
1890  *
1891  */
1892 static int ocfs2_xattr_block_set(struct inode *inode,
1893                                  struct ocfs2_xattr_info *xi,
1894                                  struct ocfs2_xattr_search *xs,
1895                                  struct ocfs2_xattr_set_ctxt *ctxt)
1896 {
1897         struct buffer_head *new_bh = NULL;
1898         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1899         struct ocfs2_dinode *di =  (struct ocfs2_dinode *)xs->inode_bh->b_data;
1900         handle_t *handle = ctxt->handle;
1901         struct ocfs2_xattr_block *xblk = NULL;
1902         u16 suballoc_bit_start;
1903         u32 num_got;
1904         u64 first_blkno;
1905         int ret;
1906
1907         if (!xs->xattr_bh) {
1908                 ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1909                                            OCFS2_JOURNAL_ACCESS_CREATE);
1910                 if (ret < 0) {
1911                         mlog_errno(ret);
1912                         goto end;
1913                 }
1914
1915                 ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1,
1916                                            &suballoc_bit_start, &num_got,
1917                                            &first_blkno);
1918                 if (ret < 0) {
1919                         mlog_errno(ret);
1920                         goto end;
1921                 }
1922
1923                 new_bh = sb_getblk(inode->i_sb, first_blkno);
1924                 ocfs2_set_new_buffer_uptodate(inode, new_bh);
1925
1926                 ret = ocfs2_journal_access(handle, inode, new_bh,
1927                                            OCFS2_JOURNAL_ACCESS_CREATE);
1928                 if (ret < 0) {
1929                         mlog_errno(ret);
1930                         goto end;
1931                 }
1932
1933                 /* Initialize ocfs2_xattr_block */
1934                 xs->xattr_bh = new_bh;
1935                 xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
1936                 memset(xblk, 0, inode->i_sb->s_blocksize);
1937                 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
1938                 xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
1939                 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1940                 xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
1941                 xblk->xb_blkno = cpu_to_le64(first_blkno);
1942
1943                 xs->header = &xblk->xb_attrs.xb_header;
1944                 xs->base = (void *)xs->header;
1945                 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
1946                 xs->here = xs->header->xh_entries;
1947
1948                 ret = ocfs2_journal_dirty(handle, new_bh);
1949                 if (ret < 0) {
1950                         mlog_errno(ret);
1951                         goto end;
1952                 }
1953                 di->i_xattr_loc = cpu_to_le64(first_blkno);
1954                 ocfs2_journal_dirty(handle, xs->inode_bh);
1955         } else
1956                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1957
1958         if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
1959                 /* Set extended attribute into external block */
1960                 ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
1961                                             OCFS2_HAS_XATTR_FL);
1962                 if (!ret || ret != -ENOSPC)
1963                         goto end;
1964
1965                 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
1966                 if (ret)
1967                         goto end;
1968         }
1969
1970         ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
1971
1972 end:
1973
1974         return ret;
1975 }
1976
1977 /* Check whether the new xattr can be inserted into the inode. */
1978 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
1979                                        struct ocfs2_xattr_info *xi,
1980                                        struct ocfs2_xattr_search *xs)
1981 {
1982         u64 value_size;
1983         struct ocfs2_xattr_entry *last;
1984         int free, i;
1985         size_t min_offs = xs->end - xs->base;
1986
1987         if (!xs->header)
1988                 return 0;
1989
1990         last = xs->header->xh_entries;
1991
1992         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
1993                 size_t offs = le16_to_cpu(last->xe_name_offset);
1994                 if (offs < min_offs)
1995                         min_offs = offs;
1996                 last += 1;
1997         }
1998
1999         free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
2000         if (free < 0)
2001                 return 0;
2002
2003         BUG_ON(!xs->not_found);
2004
2005         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2006                 value_size = OCFS2_XATTR_ROOT_SIZE;
2007         else
2008                 value_size = OCFS2_XATTR_SIZE(xi->value_len);
2009
2010         if (free >= sizeof(struct ocfs2_xattr_entry) +
2011                    OCFS2_XATTR_SIZE(strlen(xi->name)) + value_size)
2012                 return 1;
2013
2014         return 0;
2015 }
2016
2017 static int ocfs2_calc_xattr_set_need(struct inode *inode,
2018                                      struct ocfs2_dinode *di,
2019                                      struct ocfs2_xattr_info *xi,
2020                                      struct ocfs2_xattr_search *xis,
2021                                      struct ocfs2_xattr_search *xbs,
2022                                      int *clusters_need,
2023                                      int *meta_need,
2024                                      int *credits_need)
2025 {
2026         int ret = 0, old_in_xb = 0;
2027         int clusters_add = 0, meta_add = 0, credits = 0;
2028         struct buffer_head *bh = NULL;
2029         struct ocfs2_xattr_block *xb = NULL;
2030         struct ocfs2_xattr_entry *xe = NULL;
2031         struct ocfs2_xattr_value_root *xv = NULL;
2032         char *base = NULL;
2033         int name_offset, name_len = 0;
2034         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2035                                                     xi->value_len);
2036         u64 value_size;
2037
2038         if (xis->not_found && xbs->not_found) {
2039                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2040
2041                 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2042                         clusters_add += new_clusters;
2043                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2044                                                         &def_xv.xv.xr_list,
2045                                                         new_clusters);
2046                 }
2047
2048                 goto meta_guess;
2049         }
2050
2051         if (!xis->not_found) {
2052                 xe = xis->here;
2053                 name_offset = le16_to_cpu(xe->xe_name_offset);
2054                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2055                 base = xis->base;
2056                 credits += OCFS2_INODE_UPDATE_CREDITS;
2057         } else {
2058                 int i, block_off;
2059                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2060                 xe = xbs->here;
2061                 name_offset = le16_to_cpu(xe->xe_name_offset);
2062                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2063                 i = xbs->here - xbs->header->xh_entries;
2064                 old_in_xb = 1;
2065
2066                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2067                         ret = ocfs2_xattr_bucket_get_name_value(inode,
2068                                                         bucket_xh(xbs->bucket),
2069                                                         i, &block_off,
2070                                                         &name_offset);
2071                         base = bucket_block(xbs->bucket, block_off);
2072                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2073                 } else {
2074                         base = xbs->base;
2075                         credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
2076                 }
2077         }
2078
2079         /*
2080          * delete a xattr doesn't need metadata and cluster allocation.
2081          * so just calculate the credits and return.
2082          *
2083          * The credits for removing the value tree will be extended
2084          * by ocfs2_remove_extent itself.
2085          */
2086         if (!xi->value) {
2087                 if (!ocfs2_xattr_is_local(xe))
2088                         credits += OCFS2_REMOVE_EXTENT_CREDITS;
2089
2090                 goto out;
2091         }
2092
2093         /* do cluster allocation guess first. */
2094         value_size = le64_to_cpu(xe->xe_value_size);
2095
2096         if (old_in_xb) {
2097                 /*
2098                  * In xattr set, we always try to set the xe in inode first,
2099                  * so if it can be inserted into inode successfully, the old
2100                  * one will be removed from the xattr block, and this xattr
2101                  * will be inserted into inode as a new xattr in inode.
2102                  */
2103                 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
2104                         clusters_add += new_clusters;
2105                         credits += OCFS2_REMOVE_EXTENT_CREDITS +
2106                                     OCFS2_INODE_UPDATE_CREDITS;
2107                         if (!ocfs2_xattr_is_local(xe))
2108                                 credits += ocfs2_calc_extend_credits(
2109                                                         inode->i_sb,
2110                                                         &def_xv.xv.xr_list,
2111                                                         new_clusters);
2112                         goto out;
2113                 }
2114         }
2115
2116         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2117                 /* the new values will be stored outside. */
2118                 u32 old_clusters = 0;
2119
2120                 if (!ocfs2_xattr_is_local(xe)) {
2121                         old_clusters =  ocfs2_clusters_for_bytes(inode->i_sb,
2122                                                                  value_size);
2123                         xv = (struct ocfs2_xattr_value_root *)
2124                              (base + name_offset + name_len);
2125                 } else
2126                         xv = &def_xv.xv;
2127
2128                 if (old_clusters >= new_clusters) {
2129                         credits += OCFS2_REMOVE_EXTENT_CREDITS;
2130                         goto out;
2131                 } else {
2132                         meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
2133                         clusters_add += new_clusters - old_clusters;
2134                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2135                                                              &xv->xr_list,
2136                                                              new_clusters -
2137                                                              old_clusters);
2138                         goto out;
2139                 }
2140         } else {
2141                 /*
2142                  * Now the new value will be stored inside. So if the new
2143                  * value is smaller than the size of value root or the old
2144                  * value, we don't need any allocation, otherwise we have
2145                  * to guess metadata allocation.
2146                  */
2147                 if ((ocfs2_xattr_is_local(xe) && value_size >= xi->value_len) ||
2148                     (!ocfs2_xattr_is_local(xe) &&
2149                      OCFS2_XATTR_ROOT_SIZE >= xi->value_len))
2150                         goto out;
2151         }
2152
2153 meta_guess:
2154         /* calculate metadata allocation. */
2155         if (di->i_xattr_loc) {
2156                 if (!xbs->xattr_bh) {
2157                         ret = ocfs2_read_block(inode,
2158                                                le64_to_cpu(di->i_xattr_loc),
2159                                                &bh);
2160                         if (ret) {
2161                                 mlog_errno(ret);
2162                                 goto out;
2163                         }
2164
2165                         xb = (struct ocfs2_xattr_block *)bh->b_data;
2166                 } else
2167                         xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2168
2169                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2170                         struct ocfs2_extent_list *el =
2171                                  &xb->xb_attrs.xb_root.xt_list;
2172                         meta_add += ocfs2_extend_meta_needed(el);
2173                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2174                                                              el, 1);
2175                 }
2176
2177                 /*
2178                  * This cluster will be used either for new bucket or for
2179                  * new xattr block.
2180                  * If the cluster size is the same as the bucket size, one
2181                  * more is needed since we may need to extend the bucket
2182                  * also.
2183                  */
2184                 clusters_add += 1;
2185                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2186                 if (OCFS2_XATTR_BUCKET_SIZE ==
2187                         OCFS2_SB(inode->i_sb)->s_clustersize) {
2188                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2189                         clusters_add += 1;
2190                 }
2191         } else {
2192                 meta_add += 1;
2193                 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
2194         }
2195 out:
2196         if (clusters_need)
2197                 *clusters_need = clusters_add;
2198         if (meta_need)
2199                 *meta_need = meta_add;
2200         if (credits_need)
2201                 *credits_need = credits;
2202         brelse(bh);
2203         return ret;
2204 }
2205
2206 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
2207                                      struct ocfs2_dinode *di,
2208                                      struct ocfs2_xattr_info *xi,
2209                                      struct ocfs2_xattr_search *xis,
2210                                      struct ocfs2_xattr_search *xbs,
2211                                      struct ocfs2_xattr_set_ctxt *ctxt,
2212                                      int *credits)
2213 {
2214         int clusters_add, meta_add, ret;
2215         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2216
2217         memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
2218
2219         ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
2220
2221         ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
2222                                         &clusters_add, &meta_add, credits);
2223         if (ret) {
2224                 mlog_errno(ret);
2225                 return ret;
2226         }
2227
2228         mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
2229              "credits = %d\n", xi->name, meta_add, clusters_add, *credits);
2230
2231         if (meta_add) {
2232                 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
2233                                                         &ctxt->meta_ac);
2234                 if (ret) {
2235                         mlog_errno(ret);
2236                         goto out;
2237                 }
2238         }
2239
2240         if (clusters_add) {
2241                 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
2242                 if (ret)
2243                         mlog_errno(ret);
2244         }
2245 out:
2246         if (ret) {
2247                 if (ctxt->meta_ac) {
2248                         ocfs2_free_alloc_context(ctxt->meta_ac);
2249                         ctxt->meta_ac = NULL;
2250                 }
2251
2252                 /*
2253                  * We cannot have an error and a non null ctxt->data_ac.
2254                  */
2255         }
2256
2257         return ret;
2258 }
2259
2260 static int __ocfs2_xattr_set_handle(struct inode *inode,
2261                                     struct ocfs2_dinode *di,
2262                                     struct ocfs2_xattr_info *xi,
2263                                     struct ocfs2_xattr_search *xis,
2264                                     struct ocfs2_xattr_search *xbs,
2265                                     struct ocfs2_xattr_set_ctxt *ctxt)
2266 {
2267         int ret = 0, credits;
2268
2269         if (!xi->value) {
2270                 /* Remove existing extended attribute */
2271                 if (!xis->not_found)
2272                         ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2273                 else if (!xbs->not_found)
2274                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2275         } else {
2276                 /* We always try to set extended attribute into inode first*/
2277                 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2278                 if (!ret && !xbs->not_found) {
2279                         /*
2280                          * If succeed and that extended attribute existing in
2281                          * external block, then we will remove it.
2282                          */
2283                         xi->value = NULL;
2284                         xi->value_len = 0;
2285
2286                         xis->not_found = -ENODATA;
2287                         ret = ocfs2_calc_xattr_set_need(inode,
2288                                                         di,
2289                                                         xi,
2290                                                         xis,
2291                                                         xbs,
2292                                                         NULL,
2293                                                         NULL,
2294                                                         &credits);
2295                         if (ret) {
2296                                 mlog_errno(ret);
2297                                 goto out;
2298                         }
2299
2300                         ret = ocfs2_extend_trans(ctxt->handle, credits +
2301                                         ctxt->handle->h_buffer_credits);
2302                         if (ret) {
2303                                 mlog_errno(ret);
2304                                 goto out;
2305                         }
2306                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2307                 } else if (ret == -ENOSPC) {
2308                         if (di->i_xattr_loc && !xbs->xattr_bh) {
2309                                 ret = ocfs2_xattr_block_find(inode,
2310                                                              xi->name_index,
2311                                                              xi->name, xbs);
2312                                 if (ret)
2313                                         goto out;
2314
2315                                 xis->not_found = -ENODATA;
2316                                 ret = ocfs2_calc_xattr_set_need(inode,
2317                                                                 di,
2318                                                                 xi,
2319                                                                 xis,
2320                                                                 xbs,
2321                                                                 NULL,
2322                                                                 NULL,
2323                                                                 &credits);
2324                                 if (ret) {
2325                                         mlog_errno(ret);
2326                                         goto out;
2327                                 }
2328
2329                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
2330                                         ctxt->handle->h_buffer_credits);
2331                                 if (ret) {
2332                                         mlog_errno(ret);
2333                                         goto out;
2334                                 }
2335                         }
2336                         /*
2337                          * If no space in inode, we will set extended attribute
2338                          * into external block.
2339                          */
2340                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2341                         if (ret)
2342                                 goto out;
2343                         if (!xis->not_found) {
2344                                 /*
2345                                  * If succeed and that extended attribute
2346                                  * existing in inode, we will remove it.
2347                                  */
2348                                 xi->value = NULL;
2349                                 xi->value_len = 0;
2350                                 xbs->not_found = -ENODATA;
2351                                 ret = ocfs2_calc_xattr_set_need(inode,
2352                                                                 di,
2353                                                                 xi,
2354                                                                 xis,
2355                                                                 xbs,
2356                                                                 NULL,
2357                                                                 NULL,
2358                                                                 &credits);
2359                                 if (ret) {
2360                                         mlog_errno(ret);
2361                                         goto out;
2362                                 }
2363
2364                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
2365                                                 ctxt->handle->h_buffer_credits);
2366                                 if (ret) {
2367                                         mlog_errno(ret);
2368                                         goto out;
2369                                 }
2370                                 ret = ocfs2_xattr_ibody_set(inode, xi,
2371                                                             xis, ctxt);
2372                         }
2373                 }
2374         }
2375
2376 out:
2377         return ret;
2378 }
2379
2380 /*
2381  * This function only called duing creating inode
2382  * for init security/acl xattrs of the new inode.
2383  * The xattrs could be put into ibody or extent block,
2384  * xattr bucket would not be use in this case.
2385  * transanction credits also be reserved in here.
2386  */
2387 int ocfs2_xattr_set_handle(handle_t *handle,
2388                            struct inode *inode,
2389                            struct buffer_head *di_bh,
2390                            int name_index,
2391                            const char *name,
2392                            const void *value,
2393                            size_t value_len,
2394                            int flags,
2395                            struct ocfs2_alloc_context *meta_ac,
2396                            struct ocfs2_alloc_context *data_ac)
2397 {
2398         struct ocfs2_dinode *di;
2399         int ret;
2400
2401         struct ocfs2_xattr_info xi = {
2402                 .name_index = name_index,
2403                 .name = name,
2404                 .value = value,
2405                 .value_len = value_len,
2406         };
2407
2408         struct ocfs2_xattr_search xis = {
2409                 .not_found = -ENODATA,
2410         };
2411
2412         struct ocfs2_xattr_search xbs = {
2413                 .not_found = -ENODATA,
2414         };
2415
2416         struct ocfs2_xattr_set_ctxt ctxt = {
2417                 .handle = handle,
2418                 .meta_ac = meta_ac,
2419                 .data_ac = data_ac,
2420         };
2421
2422         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2423                 return -EOPNOTSUPP;
2424
2425         xis.inode_bh = xbs.inode_bh = di_bh;
2426         di = (struct ocfs2_dinode *)di_bh->b_data;
2427
2428         down_write(&OCFS2_I(inode)->ip_xattr_sem);
2429
2430         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2431         if (ret)
2432                 goto cleanup;
2433         if (xis.not_found) {
2434                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2435                 if (ret)
2436                         goto cleanup;
2437         }
2438
2439         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2440
2441 cleanup:
2442         up_write(&OCFS2_I(inode)->ip_xattr_sem);
2443         brelse(xbs.xattr_bh);
2444
2445         return ret;
2446 }
2447
2448 /*
2449  * ocfs2_xattr_set()
2450  *
2451  * Set, replace or remove an extended attribute for this inode.
2452  * value is NULL to remove an existing extended attribute, else either
2453  * create or replace an extended attribute.
2454  */
2455 int ocfs2_xattr_set(struct inode *inode,
2456                     int name_index,
2457                     const char *name,
2458                     const void *value,
2459                     size_t value_len,
2460                     int flags)
2461 {
2462         struct buffer_head *di_bh = NULL;
2463         struct ocfs2_dinode *di;
2464         int ret, credits;
2465         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2466         struct inode *tl_inode = osb->osb_tl_inode;
2467         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2468
2469         struct ocfs2_xattr_info xi = {
2470                 .name_index = name_index,
2471                 .name = name,
2472                 .value = value,
2473                 .value_len = value_len,
2474         };
2475
2476         struct ocfs2_xattr_search xis = {
2477                 .not_found = -ENODATA,
2478         };
2479
2480         struct ocfs2_xattr_search xbs = {
2481                 .not_found = -ENODATA,
2482         };
2483
2484         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2485                 return -EOPNOTSUPP;
2486
2487         /*
2488          * Only xbs will be used on indexed trees.  xis doesn't need a
2489          * bucket.
2490          */
2491         xbs.bucket = ocfs2_xattr_bucket_new(inode);
2492         if (!xbs.bucket) {
2493                 mlog_errno(-ENOMEM);
2494                 return -ENOMEM;
2495         }
2496
2497         ret = ocfs2_inode_lock(inode, &di_bh, 1);
2498         if (ret < 0) {
2499                 mlog_errno(ret);
2500                 goto cleanup_nolock;
2501         }
2502         xis.inode_bh = xbs.inode_bh = di_bh;
2503         di = (struct ocfs2_dinode *)di_bh->b_data;
2504
2505         down_write(&OCFS2_I(inode)->ip_xattr_sem);
2506         /*
2507          * Scan inode and external block to find the same name
2508          * extended attribute and collect search infomation.
2509          */
2510         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2511         if (ret)
2512                 goto cleanup;
2513         if (xis.not_found) {
2514                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2515                 if (ret)
2516                         goto cleanup;
2517         }
2518
2519         if (xis.not_found && xbs.not_found) {
2520                 ret = -ENODATA;
2521                 if (flags & XATTR_REPLACE)
2522                         goto cleanup;
2523                 ret = 0;
2524                 if (!value)
2525                         goto cleanup;
2526         } else {
2527                 ret = -EEXIST;
2528                 if (flags & XATTR_CREATE)
2529                         goto cleanup;
2530         }
2531
2532
2533         mutex_lock(&tl_inode->i_mutex);
2534
2535         if (ocfs2_truncate_log_needs_flush(osb)) {
2536                 ret = __ocfs2_flush_truncate_log(osb);
2537                 if (ret < 0) {
2538                         mutex_unlock(&tl_inode->i_mutex);
2539                         mlog_errno(ret);
2540                         goto cleanup;
2541                 }
2542         }
2543         mutex_unlock(&tl_inode->i_mutex);
2544
2545         ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
2546                                         &xbs, &ctxt, &credits);
2547         if (ret) {
2548                 mlog_errno(ret);
2549                 goto cleanup;
2550         }
2551
2552         ctxt.handle = ocfs2_start_trans(osb, credits);
2553         if (IS_ERR(ctxt.handle)) {
2554                 ret = PTR_ERR(ctxt.handle);
2555                 mlog_errno(ret);
2556                 goto cleanup;
2557         }
2558
2559         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2560
2561         ocfs2_commit_trans(osb, ctxt.handle);
2562
2563         if (ctxt.data_ac)
2564                 ocfs2_free_alloc_context(ctxt.data_ac);
2565         if (ctxt.meta_ac)
2566                 ocfs2_free_alloc_context(ctxt.meta_ac);
2567         if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
2568                 ocfs2_schedule_truncate_log_flush(osb, 1);
2569         ocfs2_run_deallocs(osb, &ctxt.dealloc);
2570 cleanup:
2571         up_write(&OCFS2_I(inode)->ip_xattr_sem);
2572         ocfs2_inode_unlock(inode, 1);
2573 cleanup_nolock:
2574         brelse(di_bh);
2575         brelse(xbs.xattr_bh);
2576         ocfs2_xattr_bucket_free(xbs.bucket);
2577
2578         return ret;
2579 }
2580
2581 /*
2582  * Find the xattr extent rec which may contains name_hash.
2583  * e_cpos will be the first name hash of the xattr rec.
2584  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
2585  */
2586 static int ocfs2_xattr_get_rec(struct inode *inode,
2587                                u32 name_hash,
2588                                u64 *p_blkno,
2589                                u32 *e_cpos,
2590                                u32 *num_clusters,
2591                                struct ocfs2_extent_list *el)
2592 {
2593         int ret = 0, i;
2594         struct buffer_head *eb_bh = NULL;
2595         struct ocfs2_extent_block *eb;
2596         struct ocfs2_extent_rec *rec = NULL;
2597         u64 e_blkno = 0;
2598
2599         if (el->l_tree_depth) {
2600                 ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh);
2601                 if (ret) {
2602                         mlog_errno(ret);
2603                         goto out;
2604                 }
2605
2606                 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2607                 el = &eb->h_list;
2608
2609                 if (el->l_tree_depth) {
2610                         ocfs2_error(inode->i_sb,
2611                                     "Inode %lu has non zero tree depth in "
2612                                     "xattr tree block %llu\n", inode->i_ino,
2613                                     (unsigned long long)eb_bh->b_blocknr);
2614                         ret = -EROFS;
2615                         goto out;
2616                 }
2617         }
2618
2619         for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
2620                 rec = &el->l_recs[i];
2621
2622                 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
2623                         e_blkno = le64_to_cpu(rec->e_blkno);
2624                         break;
2625                 }
2626         }
2627
2628         if (!e_blkno) {
2629                 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
2630                             "record (%u, %u, 0) in xattr", inode->i_ino,
2631                             le32_to_cpu(rec->e_cpos),
2632                             ocfs2_rec_clusters(el, rec));
2633                 ret = -EROFS;
2634                 goto out;
2635         }
2636
2637         *p_blkno = le64_to_cpu(rec->e_blkno);
2638         *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
2639         if (e_cpos)
2640                 *e_cpos = le32_to_cpu(rec->e_cpos);
2641 out:
2642         brelse(eb_bh);
2643         return ret;
2644 }
2645
/*
 * Per-bucket callback type used by ocfs2_iterate_xattr_buckets().
 * It receives the inode, the bucket currently loaded and the opaque
 * 'para' pointer given to the iterator; a non-zero return stops the
 * iteration and is passed back to the iterator's caller.
 */
typedef int xattr_bucket_func(struct inode *inode,
			      struct ocfs2_xattr_bucket *bucket,
			      void *para);
2649
2650 static int ocfs2_find_xe_in_bucket(struct inode *inode,
2651                                    struct ocfs2_xattr_bucket *bucket,
2652                                    int name_index,
2653                                    const char *name,
2654                                    u32 name_hash,
2655                                    u16 *xe_index,
2656                                    int *found)
2657 {
2658         int i, ret = 0, cmp = 1, block_off, new_offset;
2659         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
2660         size_t name_len = strlen(name);
2661         struct ocfs2_xattr_entry *xe = NULL;
2662         char *xe_name;
2663
2664         /*
2665          * We don't use binary search in the bucket because there
2666          * may be multiple entries with the same name hash.
2667          */
2668         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
2669                 xe = &xh->xh_entries[i];
2670
2671                 if (name_hash > le32_to_cpu(xe->xe_name_hash))
2672                         continue;
2673                 else if (name_hash < le32_to_cpu(xe->xe_name_hash))
2674                         break;
2675
2676                 cmp = name_index - ocfs2_xattr_get_type(xe);
2677                 if (!cmp)
2678                         cmp = name_len - xe->xe_name_len;
2679                 if (cmp)
2680                         continue;
2681
2682                 ret = ocfs2_xattr_bucket_get_name_value(inode,
2683                                                         xh,
2684                                                         i,
2685                                                         &block_off,
2686                                                         &new_offset);
2687                 if (ret) {
2688                         mlog_errno(ret);
2689                         break;
2690                 }
2691
2692                 xe_name = bucket_block(bucket, block_off) + new_offset;
2693                 if (!memcmp(name, xe_name, name_len)) {
2694                         *xe_index = i;
2695                         *found = 1;
2696                         ret = 0;
2697                         break;
2698                 }
2699         }
2700
2701         return ret;
2702 }
2703
2704 /*
2705  * Find the specified xattr entry in a series of buckets.
2706  * This series start from p_blkno and last for num_clusters.
2707  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
2708  * the num of the valid buckets.
2709  *
2710  * Return the buffer_head this xattr should reside in. And if the xattr's
2711  * hash is in the gap of 2 buckets, return the lower bucket.
2712  */
2713 static int ocfs2_xattr_bucket_find(struct inode *inode,
2714                                    int name_index,
2715                                    const char *name,
2716                                    u32 name_hash,
2717                                    u64 p_blkno,
2718                                    u32 first_hash,
2719                                    u32 num_clusters,
2720                                    struct ocfs2_xattr_search *xs)
2721 {
2722         int ret, found = 0;
2723         struct ocfs2_xattr_header *xh = NULL;
2724         struct ocfs2_xattr_entry *xe = NULL;
2725         u16 index = 0;
2726         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2727         int low_bucket = 0, bucket, high_bucket;
2728         struct ocfs2_xattr_bucket *search;
2729         u32 last_hash;
2730         u64 blkno, lower_blkno = 0;
2731
2732         search = ocfs2_xattr_bucket_new(inode);
2733         if (!search) {
2734                 ret = -ENOMEM;
2735                 mlog_errno(ret);
2736                 goto out;
2737         }
2738
2739         ret = ocfs2_read_xattr_bucket(search, p_blkno);
2740         if (ret) {
2741                 mlog_errno(ret);
2742                 goto out;
2743         }
2744
2745         xh = bucket_xh(search);
2746         high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
2747         while (low_bucket <= high_bucket) {
2748                 ocfs2_xattr_bucket_relse(search);
2749
2750                 bucket = (low_bucket + high_bucket) / 2;
2751                 blkno = p_blkno + bucket * blk_per_bucket;
2752                 ret = ocfs2_read_xattr_bucket(search, blkno);
2753                 if (ret) {
2754                         mlog_errno(ret);
2755                         goto out;
2756                 }
2757
2758                 xh = bucket_xh(search);
2759                 xe = &xh->xh_entries[0];
2760                 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
2761                         high_bucket = bucket - 1;
2762                         continue;
2763                 }
2764
2765                 /*
2766                  * Check whether the hash of the last entry in our
2767                  * bucket is larger than the search one. for an empty
2768                  * bucket, the last one is also the first one.
2769                  */
2770                 if (xh->xh_count)
2771                         xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
2772
2773                 last_hash = le32_to_cpu(xe->xe_name_hash);
2774
2775                 /* record lower_blkno which may be the insert place. */
2776                 lower_blkno = blkno;
2777
2778                 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
2779                         low_bucket = bucket + 1;
2780                         continue;
2781                 }
2782
2783                 /* the searched xattr should reside in this bucket if exists. */
2784                 ret = ocfs2_find_xe_in_bucket(inode, search,
2785                                               name_index, name, name_hash,
2786                                               &index, &found);
2787                 if (ret) {
2788                         mlog_errno(ret);
2789                         goto out;
2790                 }
2791                 break;
2792         }
2793
2794         /*
2795          * Record the bucket we have found.
2796          * When the xattr's hash value is in the gap of 2 buckets, we will
2797          * always set it to the previous bucket.
2798          */
2799         if (!lower_blkno)
2800                 lower_blkno = p_blkno;
2801
2802         /* This should be in cache - we just read it during the search */
2803         ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
2804         if (ret) {
2805                 mlog_errno(ret);
2806                 goto out;
2807         }
2808
2809         xs->header = bucket_xh(xs->bucket);
2810         xs->base = bucket_block(xs->bucket, 0);
2811         xs->end = xs->base + inode->i_sb->s_blocksize;
2812
2813         if (found) {
2814                 xs->here = &xs->header->xh_entries[index];
2815                 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
2816                      (unsigned long long)bucket_blkno(xs->bucket), index);
2817         } else
2818                 ret = -ENODATA;
2819
2820 out:
2821         ocfs2_xattr_bucket_free(search);
2822         return ret;
2823 }
2824
2825 static int ocfs2_xattr_index_block_find(struct inode *inode,
2826                                         struct buffer_head *root_bh,
2827                                         int name_index,
2828                                         const char *name,
2829                                         struct ocfs2_xattr_search *xs)
2830 {
2831         int ret;
2832         struct ocfs2_xattr_block *xb =
2833                         (struct ocfs2_xattr_block *)root_bh->b_data;
2834         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
2835         struct ocfs2_extent_list *el = &xb_root->xt_list;
2836         u64 p_blkno = 0;
2837         u32 first_hash, num_clusters = 0;
2838         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
2839
2840         if (le16_to_cpu(el->l_next_free_rec) == 0)
2841                 return -ENODATA;
2842
2843         mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
2844              name, name_hash, name_index);
2845
2846         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
2847                                   &num_clusters, el);
2848         if (ret) {
2849                 mlog_errno(ret);
2850                 goto out;
2851         }
2852
2853         BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
2854
2855         mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
2856              "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
2857              first_hash);
2858
2859         ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
2860                                       p_blkno, first_hash, num_clusters, xs);
2861
2862 out:
2863         return ret;
2864 }
2865
2866 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
2867                                        u64 blkno,
2868                                        u32 clusters,
2869                                        xattr_bucket_func *func,
2870                                        void *para)
2871 {
2872         int i, ret = 0;
2873         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
2874         u32 num_buckets = clusters * bpc;
2875         struct ocfs2_xattr_bucket *bucket;
2876
2877         bucket = ocfs2_xattr_bucket_new(inode);
2878         if (!bucket) {
2879                 mlog_errno(-ENOMEM);
2880                 return -ENOMEM;
2881         }
2882
2883         mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
2884              clusters, (unsigned long long)blkno);
2885
2886         for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
2887                 ret = ocfs2_read_xattr_bucket(bucket, blkno);
2888                 if (ret) {
2889                         mlog_errno(ret);
2890                         break;
2891                 }
2892
2893                 /*
2894                  * The real bucket num in this series of blocks is stored
2895                  * in the 1st bucket.
2896                  */
2897                 if (i == 0)
2898                         num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
2899
2900                 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
2901                      (unsigned long long)blkno,
2902                      le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
2903                 if (func) {
2904                         ret = func(inode, bucket, para);
2905                         if (ret)
2906                                 mlog_errno(ret);
2907                         /* Fall through to bucket_relse() */
2908                 }
2909
2910                 ocfs2_xattr_bucket_relse(bucket);
2911                 if (ret)
2912                         break;
2913         }
2914
2915         ocfs2_xattr_bucket_free(bucket);
2916         return ret;
2917 }
2918
/*
 * State shared across buckets while listing the xattrs of an indexed
 * tree; passed as the 'para' argument of ocfs2_list_xattr_bucket().
 */
struct ocfs2_xattr_tree_list {
	/* destination buffer handed to ocfs2_xattr_list_entry() */
	char *buffer;
	/* size of 'buffer' as given by the caller */
	size_t buffer_size;
	/* running total updated by ocfs2_xattr_list_entry() */
	size_t result;
};
2924
2925 static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
2926                                              struct ocfs2_xattr_header *xh,
2927                                              int index,
2928                                              int *block_off,
2929                                              int *new_offset)
2930 {
2931         u16 name_offset;
2932
2933         if (index < 0 || index >= le16_to_cpu(xh->xh_count))
2934                 return -EINVAL;
2935
2936         name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
2937
2938         *block_off = name_offset >> inode->i_sb->s_blocksize_bits;
2939         *new_offset = name_offset % inode->i_sb->s_blocksize;
2940
2941         return 0;
2942 }
2943
2944 static int ocfs2_list_xattr_bucket(struct inode *inode,
2945                                    struct ocfs2_xattr_bucket *bucket,
2946                                    void *para)
2947 {
2948         int ret = 0, type;
2949         struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
2950         int i, block_off, new_offset;
2951         const char *prefix, *name;
2952
2953         for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
2954                 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
2955                 type = ocfs2_xattr_get_type(entry);
2956                 prefix = ocfs2_xattr_prefix(type);
2957
2958                 if (prefix) {
2959                         ret = ocfs2_xattr_bucket_get_name_value(inode,
2960                                                                 bucket_xh(bucket),
2961                                                                 i,
2962                                                                 &block_off,
2963                                                                 &new_offset);
2964                         if (ret)
2965                                 break;
2966
2967                         name = (const char *)bucket_block(bucket, block_off) +
2968                                 new_offset;
2969                         ret = ocfs2_xattr_list_entry(xl->buffer,
2970                                                      xl->buffer_size,
2971                                                      &xl->result,
2972                                                      prefix, name,
2973                                                      entry->xe_name_len);
2974                         if (ret)
2975                                 break;
2976                 }
2977         }
2978
2979         return ret;
2980 }
2981
2982 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
2983                                              struct ocfs2_xattr_tree_root *xt,
2984                                              char *buffer,
2985                                              size_t buffer_size)
2986 {
2987         struct ocfs2_extent_list *el = &xt->xt_list;
2988         int ret = 0;
2989         u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
2990         u64 p_blkno = 0;
2991         struct ocfs2_xattr_tree_list xl = {
2992                 .buffer = buffer,
2993                 .buffer_size = buffer_size,
2994                 .result = 0,
2995         };
2996
2997         if (le16_to_cpu(el->l_next_free_rec) == 0)
2998                 return 0;
2999
3000         while (name_hash > 0) {
3001                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
3002                                           &e_cpos, &num_clusters, el);
3003                 if (ret) {
3004                         mlog_errno(ret);
3005                         goto out;
3006                 }
3007
3008                 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
3009                                                   ocfs2_list_xattr_bucket,
3010                                                   &xl);
3011                 if (ret) {
3012                         mlog_errno(ret);
3013                         goto out;
3014                 }
3015
3016                 if (e_cpos == 0)
3017                         break;
3018
3019                 name_hash = e_cpos - 1;
3020         }
3021
3022         ret = xl.result;
3023 out:
3024         return ret;
3025 }
3026
3027 static int cmp_xe(const void *a, const void *b)
3028 {
3029         const struct ocfs2_xattr_entry *l = a, *r = b;
3030         u32 l_hash = le32_to_cpu(l->xe_name_hash);
3031         u32 r_hash = le32_to_cpu(r->xe_name_hash);
3032
3033         if (l_hash > r_hash)
3034                 return 1;
3035         if (l_hash < r_hash)
3036                 return -1;
3037         return 0;
3038 }
3039
3040 static void swap_xe(void *a, void *b, int size)
3041 {
3042         struct ocfs2_xattr_entry *l = a, *r = b, tmp;
3043
3044         tmp = *l;
3045         memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
3046         memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
3047 }
3048
3049 /*
3050  * When the ocfs2_xattr_block is filled up, new bucket will be created
3051  * and all the xattr entries will be moved to the new bucket.
3052  * The header goes at the start of the bucket, and the names+values are
3053  * filled from the end.  This is why *target starts as the last buffer.
3054  * Note: we need to sort the entries since they are not saved in order
3055  * in the ocfs2_xattr_block.
3056  */
3057 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
3058                                            struct buffer_head *xb_bh,
3059                                            struct ocfs2_xattr_bucket *bucket)
3060 {
3061         int i, blocksize = inode->i_sb->s_blocksize;
3062         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3063         u16 offset, size, off_change;
3064         struct ocfs2_xattr_entry *xe;
3065         struct ocfs2_xattr_block *xb =
3066                                 (struct ocfs2_xattr_block *)xb_bh->b_data;
3067         struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
3068         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3069         u16 count = le16_to_cpu(xb_xh->xh_count);
3070         char *src = xb_bh->b_data;
3071         char *target = bucket_block(bucket, blks - 1);
3072
3073         mlog(0, "cp xattr from block %llu to bucket %llu\n",
3074              (unsigned long long)xb_bh->b_blocknr,
3075              (unsigned long long)bucket_blkno(bucket));
3076
3077         for (i = 0; i < blks; i++)
3078                 memset(bucket_block(bucket, i), 0, blocksize);
3079
3080         /*
3081          * Since the xe_name_offset is based on ocfs2_xattr_header,
3082          * there is a offset change corresponding to the change of
3083          * ocfs2_xattr_header's position.
3084          */
3085         off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3086         xe = &xb_xh->xh_entries[count - 1];
3087         offset = le16_to_cpu(xe->xe_name_offset) + off_change;
3088         size = blocksize - offset;
3089
3090         /* copy all the names and values. */
3091         memcpy(target + offset, src + offset, size);
3092
3093         /* Init new header now. */
3094         xh->xh_count = xb_xh->xh_count;
3095         xh->xh_num_buckets = cpu_to_le16(1);
3096         xh->xh_name_value_len = cpu_to_le16(size);
3097         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
3098
3099         /* copy all the entries. */
3100         target = bucket_block(bucket, 0);
3101         offset = offsetof(struct ocfs2_xattr_header, xh_entries);
3102         size = count * sizeof(struct ocfs2_xattr_entry);
3103         memcpy(target + offset, (char *)xb_xh + offset, size);
3104
3105         /* Change the xe offset for all the xe because of the move. */
3106         off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
3107                  offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3108         for (i = 0; i < count; i++)
3109                 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
3110
3111         mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
3112              offset, size, off_change);
3113
3114         sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
3115              cmp_xe, swap_xe);
3116 }
3117
3118 /*
3119  * After we move xattr from block to index btree, we have to
3120  * update ocfs2_xattr_search to the new xe and base.
3121  *
3122  * When the entry is in xattr block, xattr_bh indicates the storage place.
3123  * While if the entry is in index b-tree, "bucket" indicates the
3124  * real place of the xattr.
3125  */
3126 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
3127                                             struct ocfs2_xattr_search *xs,
3128                                             struct buffer_head *old_bh)
3129 {
3130         char *buf = old_bh->b_data;
3131         struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
3132         struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
3133         int i;
3134
3135         xs->header = bucket_xh(xs->bucket);
3136         xs->base = bucket_block(xs->bucket, 0);
3137         xs->end = xs->base + inode->i_sb->s_blocksize;
3138
3139         if (xs->not_found)
3140                 return;
3141
3142         i = xs->here - old_xh->xh_entries;
3143         xs->here = &xs->header->xh_entries[i];
3144 }
3145
3146 static int ocfs2_xattr_create_index_block(struct inode *inode,
3147                                           struct ocfs2_xattr_search *xs,
3148                                           struct ocfs2_xattr_set_ctxt *ctxt)
3149 {
3150         int ret;
3151         u32 bit_off, len;
3152         u64 blkno;
3153         handle_t *handle = ctxt->handle;
3154         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3155         struct ocfs2_inode_info *oi = OCFS2_I(inode);
3156         struct buffer_head *xb_bh = xs->xattr_bh;
3157         struct ocfs2_xattr_block *xb =
3158                         (struct ocfs2_xattr_block *)xb_bh->b_data;
3159         struct ocfs2_xattr_tree_root *xr;
3160         u16 xb_flags = le16_to_cpu(xb->xb_flags);
3161
3162         mlog(0, "create xattr index block for %llu\n",
3163              (unsigned long long)xb_bh->b_blocknr);
3164
3165         BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
3166         BUG_ON(!xs->bucket);
3167
3168         /*
3169          * XXX:
3170          * We can use this lock for now, and maybe move to a dedicated mutex
3171          * if performance becomes a problem later.
3172          */
3173         down_write(&oi->ip_alloc_sem);
3174
3175         ret = ocfs2_journal_access(handle, inode, xb_bh,
3176                                    OCFS2_JOURNAL_ACCESS_WRITE);
3177         if (ret) {
3178                 mlog_errno(ret);
3179                 goto out;
3180         }
3181
3182         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
3183                                      1, 1, &bit_off, &len);
3184         if (ret) {
3185                 mlog_errno(ret);
3186                 goto out;
3187         }
3188
3189         /*
3190          * The bucket may spread in many blocks, and
3191          * we will only touch the 1st block and the last block
3192          * in the whole bucket(one for entry and one for data).
3193          */
3194         blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
3195
3196         mlog(0, "allocate 1 cluster from %llu to xattr block\n",
3197              (unsigned long long)blkno);
3198
3199         ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
3200         if (ret) {
3201                 mlog_errno(ret);
3202                 goto out;
3203         }
3204
3205         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
3206                                                 OCFS2_JOURNAL_ACCESS_CREATE);
3207         if (ret) {
3208                 mlog_errno(ret);
3209                 goto out;
3210         }
3211
3212         ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
3213         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
3214
3215         ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
3216
3217         /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
3218         memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
3219                offsetof(struct ocfs2_xattr_block, xb_attrs));
3220
3221         xr = &xb->xb_attrs.xb_root;
3222         xr->xt_clusters = cpu_to_le32(1);
3223         xr->xt_last_eb_blk = 0;
3224         xr->xt_list.l_tree_depth = 0;
3225         xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
3226         xr->xt_list.l_next_free_rec = cpu_to_le16(1);
3227
3228         xr->xt_list.l_recs[0].e_cpos = 0;
3229         xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
3230         xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
3231
3232         xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
3233
3234         ocfs2_journal_dirty(handle, xb_bh);
3235
3236 out:
3237         up_write(&oi->ip_alloc_sem);
3238
3239         return ret;
3240 }
3241
3242 static int cmp_xe_offset(const void *a, const void *b)
3243 {
3244         const struct ocfs2_xattr_entry *l = a, *r = b;
3245         u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
3246         u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
3247
3248         if (l_name_offset < r_name_offset)
3249                 return 1;
3250         if (l_name_offset > r_name_offset)
3251                 return -1;
3252         return 0;
3253 }
3254
3255 /*
3256  * defrag a xattr bucket if we find that the bucket has some
3257  * holes beteen name/value pairs.
3258  * We will move all the name/value pairs to the end of the bucket
3259  * so that we can spare some space for insertion.
3260  */
3261 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
3262                                      handle_t *handle,
3263                                      struct ocfs2_xattr_bucket *bucket)
3264 {
3265         int ret, i;
3266         size_t end, offset, len, value_len;
3267         struct ocfs2_xattr_header *xh;
3268         char *entries, *buf, *bucket_buf = NULL;
3269         u64 blkno = bucket_blkno(bucket);
3270         u16 xh_free_start;
3271         size_t blocksize = inode->i_sb->s_blocksize;
3272         struct ocfs2_xattr_entry *xe;
3273
3274         /*
3275          * In order to make the operation more efficient and generic,
3276          * we copy all the blocks into a contiguous memory and do the
3277          * defragment there, so if anything is error, we will not touch
3278          * the real block.
3279          */
3280         bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
3281         if (!bucket_buf) {
3282                 ret = -EIO;
3283                 goto out;
3284         }
3285
3286         buf = bucket_buf;
3287         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3288                 memcpy(buf, bucket_block(bucket, i), blocksize);
3289
3290         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
3291                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3292         if (ret < 0) {
3293                 mlog_errno(ret);
3294                 goto out;
3295         }
3296
3297         xh = (struct ocfs2_xattr_header *)bucket_buf;
3298         entries = (char *)xh->xh_entries;
3299         xh_free_start = le16_to_cpu(xh->xh_free_start);
3300
3301         mlog(0, "adjust xattr bucket in %llu, count = %u, "
3302              "xh_free_start = %u, xh_name_value_len = %u.\n",
3303              (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
3304              xh_free_start, le16_to_cpu(xh->xh_name_value_len));
3305
3306         /*
3307          * sort all the entries by their offset.
3308          * the largest will be the first, so that we can
3309          * move them to the end one by one.
3310          */
3311         sort(entries, le16_to_cpu(xh->xh_count),
3312              sizeof(struct ocfs2_xattr_entry),
3313              cmp_xe_offset, swap_xe);
3314
3315         /* Move all name/values to the end of the bucket. */
3316         xe = xh->xh_entries;
3317         end = OCFS2_XATTR_BUCKET_SIZE;
3318         for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
3319                 offset = le16_to_cpu(xe->xe_name_offset);
3320                 if (ocfs2_xattr_is_local(xe))
3321                         value_len = OCFS2_XATTR_SIZE(
3322                                         le64_to_cpu(xe->xe_value_size));
3323                 else
3324                         value_len = OCFS2_XATTR_ROOT_SIZE;
3325                 len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;
3326
3327                 /*
3328                  * We must make sure that the name/value pair
3329                  * exist in the same block. So adjust end to
3330                  * the previous block end if needed.
3331                  */
3332                 if (((end - len) / blocksize !=
3333                         (end - 1) / blocksize))
3334                         end = end - end % blocksize;
3335
3336                 if (end > offset + len) {
3337                         memmove(bucket_buf + end - len,
3338                                 bucket_buf + offset, len);
3339                         xe->xe_name_offset = cpu_to_le16(end - len);
3340                 }
3341
3342                 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
3343                                 "bucket %llu\n", (unsigned long long)blkno);
3344
3345                 end -= len;
3346         }
3347
3348         mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
3349                         "bucket %llu\n", (unsigned long long)blkno);
3350
3351         if (xh_free_start == end)
3352                 goto out;
3353
3354         memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
3355         xh->xh_free_start = cpu_to_le16(end);
3356
3357         /* sort the entries by their name_hash. */
3358         sort(entries, le16_to_cpu(xh->xh_count),
3359              sizeof(struct ocfs2_xattr_entry),
3360              cmp_xe, swap_xe);
3361
3362         buf = bucket_buf;
3363         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3364                 memcpy(bucket_block(bucket, i), buf, blocksize);
3365         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
3366
3367 out:
3368         kfree(bucket_buf);
3369         return ret;
3370 }
3371
3372 /*
3373  * Move half nums of the xattr bucket in the previous cluster to this new
3374  * cluster. We only touch the last cluster of the previous extend record.
3375  *
3376  * first_bh is the first buffer_head of a series of bucket in the same
3377  * extent rec and header_bh is the header of one bucket in this cluster.
3378  * They will be updated if we move the data header_bh contains to the new
3379  * cluster. first_hash will be set as the 1st xe's name_hash of the new cluster.
3380  */
3381 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
3382                                                handle_t *handle,
3383                                                struct buffer_head **first_bh,
3384                                                struct buffer_head **header_bh,
3385                                                u64 new_blkno,
3386                                                u64 prev_blkno,
3387                                                u32 num_clusters,
3388                                                u32 *first_hash)
3389 {
3390         int i, ret, credits;
3391         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3392         int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3393         int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3394         int blocksize = inode->i_sb->s_blocksize;
3395         struct buffer_head *old_bh, *new_bh, *prev_bh, *new_first_bh = NULL;
3396         struct ocfs2_xattr_header *new_xh;
3397         struct ocfs2_xattr_header *xh =
3398                         (struct ocfs2_xattr_header *)((*first_bh)->b_data);
3399
3400         BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets);
3401         BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize);
3402
3403         prev_bh = *first_bh;
3404         get_bh(prev_bh);
3405         xh = (struct ocfs2_xattr_header *)prev_bh->b_data;
3406
3407         prev_blkno += (num_clusters - 1) * bpc + bpc / 2;
3408
3409         mlog(0, "move half of xattrs in cluster %llu to %llu\n",
3410              (unsigned long long)prev_blkno, (unsigned long long)new_blkno);
3411
3412         /*
3413          * We need to update the 1st half of the new cluster and
3414          * 1 more for the update of the 1st bucket of the previous
3415          * extent record.
3416          */
3417         credits = bpc / 2 + 1 + handle->h_buffer_credits;
3418         ret = ocfs2_extend_trans(handle, credits);
3419         if (ret) {
3420                 mlog_errno(ret);
3421                 goto out;
3422         }
3423
3424         ret = ocfs2_journal_access(handle, inode, prev_bh,
3425                                    OCFS2_JOURNAL_ACCESS_WRITE);
3426         if (ret) {
3427                 mlog_errno(ret);
3428                 goto out;
3429         }
3430
3431         for (i = 0; i < bpc / 2; i++, prev_blkno++, new_blkno++) {
3432                 old_bh = new_bh = NULL;
3433                 new_bh = sb_getblk(inode->i_sb, new_blkno);
3434                 if (!new_bh) {
3435                         ret = -EIO;
3436                         mlog_errno(ret);
3437                         goto out;
3438                 }
3439
3440                 ocfs2_set_new_buffer_uptodate(inode, new_bh);
3441
3442                 ret = ocfs2_journal_access(handle, inode, new_bh,
3443                                            OCFS2_JOURNAL_ACCESS_CREATE);
3444                 if (ret < 0) {
3445                         mlog_errno(ret);
3446                         brelse(new_bh);
3447                         goto out;
3448                 }
3449
3450                 ret = ocfs2_read_block(inode, prev_blkno, &old_bh);
3451                 if (ret < 0) {
3452                         mlog_errno(ret);
3453                         brelse(new_bh);
3454                         goto out;
3455                 }
3456
3457                 memcpy(new_bh->b_data, old_bh->b_data, blocksize);
3458
3459                 if (i == 0) {
3460                         new_xh = (struct ocfs2_xattr_header *)new_bh->b_data;
3461                         new_xh->xh_num_buckets = cpu_to_le16(num_buckets / 2);
3462
3463                         if (first_hash)
3464                                 *first_hash = le32_to_cpu(
3465                                         new_xh->xh_entries[0].xe_name_hash);
3466                         new_first_bh = new_bh;
3467                         get_bh(new_first_bh);
3468                 }
3469
3470                 ocfs2_journal_dirty(handle, new_bh);
3471
3472                 if (*header_bh == old_bh) {
3473                         brelse(*header_bh);
3474                         *header_bh = new_bh;
3475                         get_bh(*header_bh);
3476
3477                         brelse(*first_bh);
3478                         *first_bh = new_first_bh;
3479                         get_bh(*first_bh);
3480                 }
3481                 brelse(new_bh);
3482                 brelse(old_bh);
3483         }
3484
3485         le16_add_cpu(&xh->xh_num_buckets, -(num_buckets / 2));
3486
3487         ocfs2_journal_dirty(handle, prev_bh);
3488 out:
3489         brelse(prev_bh);
3490         brelse(new_first_bh);
3491         return ret;
3492 }
3493
3494 /*
3495  * Find the suitable pos when we divide a bucket into 2.
3496  * We have to make sure the xattrs with the same hash value exist
3497  * in the same bucket.
3498  *
3499  * If this ocfs2_xattr_header covers more than one hash value, find a
3500  * place where the hash value changes.  Try to find the most even split.
3501  * The most common case is that all entries have different hash values,
3502  * and the first check we make will find a place to split.
3503  */
3504 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
3505 {
3506         struct ocfs2_xattr_entry *entries = xh->xh_entries;
3507         int count = le16_to_cpu(xh->xh_count);
3508         int delta, middle = count / 2;
3509
3510         /*
3511          * We start at the middle.  Each step gets farther away in both
3512          * directions.  We therefore hit the change in hash value
3513          * nearest to the middle.  Note that this loop does not execute for
3514          * count < 2.
3515          */
3516         for (delta = 0; delta < middle; delta++) {
3517                 /* Let's check delta earlier than middle */
3518                 if (cmp_xe(&entries[middle - delta - 1],
3519                            &entries[middle - delta]))
3520                         return middle - delta;
3521
3522                 /* For even counts, don't walk off the end */
3523                 if ((middle + delta + 1) == count)
3524                         continue;
3525
3526                 /* Now try delta past middle */
3527                 if (cmp_xe(&entries[middle + delta],
3528                            &entries[middle + delta + 1]))
3529                         return middle + delta + 1;
3530         }
3531
3532         /* Every entry had the same hash */
3533         return count;
3534 }
3535
3536 /*
3537  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
3538  * first_hash will record the 1st hash of the new bucket.
3539  *
3540  * Normally half of the xattrs will be moved.  But we have to make
3541  * sure that the xattrs with the same hash value are stored in the
3542  * same bucket. If all the xattrs in this bucket have the same hash
3543  * value, the new bucket will be initialized as an empty one and the
3544  * first_hash will be initialized as (hash_value+1).
3545  */
3546 static int ocfs2_divide_xattr_bucket(struct inode *inode,
3547                                     handle_t *handle,
3548                                     u64 blk,
3549                                     u64 new_blk,
3550                                     u32 *first_hash,
3551                                     int new_bucket_head)
3552 {
3553         int ret, i;
3554         int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
3555         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
3556         struct ocfs2_xattr_header *xh;
3557         struct ocfs2_xattr_entry *xe;
3558         int blocksize = inode->i_sb->s_blocksize;
3559
3560         mlog(0, "move some of xattrs from bucket %llu to %llu\n",
3561              (unsigned long long)blk, (unsigned long long)new_blk);
3562
3563         s_bucket = ocfs2_xattr_bucket_new(inode);
3564         t_bucket = ocfs2_xattr_bucket_new(inode);
3565         if (!s_bucket || !t_bucket) {
3566                 ret = -ENOMEM;
3567                 mlog_errno(ret);
3568                 goto out;
3569         }
3570
3571         ret = ocfs2_read_xattr_bucket(s_bucket, blk);
3572         if (ret) {
3573                 mlog_errno(ret);
3574                 goto out;
3575         }
3576
3577         ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
3578                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3579         if (ret) {
3580                 mlog_errno(ret);
3581                 goto out;
3582         }
3583
3584         /*
3585          * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
3586          * there's no need to read it.
3587          */
3588         ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
3589         if (ret) {
3590                 mlog_errno(ret);
3591                 goto out;
3592         }
3593
3594         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
3595                                                 new_bucket_head ?
3596                                                 OCFS2_JOURNAL_ACCESS_CREATE :
3597                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3598         if (ret) {
3599                 mlog_errno(ret);
3600                 goto out;
3601         }
3602
3603         xh = bucket_xh(s_bucket);
3604         count = le16_to_cpu(xh->xh_count);
3605         start = ocfs2_xattr_find_divide_pos(xh);
3606
3607         if (start == count) {
3608                 xe = &xh->xh_entries[start-1];
3609
3610                 /*
3611                  * initialized a new empty bucket here.
3612                  * The hash value is set as one larger than
3613                  * that of the last entry in the previous bucket.
3614                  */
3615                 for (i = 0; i < t_bucket->bu_blocks; i++)
3616                         memset(bucket_block(t_bucket, i), 0, blocksize);
3617
3618                 xh = bucket_xh(t_bucket);
3619                 xh->xh_free_start = cpu_to_le16(blocksize);
3620                 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
3621                 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
3622
3623                 goto set_num_buckets;
3624         }
3625
3626         /* copy the whole bucket to the new first. */
3627         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
3628
3629         /* update the new bucket. */
3630         xh = bucket_xh(t_bucket);
3631
3632         /*
3633          * Calculate the total name/value len and xh_free_start for
3634          * the old bucket first.
3635          */
3636         name_offset = OCFS2_XATTR_BUCKET_SIZE;
3637         name_value_len = 0;
3638         for (i = 0; i < start; i++) {
3639                 xe = &xh->xh_entries[i];
3640                 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3641                 if (ocfs2_xattr_is_local(xe))
3642                         xe_len +=
3643                            OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3644                 else
3645                         xe_len += OCFS2_XATTR_ROOT_SIZE;
3646                 name_value_len += xe_len;
3647                 if (le16_to_cpu(xe->xe_name_offset) < name_offset)
3648                         name_offset = le16_to_cpu(xe->xe_name_offset);
3649         }
3650
3651         /*
3652          * Now begin the modification to the new bucket.
3653          *
3654          * In the new bucket, We just move the xattr entry to the beginning
3655          * and don't touch the name/value. So there will be some holes in the
3656          * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
3657          * called.
3658          */
3659         xe = &xh->xh_entries[start];
3660         len = sizeof(struct ocfs2_xattr_entry) * (count - start);
3661         mlog(0, "mv xattr entry len %d from %d to %d\n", len,
3662              (int)((char *)xe - (char *)xh),
3663              (int)((char *)xh->xh_entries - (char *)xh));
3664         memmove((char *)xh->xh_entries, (char *)xe, len);
3665         xe = &xh->xh_entries[count - start];
3666         len = sizeof(struct ocfs2_xattr_entry) * start;
3667         memset((char *)xe, 0, len);
3668
3669         le16_add_cpu(&xh->xh_count, -start);
3670         le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
3671
3672         /* Calculate xh_free_start for the new bucket. */
3673         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
3674         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3675                 xe = &xh->xh_entries[i];
3676                 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3677                 if (ocfs2_xattr_is_local(xe))
3678                         xe_len +=
3679                            OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3680                 else
3681                         xe_len += OCFS2_XATTR_ROOT_SIZE;
3682                 if (le16_to_cpu(xe->xe_name_offset) <
3683                     le16_to_cpu(xh->xh_free_start))
3684                         xh->xh_free_start = xe->xe_name_offset;
3685         }
3686
3687 set_num_buckets:
3688         /* set xh->xh_num_buckets for the new xh. */
3689         if (new_bucket_head)
3690                 xh->xh_num_buckets = cpu_to_le16(1);
3691         else
3692                 xh->xh_num_buckets = 0;
3693
3694         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
3695
3696         /* store the first_hash of the new bucket. */
3697         if (first_hash)
3698                 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3699
3700         /*
3701          * Now only update the 1st block of the old bucket.  If we
3702          * just added a new empty bucket, there is no need to modify
3703          * it.
3704          */
3705         if (start == count)
3706                 goto out;
3707
3708         xh = bucket_xh(s_bucket);
3709         memset(&xh->xh_entries[start], 0,
3710                sizeof(struct ocfs2_xattr_entry) * (count - start));
3711         xh->xh_count = cpu_to_le16(start);
3712         xh->xh_free_start = cpu_to_le16(name_offset);
3713         xh->xh_name_value_len = cpu_to_le16(name_value_len);
3714
3715         ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
3716
3717 out:
3718         ocfs2_xattr_bucket_free(s_bucket);
3719         ocfs2_xattr_bucket_free(t_bucket);
3720
3721         return ret;
3722 }
3723
3724 /*
3725  * Copy xattr from one bucket to another bucket.
3726  *
3727  * The caller must make sure that the journal transaction
3728  * has enough space for journaling.
3729  */
3730 static int ocfs2_cp_xattr_bucket(struct inode *inode,
3731                                  handle_t *handle,
3732                                  u64 s_blkno,
3733                                  u64 t_blkno,
3734                                  int t_is_new)
3735 {
3736         int ret;
3737         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
3738
3739         BUG_ON(s_blkno == t_blkno);
3740
3741         mlog(0, "cp bucket %llu to %llu, target is %d\n",
3742              (unsigned long long)s_blkno, (unsigned long long)t_blkno,
3743              t_is_new);
3744
3745         s_bucket = ocfs2_xattr_bucket_new(inode);
3746         t_bucket = ocfs2_xattr_bucket_new(inode);
3747         if (!s_bucket || !t_bucket) {
3748                 ret = -ENOMEM;
3749                 mlog_errno(ret);
3750                 goto out;
3751         }
3752   
3753         ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
3754         if (ret)
3755                 goto out;
3756
3757         /*
3758          * Even if !t_is_new, we're overwriting t_bucket.  Thus,
3759          * there's no need to read it.
3760          */
3761         ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
3762         if (ret)
3763                 goto out;
3764
3765         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
3766                                                 t_is_new ?
3767                                                 OCFS2_JOURNAL_ACCESS_CREATE :
3768                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3769         if (ret)
3770                 goto out;
3771
3772         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
3773         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
3774
3775 out:
3776         ocfs2_xattr_bucket_free(t_bucket);
3777         ocfs2_xattr_bucket_free(s_bucket);
3778
3779         return ret;
3780 }
3781
3782 /*
3783  * Copy one xattr cluster from src_blk to to_blk.
3784  * The to_blk will become the first bucket header of the cluster, so its
3785  * xh_num_buckets will be initialized as the bucket num in the cluster.
3786  */
3787 static int ocfs2_cp_xattr_cluster(struct inode *inode,
3788                                   handle_t *handle,
3789                                   struct buffer_head *first_bh,
3790                                   u64 src_blk,
3791                                   u64 to_blk,
3792                                   u32 *first_hash)
3793 {
3794         int i, ret, credits;
3795         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3796         int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3797         int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3798         struct buffer_head *bh = NULL;
3799         struct ocfs2_xattr_header *xh;
3800         u64 to_blk_start = to_blk;
3801
3802         mlog(0, "cp xattrs from cluster %llu to %llu\n",
3803              (unsigned long long)src_blk, (unsigned long long)to_blk);
3804
3805         /*
3806          * We need to update the new cluster and 1 more for the update of
3807          * the 1st bucket of the previous extent rec.
3808          */
3809         credits = bpc + 1 + handle->h_buffer_credits;
3810         ret = ocfs2_extend_trans(handle, credits);
3811         if (ret) {
3812                 mlog_errno(ret);
3813                 goto out;
3814         }
3815
3816         ret = ocfs2_journal_access(handle, inode, first_bh,
3817                                    OCFS2_JOURNAL_ACCESS_WRITE);
3818         if (ret) {
3819                 mlog_errno(ret);
3820                 goto out;
3821         }
3822
3823         for (i = 0; i < num_buckets; i++) {
3824                 ret = ocfs2_cp_xattr_bucket(inode, handle,
3825                                             src_blk, to_blk, 1);
3826                 if (ret) {
3827                         mlog_errno(ret);
3828                         goto out;
3829                 }
3830
3831                 src_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3832                 to_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3833         }
3834
3835         /* update the old bucket header. */
3836         xh = (struct ocfs2_xattr_header *)first_bh->b_data;
3837         le16_add_cpu(&xh->xh_num_buckets, -num_buckets);
3838
3839         ocfs2_journal_dirty(handle, first_bh);
3840
3841         /* update the new bucket header. */
3842         ret = ocfs2_read_block(inode, to_blk_start, &bh);
3843         if (ret < 0) {
3844                 mlog_errno(ret);
3845                 goto out;
3846         }
3847
3848         ret = ocfs2_journal_access(handle, inode, bh,
3849                                    OCFS2_JOURNAL_ACCESS_WRITE);
3850         if (ret) {
3851                 mlog_errno(ret);
3852                 goto out;
3853         }
3854
3855         xh = (struct ocfs2_xattr_header *)bh->b_data;
3856         xh->xh_num_buckets = cpu_to_le16(num_buckets);
3857
3858         ocfs2_journal_dirty(handle, bh);
3859
3860         if (first_hash)
3861                 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3862 out:
3863         brelse(bh);
3864         return ret;
3865 }
3866
3867 /*
3868  * Move some xattrs in this cluster to the new cluster.
3869  * This function should only be called when bucket size == cluster size.
3870  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
3871  */
3872 static int ocfs2_divide_xattr_cluster(struct inode *inode,
3873                                       handle_t *handle,
3874                                       u64 prev_blk,
3875                                       u64 new_blk,
3876                                       u32 *first_hash)
3877 {
3878         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3879         int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
3880
3881         BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
3882
3883         ret = ocfs2_extend_trans(handle, credits);
3884         if (ret) {
3885                 mlog_errno(ret);
3886                 return ret;
3887         }
3888
3889         /* Move half of the xattr in start_blk to the next bucket. */
3890         return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
3891                                           new_blk, first_hash, 1);
3892 }
3893
3894 /*
3895  * Move some xattrs from the old cluster to the new one since they are not
3896  * contiguous in ocfs2 xattr tree.
3897  *
3898  * new_blk starts a new separate cluster, and we will move some xattrs from
3899  * prev_blk to it. v_start will be set as the first name hash value in this
3900  * new cluster so that it can be used as e_cpos during tree insertion and
3901  * don't collide with our original b-tree operations. first_bh and header_bh
3902  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
3903  * to extend the insert bucket.
3904  *
3905  * The problem is how much xattr should we move to the new one and when should
3906  * we update first_bh and header_bh?
3907  * 1. If cluster size > bucket size, that means the previous cluster has more
3908  *    than 1 bucket, so just move half nums of bucket into the new cluster and
3909  *    update the first_bh and header_bh if the insert bucket has been moved
3910  *    to the new cluster.
3911  * 2. If cluster_size == bucket_size:
3912  *    a) If the previous extent rec has more than one cluster and the insert
3913  *       place isn't in the last cluster, copy the entire last cluster to the
3914  *       new one. This time, we don't need to upate the first_bh and header_bh
3915  *       since they will not be moved into the new cluster.
3916  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
3917  *       the new one. And we set the extend flag to zero if the insert place is
3918  *       moved into the new allocated cluster since no extend is needed.
3919  */
3920 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
3921                                             handle_t *handle,
3922                                             struct buffer_head **first_bh,
3923                                             struct buffer_head **header_bh,
3924                                             u64 new_blk,
3925                                             u64 prev_blk,
3926                                             u32 prev_clusters,
3927                                             u32 *v_start,
3928                                             int *extend)
3929 {
3930         int ret = 0;
3931         int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3932
3933         mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
3934              (unsigned long long)prev_blk, prev_clusters,
3935              (unsigned long long)new_blk);
3936
3937         if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1)
3938                 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
3939                                                           handle,
3940                                                           first_bh,
3941                                                           header_bh,
3942                                                           new_blk,
3943                                                           prev_blk,
3944                                                           prev_clusters,
3945                                                           v_start);
3946         else {
3947                 u64 last_blk = prev_blk + bpc * (prev_clusters - 1);
3948
3949                 if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk)
3950                         ret = ocfs2_cp_xattr_cluster(inode, handle, *first_bh,
3951                                                      last_blk, new_blk,
3952                                                      v_start);
3953                 else {
3954                         ret = ocfs2_divide_xattr_cluster(inode, handle,
3955                                                          last_blk, new_blk,
3956                                                          v_start);
3957
3958                         if ((*header_bh)->b_blocknr == last_blk && extend)
3959                                 *extend = 0;
3960                 }
3961         }
3962
3963         return ret;
3964 }
3965
3966 /*
3967  * Add a new cluster for xattr storage.
3968  *
3969  * If the new cluster is contiguous with the previous one, it will be
3970  * appended to the same extent record, and num_clusters will be updated.
3971  * If not, we will insert a new extent for it and move some xattrs in
3972  * the last cluster into the new allocated one.
3973  * We also need to limit the maximum size of a btree leaf, otherwise we'll
3974  * lose the benefits of hashing because we'll have to search large leaves.
3975  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
3976  * if it's bigger).
3977  *
3978  * first_bh is the first block of the previous extent rec and header_bh
3979  * indicates the bucket we will insert the new xattrs. They will be updated
3980  * when the header_bh is moved into the new cluster.
3981  */
3982 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3983                                        struct buffer_head *root_bh,
3984                                        struct buffer_head **first_bh,
3985                                        struct buffer_head **header_bh,
3986                                        u32 *num_clusters,
3987                                        u32 prev_cpos,
3988                                        u64 prev_blkno,
3989                                        int *extend,
3990                                        struct ocfs2_xattr_set_ctxt *ctxt)
3991 {
3992         int ret;
3993         u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3994         u32 prev_clusters = *num_clusters;
3995         u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
3996         u64 block;
3997         handle_t *handle = ctxt->handle;
3998         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3999         struct ocfs2_extent_tree et;
4000
4001         mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
4002              "previous xattr blkno = %llu\n",
4003              (unsigned long long)OCFS2_I(inode)->ip_blkno,
4004              prev_cpos, (unsigned long long)prev_blkno);
4005
4006         ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
4007
4008         ret = ocfs2_journal_access(handle, inode, root_bh,
4009                                    OCFS2_JOURNAL_ACCESS_WRITE);
4010         if (ret < 0) {
4011                 mlog_errno(ret);
4012                 goto leave;
4013         }
4014
4015         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
4016                                      clusters_to_add, &bit_off, &num_bits);
4017         if (ret < 0) {
4018                 if (ret != -ENOSPC)
4019                         mlog_errno(ret);
4020                 goto leave;
4021         }
4022
4023         BUG_ON(num_bits > clusters_to_add);
4024
4025         block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
4026         mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
4027              num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
4028
4029         if (prev_blkno + prev_clusters * bpc == block &&
4030             (prev_clusters + num_bits) << osb->s_clustersize_bits <=
4031              OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
4032                 /*
4033                  * If this cluster is contiguous with the old one and
4034                  * adding this new cluster, we don't surpass the limit of
4035                  * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
4036                  * initialized and used like other buckets in the previous
4037                  * cluster.
4038                  * So add it as a contiguous one. The caller will handle
4039                  * its init process.
4040                  */
4041                 v_start = prev_cpos + prev_clusters;
4042                 *num_clusters = prev_clusters + num_bits;
4043                 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
4044                      num_bits);
4045         } else {
4046                 ret = ocfs2_adjust_xattr_cross_cluster(inode,
4047                                                        handle,
4048                                                        first_bh,
4049                                                        header_bh,
4050                                                        block,
4051                                                        prev_blkno,
4052                                                        prev_clusters,
4053                                                        &v_start,
4054                                                        extend);
4055                 if (ret) {
4056                         mlog_errno(ret);
4057                         goto leave;
4058                 }
4059         }
4060
4061         mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
4062              num_bits, (unsigned long long)block, v_start);
4063         ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
4064                                   num_bits, 0, ctxt->meta_ac);
4065         if (ret < 0) {
4066                 mlog_errno(ret);
4067                 goto leave;
4068         }
4069
4070         ret = ocfs2_journal_dirty(handle, root_bh);
4071         if (ret < 0)
4072                 mlog_errno(ret);
4073
4074 leave:
4075         return ret;
4076 }
4077
4078 /*
4079  * Extend a new xattr bucket and move xattrs to the end one by one until
4080  * We meet with start_bh. Only move half of the xattrs to the bucket after it.
4081  */
4082 static int ocfs2_extend_xattr_bucket(struct inode *inode,
4083                                      handle_t *handle,
4084                                      struct buffer_head *first_bh,
4085                                      struct buffer_head *start_bh,
4086                                      u32 num_clusters)
4087 {
4088         int ret, credits;
4089         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4090         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4091         u64 start_blk = start_bh->b_blocknr, end_blk;
4092         u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb);
4093         struct ocfs2_xattr_header *first_xh =
4094                                 (struct ocfs2_xattr_header *)first_bh->b_data;
4095         u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);
4096
4097         mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
4098              "from %llu, len = %u\n", (unsigned long long)start_blk,
4099              (unsigned long long)first_bh->b_blocknr, num_clusters);
4100
4101         BUG_ON(bucket >= num_buckets);
4102
4103         end_blk = first_bh->b_blocknr + (bucket - 1) * blk_per_bucket;
4104
4105         /*
4106          * We will touch all the buckets after the start_bh(include it).
4107          * Then we add one more bucket.
4108          */
4109         credits = end_blk - start_blk + 3 * blk_per_bucket + 1 +
4110                   handle->h_buffer_credits;
4111         ret = ocfs2_extend_trans(handle, credits);
4112         if (ret) {
4113                 mlog_errno(ret);
4114                 goto out;
4115         }
4116
4117         ret = ocfs2_journal_access(handle, inode, first_bh,
4118                                    OCFS2_JOURNAL_ACCESS_WRITE);
4119         if (ret) {
4120                 mlog_errno(ret);
4121                 goto out;
4122         }
4123
4124         while (end_blk != start_blk) {
4125                 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
4126                                             end_blk + blk_per_bucket, 0);
4127                 if (ret)
4128                         goto out;
4129                 end_blk -= blk_per_bucket;
4130         }
4131
4132         /* Move half of the xattr in start_blk to the next bucket. */
4133         ret = ocfs2_divide_xattr_bucket(inode, handle, start_blk,
4134                                         start_blk + blk_per_bucket, NULL, 0);
4135
4136         le16_add_cpu(&first_xh->xh_num_buckets, 1);
4137         ocfs2_journal_dirty(handle, first_bh);
4138
4139 out:
4140         return ret;
4141 }
4142
4143 /*
4144  * Add new xattr bucket in an extent record and adjust the buckets accordingly.
4145  * xb_bh is the ocfs2_xattr_block.
4146  * We will move all the buckets starting from header_bh to the next place. As
4147  * for this one, half num of its xattrs will be moved to the next one.
4148  *
4149  * We will allocate a new cluster if current cluster is full and adjust
4150  * header_bh and first_bh if the insert place is moved to the new cluster.
4151  */
4152 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
4153                                       struct buffer_head *xb_bh,
4154                                       struct buffer_head *header_bh,
4155                                       struct ocfs2_xattr_set_ctxt *ctxt)
4156 {
4157         struct ocfs2_xattr_header *first_xh = NULL;
4158         struct buffer_head *first_bh = NULL;
4159         struct ocfs2_xattr_block *xb =
4160                         (struct ocfs2_xattr_block *)xb_bh->b_data;
4161         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
4162         struct ocfs2_extent_list *el = &xb_root->xt_list;
4163         struct ocfs2_xattr_header *xh =
4164                         (struct ocfs2_xattr_header *)header_bh->b_data;
4165         u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4166         struct super_block *sb = inode->i_sb;
4167         struct ocfs2_super *osb = OCFS2_SB(sb);
4168         int ret, num_buckets, extend = 1;
4169         u64 p_blkno;
4170         u32 e_cpos, num_clusters;
4171
4172         mlog(0, "Add new xattr bucket starting form %llu\n",
4173              (unsigned long long)header_bh->b_blocknr);
4174
4175         /*
4176          * Add refrence for header_bh here because it may be
4177          * changed in ocfs2_add_new_xattr_cluster and we need
4178          * to free it in the end.
4179          */
4180         get_bh(header_bh);
4181
4182         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
4183                                   &num_clusters, el);
4184         if (ret) {
4185                 mlog_errno(ret);
4186                 goto out;
4187         }
4188
4189         ret = ocfs2_read_block(inode, p_blkno, &first_bh);
4190         if (ret) {
4191                 mlog_errno(ret);
4192                 goto out;
4193         }
4194
4195         num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
4196         first_xh = (struct ocfs2_xattr_header *)first_bh->b_data;
4197
4198         if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) {
4199                 ret = ocfs2_add_new_xattr_cluster(inode,
4200                                                   xb_bh,
4201                                                   &first_bh,
4202                                                   &header_bh,
4203                                                   &num_clusters,
4204                                                   e_cpos,
4205                                                   p_blkno,
4206                                                   &extend,
4207                                                   ctxt);
4208                 if (ret) {
4209                         mlog_errno(ret);
4210                         goto out;
4211                 }
4212         }
4213
4214         if (extend)
4215                 ret = ocfs2_extend_xattr_bucket(inode,
4216                                                 ctxt->handle,
4217                                                 first_bh,
4218                                                 header_bh,
4219                                                 num_clusters);
4220         if (ret)
4221                 mlog_errno(ret);
4222 out:
4223         brelse(first_bh);
4224         brelse(header_bh);
4225         return ret;
4226 }
4227
4228 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
4229                                         struct ocfs2_xattr_bucket *bucket,
4230                                         int offs)
4231 {
4232         int block_off = offs >> inode->i_sb->s_blocksize_bits;
4233
4234         offs = offs % inode->i_sb->s_blocksize;
4235         return bucket_block(bucket, block_off) + offs;
4236 }
4237
4238 /*
4239  * Handle the normal xattr set, including replace, delete and new.
4240  *
4241  * Note: "local" indicates the real data's locality. So we can't
4242  * just its bucket locality by its length.
4243  */
4244 static void ocfs2_xattr_set_entry_normal(struct inode *inode,
4245                                          struct ocfs2_xattr_info *xi,
4246                                          struct ocfs2_xattr_search *xs,
4247                                          u32 name_hash,
4248                                          int local)
4249 {
4250         struct ocfs2_xattr_entry *last, *xe;
4251         int name_len = strlen(xi->name);
4252         struct ocfs2_xattr_header *xh = xs->header;
4253         u16 count = le16_to_cpu(xh->xh_count), start;
4254         size_t blocksize = inode->i_sb->s_blocksize;
4255         char *val;
4256         size_t offs, size, new_size;
4257
4258         last = &xh->xh_entries[count];
4259         if (!xs->not_found) {
4260                 xe = xs->here;
4261                 offs = le16_to_cpu(xe->xe_name_offset);
4262                 if (ocfs2_xattr_is_local(xe))
4263                         size = OCFS2_XATTR_SIZE(name_len) +
4264                         OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4265                 else
4266                         size = OCFS2_XATTR_SIZE(name_len) +
4267                         OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4268
4269                 /*
4270                  * If the new value will be stored outside, xi->value has been
4271                  * initalized as an empty ocfs2_xattr_value_root, and the same
4272                  * goes with xi->value_len, so we can set new_size safely here.
4273                  * See ocfs2_xattr_set_in_bucket.
4274                  */
4275                 new_size = OCFS2_XATTR_SIZE(name_len) +
4276                            OCFS2_XATTR_SIZE(xi->value_len);
4277
4278                 le16_add_cpu(&xh->xh_name_value_len, -size);
4279                 if (xi->value) {
4280                         if (new_size > size)
4281                                 goto set_new_name_value;
4282
4283                         /* Now replace the old value with new one. */
4284                         if (local)
4285                                 xe->xe_value_size = cpu_to_le64(xi->value_len);
4286                         else
4287                                 xe->xe_value_size = 0;
4288
4289                         val = ocfs2_xattr_bucket_get_val(inode,
4290                                                          xs->bucket, offs);
4291                         memset(val + OCFS2_XATTR_SIZE(name_len), 0,
4292                                size - OCFS2_XATTR_SIZE(name_len));
4293                         if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
4294                                 memcpy(val + OCFS2_XATTR_SIZE(name_len),
4295                                        xi->value, xi->value_len);
4296
4297                         le16_add_cpu(&xh->xh_name_value_len, new_size);
4298                         ocfs2_xattr_set_local(xe, local);
4299                         return;
4300                 } else {
4301                         /*
4302                          * Remove the old entry if there is more than one.
4303                          * We don't remove the last entry so that we can
4304                          * use it to indicate the hash value of the empty
4305                          * bucket.
4306                          */
4307                         last -= 1;
4308                         le16_add_cpu(&xh->xh_count, -1);
4309                         if (xh->xh_count) {
4310                                 memmove(xe, xe + 1,
4311                                         (void *)last - (void *)xe);
4312                                 memset(last, 0,
4313                                        sizeof(struct ocfs2_xattr_entry));
4314                         } else
4315                                 xh->xh_free_start =
4316                                         cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4317
4318                         return;
4319                 }
4320         } else {
4321                 /* find a new entry for insert. */
4322                 int low = 0, high = count - 1, tmp;
4323                 struct ocfs2_xattr_entry *tmp_xe;
4324
4325                 while (low <= high && count) {
4326                         tmp = (low + high) / 2;
4327                         tmp_xe = &xh->xh_entries[tmp];
4328
4329                         if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
4330                                 low = tmp + 1;
4331                         else if (name_hash <
4332                                  le32_to_cpu(tmp_xe->xe_name_hash))
4333                                 high = tmp - 1;
4334                         else {
4335                                 low = tmp;
4336                                 break;
4337                         }
4338                 }
4339
4340                 xe = &xh->xh_entries[low];
4341                 if (low != count)
4342                         memmove(xe + 1, xe, (void *)last - (void *)xe);
4343
4344                 le16_add_cpu(&xh->xh_count, 1);
4345                 memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
4346                 xe->xe_name_hash = cpu_to_le32(name_hash);
4347                 xe->xe_name_len = name_len;
4348                 ocfs2_xattr_set_type(xe, xi->name_index);
4349         }
4350
4351 set_new_name_value:
4352         /* Insert the new name+value. */
4353         size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);
4354
4355         /*
4356          * We must make sure that the name/value pair
4357          * exists in the same block.
4358          */
4359         offs = le16_to_cpu(xh->xh_free_start);
4360         start = offs - size;
4361
4362         if (start >> inode->i_sb->s_blocksize_bits !=
4363             (offs - 1) >> inode->i_sb->s_blocksize_bits) {
4364                 offs = offs - offs % blocksize;
4365                 xh->xh_free_start = cpu_to_le16(offs);
4366         }
4367
4368         val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size);
4369         xe->xe_name_offset = cpu_to_le16(offs - size);
4370
4371         memset(val, 0, size);
4372         memcpy(val, xi->name, name_len);
4373         memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);
4374
4375         xe->xe_value_size = cpu_to_le64(xi->value_len);
4376         ocfs2_xattr_set_local(xe, local);
4377         xs->here = xe;
4378         le16_add_cpu(&xh->xh_free_start, -size);
4379         le16_add_cpu(&xh->xh_name_value_len, size);
4380
4381         return;
4382 }
4383
4384 /*
4385  * Set the xattr entry in the specified bucket.
4386  * The bucket is indicated by xs->bucket and it should have the enough
4387  * space for the xattr insertion.
4388  */
4389 static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4390                                            handle_t *handle,
4391                                            struct ocfs2_xattr_info *xi,
4392                                            struct ocfs2_xattr_search *xs,
4393                                            u32 name_hash,
4394                                            int local)
4395 {
4396         int ret;
4397         u64 blkno;
4398
4399         mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
4400              (unsigned long)xi->value_len, xi->name_index,
4401              (unsigned long long)bucket_blkno(xs->bucket));
4402
4403         if (!xs->bucket->bu_bhs[1]) {
4404                 blkno = bucket_blkno(xs->bucket);
4405                 ocfs2_xattr_bucket_relse(xs->bucket);
4406                 ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
4407                 if (ret) {
4408                         mlog_errno(ret);
4409                         goto out;
4410                 }
4411         }
4412
4413         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4414                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4415         if (ret < 0) {
4416                 mlog_errno(ret);
4417                 goto out;
4418         }
4419
4420         ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
4421         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4422
4423 out:
4424         return ret;
4425 }
4426
4427 static int ocfs2_xattr_value_update_size(struct inode *inode,
4428                                          handle_t *handle,
4429                                          struct buffer_head *xe_bh,
4430                                          struct ocfs2_xattr_entry *xe,
4431                                          u64 new_size)
4432 {
4433         int ret;
4434
4435         ret = ocfs2_journal_access(handle, inode, xe_bh,
4436                                    OCFS2_JOURNAL_ACCESS_WRITE);
4437         if (ret < 0) {
4438                 mlog_errno(ret);
4439                 goto out;
4440         }
4441
4442         xe->xe_value_size = cpu_to_le64(new_size);
4443
4444         ret = ocfs2_journal_dirty(handle, xe_bh);
4445         if (ret < 0)
4446                 mlog_errno(ret);
4447
4448 out:
4449         return ret;
4450 }
4451
4452 /*
4453  * Truncate the specified xe_off entry in xattr bucket.
4454  * bucket is indicated by header_bh and len is the new length.
4455  * Both the ocfs2_xattr_value_root and the entry will be updated here.
4456  *
4457  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4458  */
4459 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4460                                              struct buffer_head *header_bh,
4461                                              int xe_off,
4462                                              int len,
4463                                              struct ocfs2_xattr_set_ctxt *ctxt)
4464 {
4465         int ret, offset;
4466         u64 value_blk;
4467         struct buffer_head *value_bh = NULL;
4468         struct ocfs2_xattr_value_root *xv;
4469         struct ocfs2_xattr_entry *xe;
4470         struct ocfs2_xattr_header *xh =
4471                         (struct ocfs2_xattr_header *)header_bh->b_data;
4472         size_t blocksize = inode->i_sb->s_blocksize;
4473
4474         xe = &xh->xh_entries[xe_off];
4475
4476         BUG_ON(!xe || ocfs2_xattr_is_local(xe));
4477
4478         offset = le16_to_cpu(xe->xe_name_offset) +
4479                  OCFS2_XATTR_SIZE(xe->xe_name_len);
4480
4481         value_blk = offset / blocksize;
4482
4483         /* We don't allow ocfs2_xattr_value to be stored in different block. */
4484         BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4485         value_blk += header_bh->b_blocknr;
4486
4487         ret = ocfs2_read_block(inode, value_blk, &value_bh);
4488         if (ret) {
4489                 mlog_errno(ret);
4490                 goto out;
4491         }
4492
4493         xv = (struct ocfs2_xattr_value_root *)
4494                 (value_bh->b_data + offset % blocksize);
4495
4496         mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4497              xe_off, (unsigned long long)header_bh->b_blocknr, len);
4498         ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len, ctxt);
4499         if (ret) {
4500                 mlog_errno(ret);
4501                 goto out;
4502         }
4503
4504         ret = ocfs2_xattr_value_update_size(inode, ctxt->handle,
4505                                             header_bh, xe, len);
4506         if (ret) {
4507                 mlog_errno(ret);
4508                 goto out;
4509         }
4510
4511 out:
4512         brelse(value_bh);
4513         return ret;
4514 }
4515
4516 static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4517                                         struct ocfs2_xattr_search *xs,
4518                                         int len,
4519                                         struct ocfs2_xattr_set_ctxt *ctxt)
4520 {
4521         int ret, offset;
4522         struct ocfs2_xattr_entry *xe = xs->here;
4523         struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
4524
4525         BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
4526
4527         offset = xe - xh->xh_entries;
4528         ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket->bu_bhs[0],
4529                                                 offset, len, ctxt);
4530         if (ret)
4531                 mlog_errno(ret);
4532
4533         return ret;
4534 }
4535
4536 static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4537                                                 handle_t *handle,
4538                                                 struct ocfs2_xattr_search *xs,
4539                                                 char *val,
4540                                                 int value_len)
4541 {
4542         int offset;
4543         struct ocfs2_xattr_value_root *xv;
4544         struct ocfs2_xattr_entry *xe = xs->here;
4545
4546         BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
4547
4548         offset = le16_to_cpu(xe->xe_name_offset) +
4549                  OCFS2_XATTR_SIZE(xe->xe_name_len);
4550
4551         xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
4552
4553         return __ocfs2_xattr_set_value_outside(inode, handle,
4554                                                xv, val, value_len);
4555 }
4556
4557 static int ocfs2_rm_xattr_cluster(struct inode *inode,
4558                                   struct buffer_head *root_bh,
4559                                   u64 blkno,
4560                                   u32 cpos,
4561                                   u32 len)
4562 {
4563         int ret;
4564         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4565         struct inode *tl_inode = osb->osb_tl_inode;
4566         handle_t *handle;
4567         struct ocfs2_xattr_block *xb =
4568                         (struct ocfs2_xattr_block *)root_bh->b_data;
4569         struct ocfs2_alloc_context *meta_ac = NULL;
4570         struct ocfs2_cached_dealloc_ctxt dealloc;
4571         struct ocfs2_extent_tree et;
4572
4573         ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
4574
4575         ocfs2_init_dealloc_ctxt(&dealloc);
4576
4577         mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
4578              cpos, len, (unsigned long long)blkno);
4579
4580         ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len);
4581
4582         ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
4583         if (ret) {
4584                 mlog_errno(ret);
4585                 return ret;
4586         }
4587
4588         mutex_lock(&tl_inode->i_mutex);
4589
4590         if (ocfs2_truncate_log_needs_flush(osb)) {
4591                 ret = __ocfs2_flush_truncate_log(osb);
4592                 if (ret < 0) {
4593                         mlog_errno(ret);
4594                         goto out;
4595                 }
4596         }
4597
4598         handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
4599         if (IS_ERR(handle)) {
4600                 ret = -ENOMEM;
4601                 mlog_errno(ret);
4602                 goto out;
4603         }
4604
4605         ret = ocfs2_journal_access(handle, inode, root_bh,
4606                                    OCFS2_JOURNAL_ACCESS_WRITE);
4607         if (ret) {
4608                 mlog_errno(ret);
4609                 goto out_commit;
4610         }
4611
4612         ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
4613                                   &dealloc);
4614         if (ret) {
4615                 mlog_errno(ret);
4616                 goto out_commit;
4617         }
4618
4619         le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
4620
4621         ret = ocfs2_journal_dirty(handle, root_bh);
4622         if (ret) {
4623                 mlog_errno(ret);
4624                 goto out_commit;
4625         }
4626
4627         ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
4628         if (ret)
4629                 mlog_errno(ret);
4630
4631 out_commit:
4632         ocfs2_commit_trans(osb, handle);
4633 out:
4634         ocfs2_schedule_truncate_log_flush(osb, 1);
4635
4636         mutex_unlock(&tl_inode->i_mutex);
4637
4638         if (meta_ac)
4639                 ocfs2_free_alloc_context(meta_ac);
4640
4641         ocfs2_run_deallocs(osb, &dealloc);
4642
4643         return ret;
4644 }
4645
4646 static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
4647                                          handle_t *handle,
4648                                          struct ocfs2_xattr_search *xs)
4649 {
4650         struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
4651         struct ocfs2_xattr_entry *last = &xh->xh_entries[
4652                                                 le16_to_cpu(xh->xh_count) - 1];
4653         int ret = 0;
4654
4655         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4656                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4657         if (ret) {
4658                 mlog_errno(ret);
4659                 return;
4660         }
4661
4662         /* Remove the old entry. */
4663         memmove(xs->here, xs->here + 1,
4664                 (void *)last - (void *)xs->here);
4665         memset(last, 0, sizeof(struct ocfs2_xattr_entry));
4666         le16_add_cpu(&xh->xh_count, -1);
4667
4668         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4669 }
4670
4671 /*
4672  * Set the xattr name/value in the bucket specified in xs.
4673  *
4674  * As the new value in xi may be stored in the bucket or in an outside cluster,
4675  * we divide the whole process into 3 steps:
4676  * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
4677  * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
4678  * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
4679  * 4. If the clusters for the new outside value can't be allocated, we need
4680  *    to free the xattr we allocated in set.
4681  */
4682 static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4683                                      struct ocfs2_xattr_info *xi,
4684                                      struct ocfs2_xattr_search *xs,
4685                                      struct ocfs2_xattr_set_ctxt *ctxt)
4686 {
4687         int ret, local = 1;
4688         size_t value_len;
4689         char *val = (char *)xi->value;
4690         struct ocfs2_xattr_entry *xe = xs->here;
4691         u32 name_hash = ocfs2_xattr_name_hash(inode, xi->name,
4692                                               strlen(xi->name));
4693
4694         if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
4695                 /*
4696                  * We need to truncate the xattr storage first.
4697                  *
4698                  * If both the old and new value are stored to
4699                  * outside block, we only need to truncate
4700                  * the storage and then set the value outside.
4701                  *
4702                  * If the new value should be stored within block,
4703                  * we should free all the outside block first and
4704                  * the modification to the xattr block will be done
4705                  * by following steps.
4706                  */
4707                 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4708                         value_len = xi->value_len;
4709                 else
4710                         value_len = 0;
4711
4712                 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4713                                                            value_len,
4714                                                            ctxt);
4715                 if (ret)
4716                         goto out;
4717
4718                 if (value_len)
4719                         goto set_value_outside;
4720         }
4721
4722         value_len = xi->value_len;
4723         /* So we have to handle the inside block change now. */
4724         if (value_len > OCFS2_XATTR_INLINE_SIZE) {
4725                 /*
4726                  * If the new value will be stored outside of block,
4727                  * initalize a new empty value root and insert it first.
4728                  */
4729                 local = 0;
4730                 xi->value = &def_xv;
4731                 xi->value_len = OCFS2_XATTR_ROOT_SIZE;
4732         }
4733
4734         ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs,
4735                                               name_hash, local);
4736         if (ret) {
4737                 mlog_errno(ret);
4738                 goto out;
4739         }
4740
4741         if (value_len <= OCFS2_XATTR_INLINE_SIZE)
4742                 goto out;
4743
4744         /* allocate the space now for the outside block storage. */
4745         ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4746                                                    value_len, ctxt);
4747         if (ret) {
4748                 mlog_errno(ret);
4749
4750                 if (xs->not_found) {
4751                         /*
4752                          * We can't allocate enough clusters for outside
4753                          * storage and we have allocated xattr already,
4754                          * so need to remove it.
4755                          */
4756                         ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs);
4757                 }
4758                 goto out;
4759         }
4760
4761 set_value_outside:
4762         ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle,
4763                                                    xs, val, value_len);
4764 out:
4765         return ret;
4766 }
4767
4768 /*
4769  * check whether the xattr bucket is filled up with the same hash value.
4770  * If we want to insert the xattr with the same hash, return -ENOSPC.
4771  * If we want to insert a xattr with different hash value, go ahead
4772  * and ocfs2_divide_xattr_bucket will handle this.
4773  */
4774 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
4775                                               struct ocfs2_xattr_bucket *bucket,
4776                                               const char *name)
4777 {
4778         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4779         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
4780
4781         if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
4782                 return 0;
4783
4784         if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
4785             xh->xh_entries[0].xe_name_hash) {
4786                 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
4787                      "hash = %u\n",
4788                      (unsigned long long)bucket_blkno(bucket),
4789                      le32_to_cpu(xh->xh_entries[0].xe_name_hash));
4790                 return -ENOSPC;
4791         }
4792
4793         return 0;
4794 }
4795
4796 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
4797                                              struct ocfs2_xattr_info *xi,
4798                                              struct ocfs2_xattr_search *xs,
4799                                              struct ocfs2_xattr_set_ctxt *ctxt)
4800 {
4801         struct ocfs2_xattr_header *xh;
4802         struct ocfs2_xattr_entry *xe;
4803         u16 count, header_size, xh_free_start;
4804         int free, max_free, need, old;
4805         size_t value_size = 0, name_len = strlen(xi->name);
4806         size_t blocksize = inode->i_sb->s_blocksize;
4807         int ret, allocation = 0;
4808
4809         mlog_entry("Set xattr %s in xattr index block\n", xi->name);
4810
4811 try_again:
4812         xh = xs->header;
4813         count = le16_to_cpu(xh->xh_count);
4814         xh_free_start = le16_to_cpu(xh->xh_free_start);
4815         header_size = sizeof(struct ocfs2_xattr_header) +
4816                         count * sizeof(struct ocfs2_xattr_entry);
4817         max_free = OCFS2_XATTR_BUCKET_SIZE -
4818                 le16_to_cpu(xh->xh_name_value_len) - header_size;
4819
4820         mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
4821                         "of %u which exceed block size\n",
4822                         (unsigned long long)bucket_blkno(xs->bucket),
4823                         header_size);
4824
4825         if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4826                 value_size = OCFS2_XATTR_ROOT_SIZE;
4827         else if (xi->value)
4828                 value_size = OCFS2_XATTR_SIZE(xi->value_len);
4829
4830         if (xs->not_found)
4831                 need = sizeof(struct ocfs2_xattr_entry) +
4832                         OCFS2_XATTR_SIZE(name_len) + value_size;
4833         else {
4834                 need = value_size + OCFS2_XATTR_SIZE(name_len);
4835
4836                 /*
4837                  * We only replace the old value if the new length is smaller
4838                  * than the old one. Otherwise we will allocate new space in the
4839                  * bucket to store it.
4840                  */
4841                 xe = xs->here;
4842                 if (ocfs2_xattr_is_local(xe))
4843                         old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4844                 else
4845                         old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4846
4847                 if (old >= value_size)
4848                         need = 0;
4849         }
4850
4851         free = xh_free_start - header_size;
4852         /*
4853          * We need to make sure the new name/value pair
4854          * can exist in the same block.
4855          */
4856         if (xh_free_start % blocksize < need)
4857                 free -= xh_free_start % blocksize;
4858
4859         mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
4860              "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
4861              " %u\n", xs->not_found,
4862              (unsigned long long)bucket_blkno(xs->bucket),
4863              free, need, max_free, le16_to_cpu(xh->xh_free_start),
4864              le16_to_cpu(xh->xh_name_value_len));
4865
4866         if (free < need ||
4867             (xs->not_found &&
4868              count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
4869                 if (need <= max_free &&
4870                     count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
4871                         /*
4872                          * We can create the space by defragment. Since only the
4873                          * name/value will be moved, the xe shouldn't be changed
4874                          * in xs.
4875                          */
4876                         ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
4877                                                         xs->bucket);
4878                         if (ret) {
4879                                 mlog_errno(ret);
4880                                 goto out;
4881                         }
4882
4883                         xh_free_start = le16_to_cpu(xh->xh_free_start);
4884                         free = xh_free_start - header_size;
4885                         if (xh_free_start % blocksize < need)
4886                                 free -= xh_free_start % blocksize;
4887
4888                         if (free >= need)
4889                                 goto xattr_set;
4890
4891                         mlog(0, "Can't get enough space for xattr insert by "
4892                              "defragment. Need %u bytes, but we have %d, so "
4893                              "allocate new bucket for it.\n", need, free);
4894                 }
4895
4896                 /*
4897                  * We have to add new buckets or clusters and one
4898                  * allocation should leave us enough space for insert.
4899                  */
4900                 BUG_ON(allocation);
4901
4902                 /*
4903                  * We do not allow for overlapping ranges between buckets. And
4904                  * the maximum number of collisions we will allow for then is
4905                  * one bucket's worth, so check it here whether we need to
4906                  * add a new bucket for the insert.
4907                  */
4908                 ret = ocfs2_check_xattr_bucket_collision(inode,
4909                                                          xs->bucket,
4910                                                          xi->name);
4911                 if (ret) {
4912                         mlog_errno(ret);
4913                         goto out;
4914                 }
4915
4916                 ret = ocfs2_add_new_xattr_bucket(inode,
4917                                                  xs->xattr_bh,
4918                                                  xs->bucket->bu_bhs[0],
4919                                                  ctxt);
4920                 if (ret) {
4921                         mlog_errno(ret);
4922                         goto out;
4923                 }
4924
4925                 ocfs2_xattr_bucket_relse(xs->bucket);
4926
4927                 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
4928                                                    xi->name_index,
4929                                                    xi->name, xs);
4930                 if (ret && ret != -ENODATA)
4931                         goto out;
4932                 xs->not_found = ret;
4933                 allocation = 1;
4934                 goto try_again;
4935         }
4936
4937 xattr_set:
4938         ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
4939 out:
4940         mlog_exit(ret);
4941         return ret;
4942 }
4943
4944 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
4945                                         struct ocfs2_xattr_bucket *bucket,
4946                                         void *para)
4947 {
4948         int ret = 0;
4949         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4950         u16 i;
4951         struct ocfs2_xattr_entry *xe;
4952         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4953         struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
4954
4955         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
4956
4957         ctxt.handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
4958         if (IS_ERR(ctxt.handle)) {
4959                 ret = PTR_ERR(ctxt.handle);
4960                 mlog_errno(ret);
4961                 goto out;
4962         }
4963
4964         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4965                 xe = &xh->xh_entries[i];
4966                 if (ocfs2_xattr_is_local(xe))
4967                         continue;
4968
4969                 ret = ocfs2_xattr_bucket_value_truncate(inode,
4970                                                         bucket->bu_bhs[0],
4971                                                         i, 0, &ctxt);
4972                 if (ret) {
4973                         mlog_errno(ret);
4974                         break;
4975                 }
4976         }
4977
4978         ret = ocfs2_commit_trans(osb, ctxt.handle);
4979         ocfs2_schedule_truncate_log_flush(osb, 1);
4980         ocfs2_run_deallocs(osb, &ctxt.dealloc);
4981 out:
4982         return ret;
4983 }
4984
4985 static int ocfs2_delete_xattr_index_block(struct inode *inode,
4986                                           struct buffer_head *xb_bh)
4987 {
4988         struct ocfs2_xattr_block *xb =
4989                         (struct ocfs2_xattr_block *)xb_bh->b_data;
4990         struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
4991         int ret = 0;
4992         u32 name_hash = UINT_MAX, e_cpos, num_clusters;
4993         u64 p_blkno;
4994
4995         if (le16_to_cpu(el->l_next_free_rec) == 0)
4996                 return 0;
4997
4998         while (name_hash > 0) {
4999                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
5000                                           &e_cpos, &num_clusters, el);
5001                 if (ret) {
5002                         mlog_errno(ret);
5003                         goto out;
5004                 }
5005
5006                 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
5007                                                   ocfs2_delete_xattr_in_bucket,
5008                                                   NULL);
5009                 if (ret) {
5010                         mlog_errno(ret);
5011                         goto out;
5012                 }
5013
5014                 ret = ocfs2_rm_xattr_cluster(inode, xb_bh,
5015                                              p_blkno, e_cpos, num_clusters);
5016                 if (ret) {
5017                         mlog_errno(ret);
5018                         break;
5019                 }
5020
5021                 if (e_cpos == 0)
5022                         break;
5023
5024                 name_hash = e_cpos - 1;
5025         }
5026
5027 out:
5028         return ret;
5029 }
5030
5031 /*
5032  * 'security' attributes support
5033  */
5034 static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
5035                                         size_t list_size, const char *name,
5036                                         size_t name_len)
5037 {
5038         const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
5039         const size_t total_len = prefix_len + name_len + 1;
5040
5041         if (list && total_len <= list_size) {
5042                 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
5043                 memcpy(list + prefix_len, name, name_len);
5044                 list[prefix_len + name_len] = '\0';
5045         }
5046         return total_len;
5047 }
5048
5049 static int ocfs2_xattr_security_get(struct inode *inode, const char *name,
5050                                     void *buffer, size_t size)
5051 {
5052         if (strcmp(name, "") == 0)
5053                 return -EINVAL;
5054         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name,
5055                                buffer, size);
5056 }
5057
5058 static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
5059                                     const void *value, size_t size, int flags)
5060 {
5061         if (strcmp(name, "") == 0)
5062                 return -EINVAL;
5063
5064         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value,
5065                                size, flags);
5066 }
5067
5068 int ocfs2_init_security_get(struct inode *inode,
5069                             struct inode *dir,
5070                             struct ocfs2_security_xattr_info *si)
5071 {
5072         return security_inode_init_security(inode, dir, &si->name, &si->value,
5073                                             &si->value_len);
5074 }
5075
5076 int ocfs2_init_security_set(handle_t *handle,
5077                             struct inode *inode,
5078                             struct buffer_head *di_bh,
5079                             struct ocfs2_security_xattr_info *si,
5080                             struct ocfs2_alloc_context *xattr_ac,
5081                             struct ocfs2_alloc_context *data_ac)
5082 {
5083         return ocfs2_xattr_set_handle(handle, inode, di_bh,
5084                                      OCFS2_XATTR_INDEX_SECURITY,
5085                                      si->name, si->value, si->value_len, 0,
5086                                      xattr_ac, data_ac);
5087 }
5088
5089 struct xattr_handler ocfs2_xattr_security_handler = {
5090         .prefix = XATTR_SECURITY_PREFIX,
5091         .list   = ocfs2_xattr_security_list,
5092         .get    = ocfs2_xattr_security_get,
5093         .set    = ocfs2_xattr_security_set,
5094 };
5095
5096 /*
5097  * 'trusted' attributes support
5098  */
5099 static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
5100                                        size_t list_size, const char *name,
5101                                        size_t name_len)
5102 {
5103         const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
5104         const size_t total_len = prefix_len + name_len + 1;
5105
5106         if (list && total_len <= list_size) {
5107                 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
5108                 memcpy(list + prefix_len, name, name_len);
5109                 list[prefix_len + name_len] = '\0';
5110         }
5111         return total_len;
5112 }
5113
5114 static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name,
5115                                    void *buffer, size_t size)
5116 {
5117         if (strcmp(name, "") == 0)
5118                 return -EINVAL;
5119         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name,
5120                                buffer, size);
5121 }
5122
5123 static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name,
5124                                    const void *value, size_t size, int flags)
5125 {
5126         if (strcmp(name, "") == 0)
5127                 return -EINVAL;
5128
5129         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value,
5130                                size, flags);
5131 }
5132
5133 struct xattr_handler ocfs2_xattr_trusted_handler = {
5134         .prefix = XATTR_TRUSTED_PREFIX,
5135         .list   = ocfs2_xattr_trusted_list,
5136         .get    = ocfs2_xattr_trusted_get,
5137         .set    = ocfs2_xattr_trusted_set,
5138 };
5139
5140 /*
5141  * 'user' attributes support
5142  */
5143 static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
5144                                     size_t list_size, const char *name,
5145                                     size_t name_len)
5146 {
5147         const size_t prefix_len = XATTR_USER_PREFIX_LEN;
5148         const size_t total_len = prefix_len + name_len + 1;
5149         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5150
5151         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5152                 return 0;
5153
5154         if (list && total_len <= list_size) {
5155                 memcpy(list, XATTR_USER_PREFIX, prefix_len);
5156                 memcpy(list + prefix_len, name, name_len);
5157                 list[prefix_len + name_len] = '\0';
5158         }
5159         return total_len;
5160 }
5161
5162 static int ocfs2_xattr_user_get(struct inode *inode, const char *name,
5163                                 void *buffer, size_t size)
5164 {
5165         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5166
5167         if (strcmp(name, "") == 0)
5168                 return -EINVAL;
5169         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5170                 return -EOPNOTSUPP;
5171         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
5172                                buffer, size);
5173 }
5174
5175 static int ocfs2_xattr_user_set(struct inode *inode, const char *name,
5176                                 const void *value, size_t size, int flags)
5177 {
5178         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5179
5180         if (strcmp(name, "") == 0)
5181                 return -EINVAL;
5182         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5183                 return -EOPNOTSUPP;
5184
5185         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value,
5186                                size, flags);
5187 }
5188
5189 struct xattr_handler ocfs2_xattr_user_handler = {
5190         .prefix = XATTR_USER_PREFIX,
5191         .list   = ocfs2_xattr_user_list,
5192         .get    = ocfs2_xattr_user_get,
5193         .set    = ocfs2_xattr_user_set,
5194 };