ocfs2: add ocfs2_xattr_get_nolock
[linux-2.6.git] / fs / ocfs2 / xattr.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
9  * Lots of code in this file is copied from linux/fs/ext3/xattr.c.
10  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public
14  * License version 2 as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  */
21
22 #include <linux/capability.h>
23 #include <linux/fs.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>
28 #include <linux/uio.h>
29 #include <linux/sched.h>
30 #include <linux/splice.h>
31 #include <linux/mount.h>
32 #include <linux/writeback.h>
33 #include <linux/falloc.h>
34 #include <linux/sort.h>
35 #include <linux/init.h>
36 #include <linux/module.h>
37 #include <linux/string.h>
38 #include <linux/security.h>
39
40 #define MLOG_MASK_PREFIX ML_XATTR
41 #include <cluster/masklog.h>
42
43 #include "ocfs2.h"
44 #include "alloc.h"
45 #include "dlmglue.h"
46 #include "file.h"
47 #include "symlink.h"
48 #include "sysfile.h"
49 #include "inode.h"
50 #include "journal.h"
51 #include "ocfs2_fs.h"
52 #include "suballoc.h"
53 #include "uptodate.h"
54 #include "buffer_head_io.h"
55 #include "super.h"
56 #include "xattr.h"
57
58
/*
 * An xattr value root (xv) immediately followed by one extent record (er).
 * Its size is what OCFS2_XATTR_ROOT_SIZE evaluates to.
 * NOTE(review): er presumably provides the storage for the single record
 * of xv's extent list -- confirm against the on-disk definitions.
 */
59 struct ocfs2_xattr_def_value_root {
60         struct ocfs2_xattr_value_root   xv;
61         struct ocfs2_extent_rec         er;
62 };
63
/*
 * In-memory handle for one xattr bucket: the owning inode plus the
 * buffer_heads of the bucket's blocks.  Only the first bu_blocks slots of
 * bu_bhs are walked by the bucket helpers in this file.
 */
64 struct ocfs2_xattr_bucket {
65         /* The inode these xattrs are associated with */
66         struct inode *bu_inode;
67
68         /* The actual buffers that make up the bucket */
69         struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
70
71         /* How many blocks make up one bucket for this filesystem */
72         int bu_blocks;
73 };
74
/*
 * Resources shared by the helpers that modify an xattr value's allocation:
 *   handle  - journal handle used for journal access/dirty and for the
 *             extent-tree add/remove calls
 *   meta_ac - allocation context handed to the extent-tree add/remove calls
 *   data_ac - allocation context handed to ocfs2_add_clusters_in_btree()
 *   dealloc - deferred-free context handed to ocfs2_remove_extent() and
 *             ocfs2_cache_cluster_dealloc()
 */
75 struct ocfs2_xattr_set_ctxt {
76         handle_t *handle;
77         struct ocfs2_alloc_context *meta_ac;
78         struct ocfs2_alloc_context *data_ac;
79         struct ocfs2_cached_dealloc_ctxt dealloc;
80 };
81
/*
 * OCFS2_XATTR_ROOT_SIZE:     size of struct ocfs2_xattr_def_value_root.
 * OCFS2_XATTR_INLINE_SIZE:   largest value length (in bytes) that
 *                            ocfs2_xattr_entry_real_size() accounts for as
 *                            stored next to the name; longer values are
 *                            accounted as OCFS2_XATTR_ROOT_SIZE instead.
 * OCFS2_XATTR_FREE_IN_IBODY: OCFS2_MIN_XATTR_INLINE_SIZE less the xattr
 *                            header and one __u32.
 *                            NOTE(review): the purpose of the reserved
 *                            __u32 is not visible here -- confirm.
 */
82 #define OCFS2_XATTR_ROOT_SIZE   (sizeof(struct ocfs2_xattr_def_value_root))
83 #define OCFS2_XATTR_INLINE_SIZE 80
84 #define OCFS2_XATTR_FREE_IN_IBODY       (OCFS2_MIN_XATTR_INLINE_SIZE \
85                                          - sizeof(struct ocfs2_xattr_header) \
86                                          - sizeof(__u32))
87
/*
 * Statically initialised value root whose extent list count is 1 (stored
 * little-endian); every other field is zero.
 * NOTE(review): presumably used as the template for a new out-of-line
 * value root -- its users are not in this part of the file.
 */
88 static struct ocfs2_xattr_def_value_root def_xv = {
89         .xv.xr_list.l_count = cpu_to_le16(1),
90 };
91
/* NULL-terminated list of the xattr handlers: user, trusted and security. */
92 struct xattr_handler *ocfs2_xattr_handlers[] = {
93         &ocfs2_xattr_user_handler,
94         &ocfs2_xattr_trusted_handler,
95         &ocfs2_xattr_security_handler,
96         NULL
97 };
98
/*
 * Handler lookup table indexed by xattr name index.  Slots that are not
 * listed here stay NULL; ocfs2_xattr_prefix() treats those as "no prefix".
 */
99 static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
100         [OCFS2_XATTR_INDEX_USER]        = &ocfs2_xattr_user_handler,
101         [OCFS2_XATTR_INDEX_TRUSTED]     = &ocfs2_xattr_trusted_handler,
102         [OCFS2_XATTR_INDEX_SECURITY]    = &ocfs2_xattr_security_handler,
103 };
104
/*
 * Description of one extended attribute: name index, name, and a value
 * buffer with its length in bytes.
 * NOTE(review): presumably the input to the set paths (it is a parameter
 * of ocfs2_xattr_set_entry_index_block()) -- confirm against the callers.
 */
105 struct ocfs2_xattr_info {
106         int name_index;
107         const char *name;
108         const void *value;
109         size_t value_len;
110 };
111
/*
 * State of an xattr lookup.
 *   inode_bh  - buffer whose data is the inode's dinode
 *   header    - xattr header being searched
 *   bucket    - bucket used when searching an xattr block
 *   base      - address that entry name offsets are added to
 *   end       - end of the searched region (end of the dinode block for
 *               in-inode xattrs)
 *   here      - entry cursor; ocfs2_xattr_find_entry() starts from it and
 *               leaves it on the match, or past the last entry examined
 *   not_found - checked by ocfs2_xattr_block_get() after a block search;
 *               non-zero is reported as -ENODATA
 */
112 struct ocfs2_xattr_search {
113         struct buffer_head *inode_bh;
114         /*
115          * xattr_bh point to the block buffer head which has extended attribute
116          * when extended attribute in inode, xattr_bh is equal to inode_bh.
117          */
118         struct buffer_head *xattr_bh;
119         struct ocfs2_xattr_header *header;
120         struct ocfs2_xattr_bucket *bucket;
121         void *base;
122         void *end;
123         struct ocfs2_xattr_entry *here;
124         int not_found;
125 };
126
/*
 * Forward declarations of static helpers that are used before their
 * definitions appear in this file.
 */
127 static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
128                                              struct ocfs2_xattr_header *xh,
129                                              int index,
130                                              int *block_off,
131                                              int *new_offset);
132
133 static int ocfs2_xattr_block_find(struct inode *inode,
134                                   int name_index,
135                                   const char *name,
136                                   struct ocfs2_xattr_search *xs);
137 static int ocfs2_xattr_index_block_find(struct inode *inode,
138                                         struct buffer_head *root_bh,
139                                         int name_index,
140                                         const char *name,
141                                         struct ocfs2_xattr_search *xs);
142
143 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
144                                         struct ocfs2_xattr_tree_root *xt,
145                                         char *buffer,
146                                         size_t buffer_size);
147
148 static int ocfs2_xattr_create_index_block(struct inode *inode,
149                                           struct ocfs2_xattr_search *xs,
150                                           struct ocfs2_xattr_set_ctxt *ctxt);
151
152 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
153                                              struct ocfs2_xattr_info *xi,
154                                              struct ocfs2_xattr_search *xs,
155                                              struct ocfs2_xattr_set_ctxt *ctxt);
156
157 static int ocfs2_delete_xattr_index_block(struct inode *inode,
158                                           struct buffer_head *xb_bh);
159
160 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
161 {
162         return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
163 }
164
165 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
166 {
167         return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
168 }
169
170 static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
171 {
172         u16 len = sb->s_blocksize -
173                  offsetof(struct ocfs2_xattr_header, xh_entries);
174
175         return len / sizeof(struct ocfs2_xattr_entry);
176 }
177
/*
 * Bucket accessors (all dereference bu_bhs, so the buffers must be set):
 *   bucket_blkno(b)    - block number of the bucket's first buffer
 *   bucket_block(b, n) - data pointer of the bucket's n-th buffer
 *   bucket_xh(b)       - first block's data viewed as the xattr header
 */
178 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
179 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
180 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
181
182 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
183 {
184         struct ocfs2_xattr_bucket *bucket;
185         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
186
187         BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
188
189         bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
190         if (bucket) {
191                 bucket->bu_inode = inode;
192                 bucket->bu_blocks = blks;
193         }
194
195         return bucket;
196 }
197
198 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
199 {
200         int i;
201
202         for (i = 0; i < bucket->bu_blocks; i++) {
203                 brelse(bucket->bu_bhs[i]);
204                 bucket->bu_bhs[i] = NULL;
205         }
206 }
207
208 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
209 {
210         if (bucket) {
211                 ocfs2_xattr_bucket_relse(bucket);
212                 bucket->bu_inode = NULL;
213                 kfree(bucket);
214         }
215 }
216
217 /*
218  * A bucket that has never been written to disk doesn't need to be
219  * read.  We just need the buffer_heads.  Don't call this for
220  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
221  * them fully.
222  */
223 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
224                                    u64 xb_blkno)
225 {
226         int i, rc = 0;
227
228         for (i = 0; i < bucket->bu_blocks; i++) {
229                 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
230                                               xb_blkno + i);
231                 if (!bucket->bu_bhs[i]) {
232                         rc = -EIO;
233                         mlog_errno(rc);
234                         break;
235                 }
236
237                 if (!ocfs2_buffer_uptodate(bucket->bu_inode,
238                                            bucket->bu_bhs[i]))
239                         ocfs2_set_new_buffer_uptodate(bucket->bu_inode,
240                                                       bucket->bu_bhs[i]);
241         }
242
243         if (rc)
244                 ocfs2_xattr_bucket_relse(bucket);
245         return rc;
246 }
247
248 /* Read the xattr bucket at xb_blkno */
249 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
250                                    u64 xb_blkno)
251 {
252         int rc;
253
254         rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno,
255                                bucket->bu_blocks, bucket->bu_bhs, 0);
256         if (rc)
257                 ocfs2_xattr_bucket_relse(bucket);
258         return rc;
259 }
260
261 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
262                                              struct ocfs2_xattr_bucket *bucket,
263                                              int type)
264 {
265         int i, rc = 0;
266
267         for (i = 0; i < bucket->bu_blocks; i++) {
268                 rc = ocfs2_journal_access(handle, bucket->bu_inode,
269                                           bucket->bu_bhs[i], type);
270                 if (rc) {
271                         mlog_errno(rc);
272                         break;
273                 }
274         }
275
276         return rc;
277 }
278
279 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
280                                              struct ocfs2_xattr_bucket *bucket)
281 {
282         int i;
283
284         for (i = 0; i < bucket->bu_blocks; i++)
285                 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
286 }
287
288 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
289                                          struct ocfs2_xattr_bucket *src)
290 {
291         int i;
292         int blocksize = src->bu_inode->i_sb->s_blocksize;
293
294         BUG_ON(dest->bu_blocks != src->bu_blocks);
295         BUG_ON(dest->bu_inode != src->bu_inode);
296
297         for (i = 0; i < src->bu_blocks; i++) {
298                 memcpy(bucket_block(dest, i), bucket_block(src, i),
299                        blocksize);
300         }
301 }
302
303 static inline const char *ocfs2_xattr_prefix(int name_index)
304 {
305         struct xattr_handler *handler = NULL;
306
307         if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
308                 handler = ocfs2_xattr_handler_map[name_index];
309
310         return handler ? handler->prefix : NULL;
311 }
312
313 static u32 ocfs2_xattr_name_hash(struct inode *inode,
314                                  const char *name,
315                                  int name_len)
316 {
317         /* Get hash value of uuid from super block */
318         u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
319         int i;
320
321         /* hash extended attribute name */
322         for (i = 0; i < name_len; i++) {
323                 hash = (hash << OCFS2_HASH_SHIFT) ^
324                        (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
325                        *name++;
326         }
327
328         return hash;
329 }
330
331 /*
332  * ocfs2_xattr_hash_entry()
333  *
334  * Compute the hash of an extended attribute.
335  */
336 static void ocfs2_xattr_hash_entry(struct inode *inode,
337                                    struct ocfs2_xattr_header *header,
338                                    struct ocfs2_xattr_entry *entry)
339 {
340         u32 hash = 0;
341         char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
342
343         hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len);
344         entry->xe_name_hash = cpu_to_le32(hash);
345
346         return;
347 }
348
349 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
350 {
351         int size = 0;
352
353         if (value_len <= OCFS2_XATTR_INLINE_SIZE)
354                 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
355         else
356                 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
357         size += sizeof(struct ocfs2_xattr_entry);
358
359         return size;
360 }
361
362 int ocfs2_calc_security_init(struct inode *dir,
363                              struct ocfs2_security_xattr_info *si,
364                              int *want_clusters,
365                              int *xattr_credits,
366                              struct ocfs2_alloc_context **xattr_ac)
367 {
368         int ret = 0;
369         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
370         int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
371                                                  si->value_len);
372
373         /*
374          * The max space of security xattr taken inline is
375          * 256(name) + 80(value) + 16(entry) = 352 bytes,
376          * So reserve one metadata block for it is ok.
377          */
378         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
379             s_size > OCFS2_XATTR_FREE_IN_IBODY) {
380                 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
381                 if (ret) {
382                         mlog_errno(ret);
383                         return ret;
384                 }
385                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
386         }
387
388         /* reserve clusters for xattr value which will be set in B tree*/
389         if (si->value_len > OCFS2_XATTR_INLINE_SIZE)
390                 *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
391                                                            si->value_len);
392         return ret;
393 }
394
395 static int ocfs2_xattr_extend_allocation(struct inode *inode,
396                                          u32 clusters_to_add,
397                                          struct buffer_head *xattr_bh,
398                                          struct ocfs2_xattr_value_root *xv,
399                                          struct ocfs2_xattr_set_ctxt *ctxt)
400 {
401         int status = 0;
402         handle_t *handle = ctxt->handle;
403         enum ocfs2_alloc_restarted why;
404         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
405         u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters);
406         struct ocfs2_extent_tree et;
407
408         mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
409
410         ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv);
411
412         status = ocfs2_journal_access(handle, inode, xattr_bh,
413                                       OCFS2_JOURNAL_ACCESS_WRITE);
414         if (status < 0) {
415                 mlog_errno(status);
416                 goto leave;
417         }
418
419         prev_clusters = le32_to_cpu(xv->xr_clusters);
420         status = ocfs2_add_clusters_in_btree(osb,
421                                              inode,
422                                              &logical_start,
423                                              clusters_to_add,
424                                              0,
425                                              &et,
426                                              handle,
427                                              ctxt->data_ac,
428                                              ctxt->meta_ac,
429                                              &why);
430         if (status < 0) {
431                 mlog_errno(status);
432                 goto leave;
433         }
434
435         status = ocfs2_journal_dirty(handle, xattr_bh);
436         if (status < 0) {
437                 mlog_errno(status);
438                 goto leave;
439         }
440
441         clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters;
442
443         /*
444          * We should have already allocated enough space before the transaction,
445          * so no need to restart.
446          */
447         BUG_ON(why != RESTART_NONE || clusters_to_add);
448
449 leave:
450
451         return status;
452 }
453
454 static int __ocfs2_remove_xattr_range(struct inode *inode,
455                                       struct buffer_head *root_bh,
456                                       struct ocfs2_xattr_value_root *xv,
457                                       u32 cpos, u32 phys_cpos, u32 len,
458                                       struct ocfs2_xattr_set_ctxt *ctxt)
459 {
460         int ret;
461         u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
462         handle_t *handle = ctxt->handle;
463         struct ocfs2_extent_tree et;
464
465         ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv);
466
467         ret = ocfs2_journal_access(handle, inode, root_bh,
468                                    OCFS2_JOURNAL_ACCESS_WRITE);
469         if (ret) {
470                 mlog_errno(ret);
471                 goto out;
472         }
473
474         ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac,
475                                   &ctxt->dealloc);
476         if (ret) {
477                 mlog_errno(ret);
478                 goto out;
479         }
480
481         le32_add_cpu(&xv->xr_clusters, -len);
482
483         ret = ocfs2_journal_dirty(handle, root_bh);
484         if (ret) {
485                 mlog_errno(ret);
486                 goto out;
487         }
488
489         ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len);
490         if (ret)
491                 mlog_errno(ret);
492
493 out:
494         return ret;
495 }
496
497 static int ocfs2_xattr_shrink_size(struct inode *inode,
498                                    u32 old_clusters,
499                                    u32 new_clusters,
500                                    struct buffer_head *root_bh,
501                                    struct ocfs2_xattr_value_root *xv,
502                                    struct ocfs2_xattr_set_ctxt *ctxt)
503 {
504         int ret = 0;
505         u32 trunc_len, cpos, phys_cpos, alloc_size;
506         u64 block;
507
508         if (old_clusters <= new_clusters)
509                 return 0;
510
511         cpos = new_clusters;
512         trunc_len = old_clusters - new_clusters;
513         while (trunc_len) {
514                 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
515                                                &alloc_size, &xv->xr_list);
516                 if (ret) {
517                         mlog_errno(ret);
518                         goto out;
519                 }
520
521                 if (alloc_size > trunc_len)
522                         alloc_size = trunc_len;
523
524                 ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos,
525                                                  phys_cpos, alloc_size,
526                                                  ctxt);
527                 if (ret) {
528                         mlog_errno(ret);
529                         goto out;
530                 }
531
532                 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
533                 ocfs2_remove_xattr_clusters_from_cache(inode, block,
534                                                        alloc_size);
535                 cpos += alloc_size;
536                 trunc_len -= alloc_size;
537         }
538
539 out:
540         return ret;
541 }
542
543 static int ocfs2_xattr_value_truncate(struct inode *inode,
544                                       struct buffer_head *root_bh,
545                                       struct ocfs2_xattr_value_root *xv,
546                                       int len,
547                                       struct ocfs2_xattr_set_ctxt *ctxt)
548 {
549         int ret;
550         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
551         u32 old_clusters = le32_to_cpu(xv->xr_clusters);
552
553         if (new_clusters == old_clusters)
554                 return 0;
555
556         if (new_clusters > old_clusters)
557                 ret = ocfs2_xattr_extend_allocation(inode,
558                                                     new_clusters - old_clusters,
559                                                     root_bh, xv, ctxt);
560         else
561                 ret = ocfs2_xattr_shrink_size(inode,
562                                               old_clusters, new_clusters,
563                                               root_bh, xv, ctxt);
564
565         return ret;
566 }
567
/*
 * Append "<prefix><name>\0" to a listxattr buffer.
 *
 * @buffer:   output buffer (only written when @size is non-zero)
 * @size:     capacity of @buffer; 0 means "only compute the needed size"
 * @result:   running byte count; always advanced by the entry's length,
 *            even when -ERANGE is returned
 * @prefix:   NUL-terminated namespace prefix
 * @name:     attribute name, not necessarily NUL-terminated
 * @name_len: number of bytes of @name to use
 *
 * Returns 0 on success, or -ERANGE if the entry does not fit in @size.
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
				  size_t *result, const char *prefix,
				  const char *name, int name_len)
{
	int prefix_len = strlen(prefix);
	size_t start = *result;
	char *dst;

	*result += prefix_len + name_len + 1;

	/* Size probe: the caller only wants to know how much is needed. */
	if (!size)
		return 0;

	if (*result > size)
		return -ERANGE;

	dst = buffer + start;
	memcpy(dst, prefix, prefix_len);
	dst += prefix_len;
	memcpy(dst, name, name_len);
	dst[name_len] = '\0';

	return 0;
}
591
592 static int ocfs2_xattr_list_entries(struct inode *inode,
593                                     struct ocfs2_xattr_header *header,
594                                     char *buffer, size_t buffer_size)
595 {
596         size_t result = 0;
597         int i, type, ret;
598         const char *prefix, *name;
599
600         for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
601                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
602                 type = ocfs2_xattr_get_type(entry);
603                 prefix = ocfs2_xattr_prefix(type);
604
605                 if (prefix) {
606                         name = (const char *)header +
607                                 le16_to_cpu(entry->xe_name_offset);
608
609                         ret = ocfs2_xattr_list_entry(buffer, buffer_size,
610                                                      &result, prefix, name,
611                                                      entry->xe_name_len);
612                         if (ret)
613                                 return ret;
614                 }
615         }
616
617         return result;
618 }
619
620 static int ocfs2_xattr_ibody_list(struct inode *inode,
621                                   struct ocfs2_dinode *di,
622                                   char *buffer,
623                                   size_t buffer_size)
624 {
625         struct ocfs2_xattr_header *header = NULL;
626         struct ocfs2_inode_info *oi = OCFS2_I(inode);
627         int ret = 0;
628
629         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
630                 return ret;
631
632         header = (struct ocfs2_xattr_header *)
633                  ((void *)di + inode->i_sb->s_blocksize -
634                  le16_to_cpu(di->i_xattr_inline_size));
635
636         ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
637
638         return ret;
639 }
640
641 static int ocfs2_xattr_block_list(struct inode *inode,
642                                   struct ocfs2_dinode *di,
643                                   char *buffer,
644                                   size_t buffer_size)
645 {
646         struct buffer_head *blk_bh = NULL;
647         struct ocfs2_xattr_block *xb;
648         int ret = 0;
649
650         if (!di->i_xattr_loc)
651                 return ret;
652
653         ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
654         if (ret < 0) {
655                 mlog_errno(ret);
656                 return ret;
657         }
658
659         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
660         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
661                 ret = -EIO;
662                 goto cleanup;
663         }
664
665         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
666                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
667                 ret = ocfs2_xattr_list_entries(inode, header,
668                                                buffer, buffer_size);
669         } else {
670                 struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
671                 ret = ocfs2_xattr_tree_list_index_block(inode, xt,
672                                                    buffer, buffer_size);
673         }
674 cleanup:
675         brelse(blk_bh);
676
677         return ret;
678 }
679
680 ssize_t ocfs2_listxattr(struct dentry *dentry,
681                         char *buffer,
682                         size_t size)
683 {
684         int ret = 0, i_ret = 0, b_ret = 0;
685         struct buffer_head *di_bh = NULL;
686         struct ocfs2_dinode *di = NULL;
687         struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
688
689         if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
690                 return -EOPNOTSUPP;
691
692         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
693                 return ret;
694
695         ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
696         if (ret < 0) {
697                 mlog_errno(ret);
698                 return ret;
699         }
700
701         di = (struct ocfs2_dinode *)di_bh->b_data;
702
703         down_read(&oi->ip_xattr_sem);
704         i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
705         if (i_ret < 0)
706                 b_ret = 0;
707         else {
708                 if (buffer) {
709                         buffer += i_ret;
710                         size -= i_ret;
711                 }
712                 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
713                                                buffer, size);
714                 if (b_ret < 0)
715                         i_ret = 0;
716         }
717         up_read(&oi->ip_xattr_sem);
718         ocfs2_inode_unlock(dentry->d_inode, 0);
719
720         brelse(di_bh);
721
722         return i_ret + b_ret;
723 }
724
725 static int ocfs2_xattr_find_entry(int name_index,
726                                   const char *name,
727                                   struct ocfs2_xattr_search *xs)
728 {
729         struct ocfs2_xattr_entry *entry;
730         size_t name_len;
731         int i, cmp = 1;
732
733         if (name == NULL)
734                 return -EINVAL;
735
736         name_len = strlen(name);
737         entry = xs->here;
738         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
739                 cmp = name_index - ocfs2_xattr_get_type(entry);
740                 if (!cmp)
741                         cmp = name_len - entry->xe_name_len;
742                 if (!cmp)
743                         cmp = memcmp(name, (xs->base +
744                                      le16_to_cpu(entry->xe_name_offset)),
745                                      name_len);
746                 if (cmp == 0)
747                         break;
748                 entry += 1;
749         }
750         xs->here = entry;
751
752         return cmp ? -ENODATA : 0;
753 }
754
755 static int ocfs2_xattr_get_value_outside(struct inode *inode,
756                                          struct ocfs2_xattr_value_root *xv,
757                                          void *buffer,
758                                          size_t len)
759 {
760         u32 cpos, p_cluster, num_clusters, bpc, clusters;
761         u64 blkno;
762         int i, ret = 0;
763         size_t cplen, blocksize;
764         struct buffer_head *bh = NULL;
765         struct ocfs2_extent_list *el;
766
767         el = &xv->xr_list;
768         clusters = le32_to_cpu(xv->xr_clusters);
769         bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
770         blocksize = inode->i_sb->s_blocksize;
771
772         cpos = 0;
773         while (cpos < clusters) {
774                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
775                                                &num_clusters, el);
776                 if (ret) {
777                         mlog_errno(ret);
778                         goto out;
779                 }
780
781                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
782                 /* Copy ocfs2_xattr_value */
783                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
784                         ret = ocfs2_read_block(inode, blkno, &bh);
785                         if (ret) {
786                                 mlog_errno(ret);
787                                 goto out;
788                         }
789
790                         cplen = len >= blocksize ? blocksize : len;
791                         memcpy(buffer, bh->b_data, cplen);
792                         len -= cplen;
793                         buffer += cplen;
794
795                         brelse(bh);
796                         bh = NULL;
797                         if (len == 0)
798                                 break;
799                 }
800                 cpos += num_clusters;
801         }
802 out:
803         return ret;
804 }
805
806 static int ocfs2_xattr_ibody_get(struct inode *inode,
807                                  int name_index,
808                                  const char *name,
809                                  void *buffer,
810                                  size_t buffer_size,
811                                  struct ocfs2_xattr_search *xs)
812 {
813         struct ocfs2_inode_info *oi = OCFS2_I(inode);
814         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
815         struct ocfs2_xattr_value_root *xv;
816         size_t size;
817         int ret = 0;
818
819         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
820                 return -ENODATA;
821
822         xs->end = (void *)di + inode->i_sb->s_blocksize;
823         xs->header = (struct ocfs2_xattr_header *)
824                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
825         xs->base = (void *)xs->header;
826         xs->here = xs->header->xh_entries;
827
828         ret = ocfs2_xattr_find_entry(name_index, name, xs);
829         if (ret)
830                 return ret;
831         size = le64_to_cpu(xs->here->xe_value_size);
832         if (buffer) {
833                 if (size > buffer_size)
834                         return -ERANGE;
835                 if (ocfs2_xattr_is_local(xs->here)) {
836                         memcpy(buffer, (void *)xs->base +
837                                le16_to_cpu(xs->here->xe_name_offset) +
838                                OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
839                 } else {
840                         xv = (struct ocfs2_xattr_value_root *)
841                                 (xs->base + le16_to_cpu(
842                                  xs->here->xe_name_offset) +
843                                 OCFS2_XATTR_SIZE(xs->here->xe_name_len));
844                         ret = ocfs2_xattr_get_value_outside(inode, xv,
845                                                             buffer, size);
846                         if (ret < 0) {
847                                 mlog_errno(ret);
848                                 return ret;
849                         }
850                 }
851         }
852
853         return size;
854 }
855
856 static int ocfs2_xattr_block_get(struct inode *inode,
857                                  int name_index,
858                                  const char *name,
859                                  void *buffer,
860                                  size_t buffer_size,
861                                  struct ocfs2_xattr_search *xs)
862 {
863         struct ocfs2_xattr_block *xb;
864         struct ocfs2_xattr_value_root *xv;
865         size_t size;
866         int ret = -ENODATA, name_offset, name_len, block_off, i;
867
868         xs->bucket = ocfs2_xattr_bucket_new(inode);
869         if (!xs->bucket) {
870                 ret = -ENOMEM;
871                 mlog_errno(ret);
872                 goto cleanup;
873         }
874
875         ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
876         if (ret) {
877                 mlog_errno(ret);
878                 goto cleanup;
879         }
880
881         if (xs->not_found) {
882                 ret = -ENODATA;
883                 goto cleanup;
884         }
885
886         xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
887         size = le64_to_cpu(xs->here->xe_value_size);
888         if (buffer) {
889                 ret = -ERANGE;
890                 if (size > buffer_size)
891                         goto cleanup;
892
893                 name_offset = le16_to_cpu(xs->here->xe_name_offset);
894                 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
895                 i = xs->here - xs->header->xh_entries;
896
897                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
898                         ret = ocfs2_xattr_bucket_get_name_value(inode,
899                                                                 bucket_xh(xs->bucket),
900                                                                 i,
901                                                                 &block_off,
902                                                                 &name_offset);
903                         xs->base = bucket_block(xs->bucket, block_off);
904                 }
905                 if (ocfs2_xattr_is_local(xs->here)) {
906                         memcpy(buffer, (void *)xs->base +
907                                name_offset + name_len, size);
908                 } else {
909                         xv = (struct ocfs2_xattr_value_root *)
910                                 (xs->base + name_offset + name_len);
911                         ret = ocfs2_xattr_get_value_outside(inode, xv,
912                                                             buffer, size);
913                         if (ret < 0) {
914                                 mlog_errno(ret);
915                                 goto cleanup;
916                         }
917                 }
918         }
919         ret = size;
920 cleanup:
921         ocfs2_xattr_bucket_free(xs->bucket);
922
923         brelse(xs->xattr_bh);
924         xs->xattr_bh = NULL;
925         return ret;
926 }
927
928 int ocfs2_xattr_get_nolock(struct inode *inode,
929                            struct buffer_head *di_bh,
930                            int name_index,
931                            const char *name,
932                            void *buffer,
933                            size_t buffer_size)
934 {
935         int ret;
936         struct ocfs2_dinode *di = NULL;
937         struct ocfs2_inode_info *oi = OCFS2_I(inode);
938         struct ocfs2_xattr_search xis = {
939                 .not_found = -ENODATA,
940         };
941         struct ocfs2_xattr_search xbs = {
942                 .not_found = -ENODATA,
943         };
944
945         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
946                 return -EOPNOTSUPP;
947
948         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
949                 ret = -ENODATA;
950
951         xis.inode_bh = xbs.inode_bh = di_bh;
952         di = (struct ocfs2_dinode *)di_bh->b_data;
953
954         down_read(&oi->ip_xattr_sem);
955         ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
956                                     buffer_size, &xis);
957         if (ret == -ENODATA && di->i_xattr_loc)
958                 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
959                                             buffer_size, &xbs);
960         up_read(&oi->ip_xattr_sem);
961
962         return ret;
963 }
964
965 /* ocfs2_xattr_get()
966  *
967  * Copy an extended attribute into the buffer provided.
968  * Buffer is NULL to compute the size of buffer required.
969  */
970 static int ocfs2_xattr_get(struct inode *inode,
971                            int name_index,
972                            const char *name,
973                            void *buffer,
974                            size_t buffer_size)
975 {
976         int ret;
977         struct buffer_head *di_bh = NULL;
978
979         ret = ocfs2_inode_lock(inode, &di_bh, 0);
980         if (ret < 0) {
981                 mlog_errno(ret);
982                 return ret;
983         }
984         ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
985                                      name, buffer, buffer_size);
986
987         ocfs2_inode_unlock(inode, 0);
988
989         brelse(di_bh);
990
991         return ret;
992 }
993
994 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
995                                            handle_t *handle,
996                                            struct ocfs2_xattr_value_root *xv,
997                                            const void *value,
998                                            int value_len)
999 {
1000         int ret = 0, i, cp_len, credits;
1001         u16 blocksize = inode->i_sb->s_blocksize;
1002         u32 p_cluster, num_clusters;
1003         u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1004         u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1005         u64 blkno;
1006         struct buffer_head *bh = NULL;
1007
1008         BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1009
1010         /*
1011          * In __ocfs2_xattr_set_value_outside has already been dirtied,
1012          * so we don't need to worry about whether ocfs2_extend_trans
1013          * will create a new transactio for us or not.
1014          */
1015         credits = clusters * bpc;
1016         ret = ocfs2_extend_trans(handle, credits);
1017         if (ret) {
1018                 mlog_errno(ret);
1019                 goto out;
1020         }
1021
1022         while (cpos < clusters) {
1023                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1024                                                &num_clusters, &xv->xr_list);
1025                 if (ret) {
1026                         mlog_errno(ret);
1027                         goto out;
1028                 }
1029
1030                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1031
1032                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1033                         ret = ocfs2_read_block(inode, blkno, &bh);
1034                         if (ret) {
1035                                 mlog_errno(ret);
1036                                 goto out;
1037                         }
1038
1039                         ret = ocfs2_journal_access(handle,
1040                                                    inode,
1041                                                    bh,
1042                                                    OCFS2_JOURNAL_ACCESS_WRITE);
1043                         if (ret < 0) {
1044                                 mlog_errno(ret);
1045                                 goto out;
1046                         }
1047
1048                         cp_len = value_len > blocksize ? blocksize : value_len;
1049                         memcpy(bh->b_data, value, cp_len);
1050                         value_len -= cp_len;
1051                         value += cp_len;
1052                         if (cp_len < blocksize)
1053                                 memset(bh->b_data + cp_len, 0,
1054                                        blocksize - cp_len);
1055
1056                         ret = ocfs2_journal_dirty(handle, bh);
1057                         if (ret < 0) {
1058                                 mlog_errno(ret);
1059                                 goto out;
1060                         }
1061                         brelse(bh);
1062                         bh = NULL;
1063
1064                         /*
1065                          * XXX: do we need to empty all the following
1066                          * blocks in this cluster?
1067                          */
1068                         if (!value_len)
1069                                 break;
1070                 }
1071                 cpos += num_clusters;
1072         }
1073 out:
1074         brelse(bh);
1075
1076         return ret;
1077 }
1078
1079 static int ocfs2_xattr_cleanup(struct inode *inode,
1080                                handle_t *handle,
1081                                struct ocfs2_xattr_info *xi,
1082                                struct ocfs2_xattr_search *xs,
1083                                size_t offs)
1084 {
1085         int ret = 0;
1086         size_t name_len = strlen(xi->name);
1087         void *val = xs->base + offs;
1088         size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1089
1090         ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1091                                    OCFS2_JOURNAL_ACCESS_WRITE);
1092         if (ret) {
1093                 mlog_errno(ret);
1094                 goto out;
1095         }
1096         /* Decrease xattr count */
1097         le16_add_cpu(&xs->header->xh_count, -1);
1098         /* Remove the xattr entry and tree root which has already be set*/
1099         memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
1100         memset(val, 0, size);
1101
1102         ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1103         if (ret < 0)
1104                 mlog_errno(ret);
1105 out:
1106         return ret;
1107 }
1108
1109 static int ocfs2_xattr_update_entry(struct inode *inode,
1110                                     handle_t *handle,
1111                                     struct ocfs2_xattr_info *xi,
1112                                     struct ocfs2_xattr_search *xs,
1113                                     size_t offs)
1114 {
1115         int ret;
1116
1117         ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1118                                    OCFS2_JOURNAL_ACCESS_WRITE);
1119         if (ret) {
1120                 mlog_errno(ret);
1121                 goto out;
1122         }
1123
1124         xs->here->xe_name_offset = cpu_to_le16(offs);
1125         xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1126         if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE)
1127                 ocfs2_xattr_set_local(xs->here, 1);
1128         else
1129                 ocfs2_xattr_set_local(xs->here, 0);
1130         ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1131
1132         ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1133         if (ret < 0)
1134                 mlog_errno(ret);
1135 out:
1136         return ret;
1137 }
1138
1139 /*
1140  * ocfs2_xattr_set_value_outside()
1141  *
1142  * Set large size value in B tree.
1143  */
1144 static int ocfs2_xattr_set_value_outside(struct inode *inode,
1145                                          struct ocfs2_xattr_info *xi,
1146                                          struct ocfs2_xattr_search *xs,
1147                                          struct ocfs2_xattr_set_ctxt *ctxt,
1148                                          size_t offs)
1149 {
1150         size_t name_len = strlen(xi->name);
1151         void *val = xs->base + offs;
1152         struct ocfs2_xattr_value_root *xv = NULL;
1153         size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1154         int ret = 0;
1155
1156         memset(val, 0, size);
1157         memcpy(val, xi->name, name_len);
1158         xv = (struct ocfs2_xattr_value_root *)
1159                 (val + OCFS2_XATTR_SIZE(name_len));
1160         xv->xr_clusters = 0;
1161         xv->xr_last_eb_blk = 0;
1162         xv->xr_list.l_tree_depth = 0;
1163         xv->xr_list.l_count = cpu_to_le16(1);
1164         xv->xr_list.l_next_free_rec = 0;
1165
1166         ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv,
1167                                          xi->value_len, ctxt);
1168         if (ret < 0) {
1169                 mlog_errno(ret);
1170                 return ret;
1171         }
1172         ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, offs);
1173         if (ret < 0) {
1174                 mlog_errno(ret);
1175                 return ret;
1176         }
1177         ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, xv,
1178                                               xi->value, xi->value_len);
1179         if (ret < 0)
1180                 mlog_errno(ret);
1181
1182         return ret;
1183 }
1184
1185 /*
1186  * ocfs2_xattr_set_entry_local()
1187  *
1188  * Set, replace or remove extended attribute in local.
      *
      * Operates only on the entry table (xs->header->xh_entries) and on the
      * name+value area addressed relative to xs->base; no journal calls are
      * made here.
      *
      * @xi:       attribute to store; a NULL xi->value means "remove".
      * @xs:       search state: xs->here is the entry being worked on and
      *            xs->not_found is non-zero when the name does not exist yet.
      * @last:     entry slot initialised when a new attribute is inserted.
      * @min_offs: lowest name+value offset (relative to xs->base) in use;
      *            new data is written directly below it.
1189  */
1190 static void ocfs2_xattr_set_entry_local(struct inode *inode,
1191                                         struct ocfs2_xattr_info *xi,
1192                                         struct ocfs2_xattr_search *xs,
1193                                         struct ocfs2_xattr_entry *last,
1194                                         size_t min_offs)
1195 {
1196         size_t name_len = strlen(xi->name);
1197         int i;
1198
1199         if (xi->value && xs->not_found) {
1200                 /* Insert the new xattr entry. */
1201                 le16_add_cpu(&xs->header->xh_count, 1);
1202                 ocfs2_xattr_set_type(last, xi->name_index);
1203                 ocfs2_xattr_set_local(last, 1);
1204                 last->xe_name_len = name_len;
                     /*
                      * NOTE(review): the name+value insertion at the bottom of
                      * this function writes through xs->here, not last;
                      * presumably both point at the same free slot when the
                      * name was not found - confirm against
                      * ocfs2_xattr_find_entry().
                      */
1205         } else {
1206                 void *first_val;
1207                 void *val;
1208                 size_t offs, size;
1209
1210                 first_val = xs->base + min_offs;
1211                 offs = le16_to_cpu(xs->here->xe_name_offset);
1212                 val = xs->base + offs;
1213
                     /*
                      * Space taken by the existing name+value: the padded name
                      * plus either a value root (value larger than
                      * OCFS2_XATTR_INLINE_SIZE) or the padded in-place value.
                      */
1214                 if (le64_to_cpu(xs->here->xe_value_size) >
1215                     OCFS2_XATTR_INLINE_SIZE)
1216                         size = OCFS2_XATTR_SIZE(name_len) +
1217                                 OCFS2_XATTR_ROOT_SIZE;
1218                 else
1219                         size = OCFS2_XATTR_SIZE(name_len) +
1220                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1221
1222                 if (xi->value && size == OCFS2_XATTR_SIZE(name_len) +
1223                                 OCFS2_XATTR_SIZE(xi->value_len)) {
1224                         /* The old and the new value have the
1225                            same size. Just replace the value. */
1226                         ocfs2_xattr_set_local(xs->here, 1);
1227                         xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1228                         /* Clear value bytes. */
1229                         memset(val + OCFS2_XATTR_SIZE(name_len),
1230                                0,
1231                                OCFS2_XATTR_SIZE(xi->value_len));
1232                         memcpy(val + OCFS2_XATTR_SIZE(name_len),
1233                                xi->value,
1234                                xi->value_len);
1235                         return;
1236                 }
1237                 /* Remove the old name+value. */
                     /*
                      * Everything stored between first_val and val is slid up
                      * by size bytes, overwriting the old name+value; the
                      * bytes vacated at the low end are then zeroed.
                      */
1238                 memmove(first_val + size, first_val, val - first_val);
1239                 memset(first_val, 0, size);
1240                 xs->here->xe_name_hash = 0;
1241                 xs->here->xe_name_offset = 0;
1242                 ocfs2_xattr_set_local(xs->here, 1);
1243                 xs->here->xe_value_size = 0;
1244
1245                 min_offs += size;
1246
1247                 /* Adjust all value offsets. */
                     /* Only entries whose data sat below offs were moved. */
1248                 last = xs->header->xh_entries;
1249                 for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1250                         size_t o = le16_to_cpu(last->xe_name_offset);
1251
1252                         if (o < offs)
1253                                 last->xe_name_offset = cpu_to_le16(o + size);
1254                         last += 1;
1255                 }
1256
1257                 if (!xi->value) {
1258                         /* Remove the old entry. */
                             /*
                              * last is one past the final entry after the loop
                              * above; step back to the final entry, shift the
                              * entries after xs->here down by one slot and
                              * clear the slot that became free.
                              */
1259                         last -= 1;
1260                         memmove(xs->here, xs->here + 1,
1261                                 (void *)last - (void *)xs->here);
1262                         memset(last, 0, sizeof(struct ocfs2_xattr_entry));
1263                         le16_add_cpu(&xs->header->xh_count, -1);
1264                 }
1265         }
1266         if (xi->value) {
1267                 /* Insert the new name+value. */
                     /* It is placed directly below the lowest offset in use. */
1268                 size_t size = OCFS2_XATTR_SIZE(name_len) +
1269                                 OCFS2_XATTR_SIZE(xi->value_len);
1270                 void *val = xs->base + min_offs - size;
1271
1272                 xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
1273                 memset(val, 0, size);
1274                 memcpy(val, xi->name, name_len);
1275                 memcpy(val + OCFS2_XATTR_SIZE(name_len),
1276                        xi->value,
1277                        xi->value_len);
1278                 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1279                 ocfs2_xattr_set_local(xs->here, 1);
1280                 ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1281         }
1282
1283         return;
1284 }
1285
1286 /*
1287  * ocfs2_xattr_set_entry()
1288  *
1289  * Set extended attribute entry into inode or block.
1290  *
1291  * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
1292  * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(),
1293  * then set value in B tree with set_value_outside().
      *
      * @flag is OR-ed into oi->ip_dyn_features once the local entry has been
      * written; when it does not contain OCFS2_INLINE_XATTR_FL, xs->xattr_bh
      * is journaled in addition to xs->inode_bh.
      *
      * Returns -EIO if the computed free space is negative, -ENOSPC if the
      * new entry does not fit, otherwise the status of the last journal or
      * value helper that ran.
1294  */
1295 static int ocfs2_xattr_set_entry(struct inode *inode,
1296                                  struct ocfs2_xattr_info *xi,
1297                                  struct ocfs2_xattr_search *xs,
1298                                  struct ocfs2_xattr_set_ctxt *ctxt,
1299                                  int flag)
1300 {
1301         struct ocfs2_xattr_entry *last;
1302         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1303         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1304         size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
1305         size_t size_l = 0;
1306         handle_t *handle = ctxt->handle;
1307         int free, i, ret;
             /*
              * xi_l describes what is stored locally; it starts as a copy of
              * xi and is redirected to a value root below for large values.
              */
1308         struct ocfs2_xattr_info xi_l = {
1309                 .name_index = xi->name_index,
1310                 .name = xi->name,
1311                 .value = xi->value,
1312                 .value_len = xi->value_len,
1313         };
1314
1315         /* Compute min_offs, last and free space. */
1316         last = xs->header->xh_entries;
1317
1318         for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1319                 size_t offs = le16_to_cpu(last->xe_name_offset);
1320                 if (offs < min_offs)
1321                         min_offs = offs;
1322                 last += 1;
1323         }
1324
             /*
              * Gap between the end of the entry table (last) and the lowest
              * name+value offset, less one __u32.
              */
1325         free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
1326         if (free < 0)
1327                 return -EIO;
1328
1329         if (!xs->not_found) {
                     /*
                      * The existing entry and its name+value are about to be
                      * replaced, so the space they occupy counts as free.
                      */
1330                 size_t size = 0;
1331                 if (ocfs2_xattr_is_local(xs->here))
1332                         size = OCFS2_XATTR_SIZE(name_len) +
1333                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1334                 else
1335                         size = OCFS2_XATTR_SIZE(name_len) +
1336                                 OCFS2_XATTR_ROOT_SIZE;
1337                 free += (size + sizeof(struct ocfs2_xattr_entry));
1338         }
1339         /* Check free space in inode or block */
1340         if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1341                 if (free < sizeof(struct ocfs2_xattr_entry) +
1342                            OCFS2_XATTR_SIZE(name_len) +
1343                            OCFS2_XATTR_ROOT_SIZE) {
1344                         ret = -ENOSPC;
1345                         goto out;
1346                 }
                     /*
                      * Large value: only a value root is stored locally.
                      * def_xv is defined elsewhere in this file.
                      */
1347                 size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1348                 xi_l.value = (void *)&def_xv;
1349                 xi_l.value_len = OCFS2_XATTR_ROOT_SIZE;
1350         } else if (xi->value) {
1351                 if (free < sizeof(struct ocfs2_xattr_entry) +
1352                            OCFS2_XATTR_SIZE(name_len) +
1353                            OCFS2_XATTR_SIZE(xi->value_len)) {
1354                         ret = -ENOSPC;
1355                         goto out;
1356                 }
1357         }
1358
1359         if (!xs->not_found) {
1360                 /* For existing extended attribute */
1361                 size_t size = OCFS2_XATTR_SIZE(name_len) +
1362                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1363                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1364                 void *val = xs->base + offs;
1365
                     /*
                      * size_l is non-zero only for a large new value, so this
                      * branch is the case where the old local name+value is
                      * exactly as big as name + value root.
                      */
1366                 if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1367                         /* Replace existing local xattr with tree root */
1368                         ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1369                                                             ctxt, offs);
1370                         if (ret < 0)
1371                                 mlog_errno(ret);
1372                         goto out;
1373                 } else if (!ocfs2_xattr_is_local(xs->here)) {
1374                         /* For existing xattr which has value outside */
1375                         struct ocfs2_xattr_value_root *xv = NULL;
1376                         xv = (struct ocfs2_xattr_value_root *)(val +
1377                                 OCFS2_XATTR_SIZE(name_len));
1378
1379                         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1380                                 /*
1381                                  * If the new value must be stored outside too,
1382                                  * first truncate the old value to the new size,
1383                                  * then set new value with set_value_outside().
1384                                  */
1385                                 ret = ocfs2_xattr_value_truncate(inode,
1386                                                                  xs->xattr_bh,
1387                                                                  xv,
1388                                                                  xi->value_len,
1389                                                                  ctxt);
1390                                 if (ret < 0) {
1391                                         mlog_errno(ret);
1392                                         goto out;
1393                                 }
1394
1395                                 ret = ocfs2_xattr_update_entry(inode,
1396                                                                handle,
1397                                                                xi,
1398                                                                xs,
1399                                                                offs);
1400                                 if (ret < 0) {
1401                                         mlog_errno(ret);
1402                                         goto out;
1403                                 }
1404
1405                                 ret = __ocfs2_xattr_set_value_outside(inode,
1406                                                                 handle,
1407                                                                 xv,
1408                                                                 xi->value,
1409                                                                 xi->value_len);
1410                                 if (ret < 0)
1411                                         mlog_errno(ret);
1412                                 goto out;
1413                         } else {
1414                                 /*
1415                                  * If the new value is to be stored locally,
1416                                  * just truncate the old value to zero.
1417                                  */
1418                                  ret = ocfs2_xattr_value_truncate(inode,
1419                                                                   xs->xattr_bh,
1420                                                                   xv,
1421                                                                   0,
1422                                                                   ctxt);
                                     /*
                                      * NOTE(review): a failure here is only
                                      * logged; ret is overwritten by the
                                      * journal access call below and the
                                      * local update still proceeds. Confirm
                                      * this best-effort behaviour is intended.
                                      */
1423                                 if (ret < 0)
1424                                         mlog_errno(ret);
1425                         }
1426                 }
1427         }
1428
1429         ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1430                                    OCFS2_JOURNAL_ACCESS_WRITE);
1431         if (ret) {
1432                 mlog_errno(ret);
1433                 goto out;
1434         }
1435
1436         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1437                 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1438                                            OCFS2_JOURNAL_ACCESS_WRITE);
1439                 if (ret) {
1440                         mlog_errno(ret);
1441                         goto out;
1442                 }
1443         }
1444
1445         /*
1446          * Set value in local, include set tree root in local.
1447          * This is the first step for value size >INLINE_SIZE.
1448          */
1449         ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);
1450
1451         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1452                 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1453                 if (ret < 0) {
1454                         mlog_errno(ret);
1455                         goto out;
1456                 }
1457         }
1458
             /*
              * The inode does not have the inline xattr flag yet but this
              * call is setting it: carve the xattr area out of the inode.
              */
1459         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
1460             (flag & OCFS2_INLINE_XATTR_FL)) {
1461                 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1462                 unsigned int xattrsize = osb->s_xattr_inline_size;
1463
1464                 /*
1465                  * Adjust extent record count or inline data size
1466                  * to reserve space for extended attribute.
1467                  */
1468                 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1469                         struct ocfs2_inline_data *idata = &di->id2.i_data;
1470                         le16_add_cpu(&idata->id_count, -xattrsize);
1471                 } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
1472                         struct ocfs2_extent_list *el = &di->id2.i_list;
1473                         le16_add_cpu(&el->l_count, -(xattrsize /
1474                                         sizeof(struct ocfs2_extent_rec)));
1475                 }
1476                 di->i_xattr_inline_size = cpu_to_le16(xattrsize);
1477         }
1478         /* Update xattr flag */
1479         spin_lock(&oi->ip_lock);
1480         oi->ip_dyn_features |= flag;
1481         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1482         spin_unlock(&oi->ip_lock);
1483         /* Update inode ctime */
1484         inode->i_ctime = CURRENT_TIME;
1485         di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
1486         di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
1487
1488         ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1489         if (ret < 0)
1490                 mlog_errno(ret);
1491
1492         if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1493                 /*
1494                  * Set value outside in B tree.
1495                  * This is the second step for value size > INLINE_SIZE.
1496                  */
1497                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1498                 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt, offs);
1499                 if (ret < 0) {
1500                         int ret2;
1501
1502                         mlog_errno(ret);
1503                         /*
1504                          * If set value outside failed, we have to clean
1505                          * the junk tree root we have already set in local.
1506                          */
1507                         ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle,
1508                                                    xi, xs, offs);
1509                         if (ret2 < 0)
1510                                 mlog_errno(ret2);
1511                 }
1512         }
1513 out:
1514         return ret;
1515 }
1516
1517 static int ocfs2_remove_value_outside(struct inode*inode,
1518                                       struct buffer_head *bh,
1519                                       struct ocfs2_xattr_header *header)
1520 {
1521         int ret = 0, i;
1522         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1523         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
1524
1525         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
1526
1527         ctxt.handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
1528         if (IS_ERR(ctxt.handle)) {
1529                 ret = PTR_ERR(ctxt.handle);
1530                 mlog_errno(ret);
1531                 goto out;
1532         }
1533
1534         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1535                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1536
1537                 if (!ocfs2_xattr_is_local(entry)) {
1538                         struct ocfs2_xattr_value_root *xv;
1539                         void *val;
1540
1541                         val = (void *)header +
1542                                 le16_to_cpu(entry->xe_name_offset);
1543                         xv = (struct ocfs2_xattr_value_root *)
1544                                 (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1545                         ret = ocfs2_xattr_value_truncate(inode, bh, xv,
1546                                                          0, &ctxt);
1547                         if (ret < 0) {
1548                                 mlog_errno(ret);
1549                                 break;
1550                         }
1551                 }
1552         }
1553
1554         ocfs2_commit_trans(osb, ctxt.handle);
1555         ocfs2_schedule_truncate_log_flush(osb, 1);
1556         ocfs2_run_deallocs(osb, &ctxt.dealloc);
1557 out:
1558         return ret;
1559 }
1560
1561 static int ocfs2_xattr_ibody_remove(struct inode *inode,
1562                                     struct buffer_head *di_bh)
1563 {
1564
1565         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1566         struct ocfs2_xattr_header *header;
1567         int ret;
1568
1569         header = (struct ocfs2_xattr_header *)
1570                  ((void *)di + inode->i_sb->s_blocksize -
1571                  le16_to_cpu(di->i_xattr_inline_size));
1572
1573         ret = ocfs2_remove_value_outside(inode, di_bh, header);
1574
1575         return ret;
1576 }
1577
1578 static int ocfs2_xattr_block_remove(struct inode *inode,
1579                                     struct buffer_head *blk_bh)
1580 {
1581         struct ocfs2_xattr_block *xb;
1582         int ret = 0;
1583
1584         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1585         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1586                 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1587                 ret = ocfs2_remove_value_outside(inode, blk_bh, header);
1588         } else
1589                 ret = ocfs2_delete_xattr_index_block(inode, blk_bh);
1590
1591         return ret;
1592 }
1593
1594 static int ocfs2_xattr_free_block(struct inode *inode,
1595                                   u64 block)
1596 {
1597         struct inode *xb_alloc_inode;
1598         struct buffer_head *xb_alloc_bh = NULL;
1599         struct buffer_head *blk_bh = NULL;
1600         struct ocfs2_xattr_block *xb;
1601         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1602         handle_t *handle;
1603         int ret = 0;
1604         u64 blk, bg_blkno;
1605         u16 bit;
1606
1607         ret = ocfs2_read_block(inode, block, &blk_bh);
1608         if (ret < 0) {
1609                 mlog_errno(ret);
1610                 goto out;
1611         }
1612
1613         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1614         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
1615                 ret = -EIO;
1616                 goto out;
1617         }
1618
1619         ret = ocfs2_xattr_block_remove(inode, blk_bh);
1620         if (ret < 0) {
1621                 mlog_errno(ret);
1622                 goto out;
1623         }
1624
1625         blk = le64_to_cpu(xb->xb_blkno);
1626         bit = le16_to_cpu(xb->xb_suballoc_bit);
1627         bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1628
1629         xb_alloc_inode = ocfs2_get_system_file_inode(osb,
1630                                 EXTENT_ALLOC_SYSTEM_INODE,
1631                                 le16_to_cpu(xb->xb_suballoc_slot));
1632         if (!xb_alloc_inode) {
1633                 ret = -ENOMEM;
1634                 mlog_errno(ret);
1635                 goto out;
1636         }
1637         mutex_lock(&xb_alloc_inode->i_mutex);
1638
1639         ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
1640         if (ret < 0) {
1641                 mlog_errno(ret);
1642                 goto out_mutex;
1643         }
1644
1645         handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
1646         if (IS_ERR(handle)) {
1647                 ret = PTR_ERR(handle);
1648                 mlog_errno(ret);
1649                 goto out_unlock;
1650         }
1651
1652         ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
1653                                        bit, bg_blkno, 1);
1654         if (ret < 0)
1655                 mlog_errno(ret);
1656
1657         ocfs2_commit_trans(osb, handle);
1658 out_unlock:
1659         ocfs2_inode_unlock(xb_alloc_inode, 1);
1660         brelse(xb_alloc_bh);
1661 out_mutex:
1662         mutex_unlock(&xb_alloc_inode->i_mutex);
1663         iput(xb_alloc_inode);
1664 out:
1665         brelse(blk_bh);
1666         return ret;
1667 }
1668
1669 /*
1670  * ocfs2_xattr_remove()
1671  *
1672  * Free extended attribute resources associated with this inode.
1673  */
1674 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1675 {
1676         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1677         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1678         handle_t *handle;
1679         int ret;
1680
1681         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1682                 return 0;
1683
1684         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1685                 return 0;
1686
1687         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1688                 ret = ocfs2_xattr_ibody_remove(inode, di_bh);
1689                 if (ret < 0) {
1690                         mlog_errno(ret);
1691                         goto out;
1692                 }
1693         }
1694
1695         if (di->i_xattr_loc) {
1696                 ret = ocfs2_xattr_free_block(inode,
1697                                              le64_to_cpu(di->i_xattr_loc));
1698                 if (ret < 0) {
1699                         mlog_errno(ret);
1700                         goto out;
1701                 }
1702         }
1703
1704         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1705                                    OCFS2_INODE_UPDATE_CREDITS);
1706         if (IS_ERR(handle)) {
1707                 ret = PTR_ERR(handle);
1708                 mlog_errno(ret);
1709                 goto out;
1710         }
1711         ret = ocfs2_journal_access(handle, inode, di_bh,
1712                                    OCFS2_JOURNAL_ACCESS_WRITE);
1713         if (ret) {
1714                 mlog_errno(ret);
1715                 goto out_commit;
1716         }
1717
1718         di->i_xattr_loc = 0;
1719
1720         spin_lock(&oi->ip_lock);
1721         oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
1722         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1723         spin_unlock(&oi->ip_lock);
1724
1725         ret = ocfs2_journal_dirty(handle, di_bh);
1726         if (ret < 0)
1727                 mlog_errno(ret);
1728 out_commit:
1729         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1730 out:
1731         return ret;
1732 }
1733
1734 static int ocfs2_xattr_has_space_inline(struct inode *inode,
1735                                         struct ocfs2_dinode *di)
1736 {
1737         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1738         unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
1739         int free;
1740
1741         if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
1742                 return 0;
1743
1744         if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1745                 struct ocfs2_inline_data *idata = &di->id2.i_data;
1746                 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
1747         } else if (ocfs2_inode_is_fast_symlink(inode)) {
1748                 free = ocfs2_fast_symlink_chars(inode->i_sb) -
1749                         le64_to_cpu(di->i_size);
1750         } else {
1751                 struct ocfs2_extent_list *el = &di->id2.i_list;
1752                 free = (le16_to_cpu(el->l_count) -
1753                         le16_to_cpu(el->l_next_free_rec)) *
1754                         sizeof(struct ocfs2_extent_rec);
1755         }
1756         if (free >= xattrsize)
1757                 return 1;
1758
1759         return 0;
1760 }
1761
1762 /*
1763  * ocfs2_xattr_ibody_find()
1764  *
1765  * Find extended attribute in inode block and
1766  * fill search info into struct ocfs2_xattr_search.
1767  */
1768 static int ocfs2_xattr_ibody_find(struct inode *inode,
1769                                   int name_index,
1770                                   const char *name,
1771                                   struct ocfs2_xattr_search *xs)
1772 {
1773         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1774         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1775         int ret;
1776         int has_space = 0;
1777
1778         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1779                 return 0;
1780
1781         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1782                 down_read(&oi->ip_alloc_sem);
1783                 has_space = ocfs2_xattr_has_space_inline(inode, di);
1784                 up_read(&oi->ip_alloc_sem);
1785                 if (!has_space)
1786                         return 0;
1787         }
1788
1789         xs->xattr_bh = xs->inode_bh;
1790         xs->end = (void *)di + inode->i_sb->s_blocksize;
1791         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
1792                 xs->header = (struct ocfs2_xattr_header *)
1793                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1794         else
1795                 xs->header = (struct ocfs2_xattr_header *)
1796                         (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
1797         xs->base = (void *)xs->header;
1798         xs->here = xs->header->xh_entries;
1799
1800         /* Find the named attribute. */
1801         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1802                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
1803                 if (ret && ret != -ENODATA)
1804                         return ret;
1805                 xs->not_found = ret;
1806         }
1807
1808         return 0;
1809 }
1810
1811 /*
1812  * ocfs2_xattr_ibody_set()
1813  *
1814  * Set, replace or remove an extended attribute into inode block.
1815  *
1816  */
1817 static int ocfs2_xattr_ibody_set(struct inode *inode,
1818                                  struct ocfs2_xattr_info *xi,
1819                                  struct ocfs2_xattr_search *xs,
1820                                  struct ocfs2_xattr_set_ctxt *ctxt)
1821 {
1822         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1823         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1824         int ret;
1825
1826         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1827                 return -ENOSPC;
1828
1829         down_write(&oi->ip_alloc_sem);
1830         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1831                 if (!ocfs2_xattr_has_space_inline(inode, di)) {
1832                         ret = -ENOSPC;
1833                         goto out;
1834                 }
1835         }
1836
1837         ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
1838                                 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
1839 out:
1840         up_write(&oi->ip_alloc_sem);
1841
1842         return ret;
1843 }
1844
1845 /*
1846  * ocfs2_xattr_block_find()
1847  *
1848  * Find extended attribute in external block and
1849  * fill search info into struct ocfs2_xattr_search.
1850  */
1851 static int ocfs2_xattr_block_find(struct inode *inode,
1852                                   int name_index,
1853                                   const char *name,
1854                                   struct ocfs2_xattr_search *xs)
1855 {
1856         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1857         struct buffer_head *blk_bh = NULL;
1858         struct ocfs2_xattr_block *xb;
1859         int ret = 0;
1860
1861         if (!di->i_xattr_loc)
1862                 return ret;
1863
1864         ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
1865         if (ret < 0) {
1866                 mlog_errno(ret);
1867                 return ret;
1868         }
1869
1870         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1871         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
1872                 ret = -EIO;
1873                 goto cleanup;
1874         }
1875
1876         xs->xattr_bh = blk_bh;
1877
1878         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1879                 xs->header = &xb->xb_attrs.xb_header;
1880                 xs->base = (void *)xs->header;
1881                 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
1882                 xs->here = xs->header->xh_entries;
1883
1884                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
1885         } else
1886                 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
1887                                                    name_index,
1888                                                    name, xs);
1889
1890         if (ret && ret != -ENODATA) {
1891                 xs->xattr_bh = NULL;
1892                 goto cleanup;
1893         }
1894         xs->not_found = ret;
1895         return 0;
1896 cleanup:
1897         brelse(blk_bh);
1898
1899         return ret;
1900 }
1901
1902 /*
1903  * ocfs2_xattr_block_set()
1904  *
1905  * Set, replace or remove an extended attribute into external block.
1906  *
1907  */
1908 static int ocfs2_xattr_block_set(struct inode *inode,
1909                                  struct ocfs2_xattr_info *xi,
1910                                  struct ocfs2_xattr_search *xs,
1911                                  struct ocfs2_xattr_set_ctxt *ctxt)
1912 {
1913         struct buffer_head *new_bh = NULL;
1914         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1915         struct ocfs2_dinode *di =  (struct ocfs2_dinode *)xs->inode_bh->b_data;
1916         handle_t *handle = ctxt->handle;
1917         struct ocfs2_xattr_block *xblk = NULL;
1918         u16 suballoc_bit_start;
1919         u32 num_got;
1920         u64 first_blkno;
1921         int ret;
1922
1923         if (!xs->xattr_bh) {
1924                 ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1925                                            OCFS2_JOURNAL_ACCESS_CREATE);
1926                 if (ret < 0) {
1927                         mlog_errno(ret);
1928                         goto end;
1929                 }
1930
1931                 ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1,
1932                                            &suballoc_bit_start, &num_got,
1933                                            &first_blkno);
1934                 if (ret < 0) {
1935                         mlog_errno(ret);
1936                         goto end;
1937                 }
1938
1939                 new_bh = sb_getblk(inode->i_sb, first_blkno);
1940                 ocfs2_set_new_buffer_uptodate(inode, new_bh);
1941
1942                 ret = ocfs2_journal_access(handle, inode, new_bh,
1943                                            OCFS2_JOURNAL_ACCESS_CREATE);
1944                 if (ret < 0) {
1945                         mlog_errno(ret);
1946                         goto end;
1947                 }
1948
1949                 /* Initialize ocfs2_xattr_block */
1950                 xs->xattr_bh = new_bh;
1951                 xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
1952                 memset(xblk, 0, inode->i_sb->s_blocksize);
1953                 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
1954                 xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
1955                 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1956                 xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
1957                 xblk->xb_blkno = cpu_to_le64(first_blkno);
1958
1959                 xs->header = &xblk->xb_attrs.xb_header;
1960                 xs->base = (void *)xs->header;
1961                 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
1962                 xs->here = xs->header->xh_entries;
1963
1964                 ret = ocfs2_journal_dirty(handle, new_bh);
1965                 if (ret < 0) {
1966                         mlog_errno(ret);
1967                         goto end;
1968                 }
1969                 di->i_xattr_loc = cpu_to_le64(first_blkno);
1970                 ocfs2_journal_dirty(handle, xs->inode_bh);
1971         } else
1972                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1973
1974         if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
1975                 /* Set extended attribute into external block */
1976                 ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
1977                                             OCFS2_HAS_XATTR_FL);
1978                 if (!ret || ret != -ENOSPC)
1979                         goto end;
1980
1981                 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
1982                 if (ret)
1983                         goto end;
1984         }
1985
1986         ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
1987
1988 end:
1989
1990         return ret;
1991 }
1992
1993 /* Check whether the new xattr can be inserted into the inode. */
1994 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
1995                                        struct ocfs2_xattr_info *xi,
1996                                        struct ocfs2_xattr_search *xs)
1997 {
1998         u64 value_size;
1999         struct ocfs2_xattr_entry *last;
2000         int free, i;
2001         size_t min_offs = xs->end - xs->base;
2002
2003         if (!xs->header)
2004                 return 0;
2005
2006         last = xs->header->xh_entries;
2007
2008         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2009                 size_t offs = le16_to_cpu(last->xe_name_offset);
2010                 if (offs < min_offs)
2011                         min_offs = offs;
2012                 last += 1;
2013         }
2014
2015         free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
2016         if (free < 0)
2017                 return 0;
2018
2019         BUG_ON(!xs->not_found);
2020
2021         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2022                 value_size = OCFS2_XATTR_ROOT_SIZE;
2023         else
2024                 value_size = OCFS2_XATTR_SIZE(xi->value_len);
2025
2026         if (free >= sizeof(struct ocfs2_xattr_entry) +
2027                    OCFS2_XATTR_SIZE(strlen(xi->name)) + value_size)
2028                 return 1;
2029
2030         return 0;
2031 }
2032
2033 static int ocfs2_calc_xattr_set_need(struct inode *inode,
2034                                      struct ocfs2_dinode *di,
2035                                      struct ocfs2_xattr_info *xi,
2036                                      struct ocfs2_xattr_search *xis,
2037                                      struct ocfs2_xattr_search *xbs,
2038                                      int *clusters_need,
2039                                      int *meta_need,
2040                                      int *credits_need)
2041 {
2042         int ret = 0, old_in_xb = 0;
2043         int clusters_add = 0, meta_add = 0, credits = 0;
2044         struct buffer_head *bh = NULL;
2045         struct ocfs2_xattr_block *xb = NULL;
2046         struct ocfs2_xattr_entry *xe = NULL;
2047         struct ocfs2_xattr_value_root *xv = NULL;
2048         char *base = NULL;
2049         int name_offset, name_len = 0;
2050         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2051                                                     xi->value_len);
2052         u64 value_size;
2053
2054         if (xis->not_found && xbs->not_found) {
2055                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2056
2057                 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2058                         clusters_add += new_clusters;
2059                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2060                                                         &def_xv.xv.xr_list,
2061                                                         new_clusters);
2062                 }
2063
2064                 goto meta_guess;
2065         }
2066
2067         if (!xis->not_found) {
2068                 xe = xis->here;
2069                 name_offset = le16_to_cpu(xe->xe_name_offset);
2070                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2071                 base = xis->base;
2072                 credits += OCFS2_INODE_UPDATE_CREDITS;
2073         } else {
2074                 int i, block_off;
2075                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2076                 xe = xbs->here;
2077                 name_offset = le16_to_cpu(xe->xe_name_offset);
2078                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2079                 i = xbs->here - xbs->header->xh_entries;
2080                 old_in_xb = 1;
2081
2082                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2083                         ret = ocfs2_xattr_bucket_get_name_value(inode,
2084                                                         bucket_xh(xbs->bucket),
2085                                                         i, &block_off,
2086                                                         &name_offset);
2087                         base = bucket_block(xbs->bucket, block_off);
2088                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2089                 } else {
2090                         base = xbs->base;
2091                         credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
2092                 }
2093         }
2094
2095         /*
2096          * delete a xattr doesn't need metadata and cluster allocation.
2097          * so just calculate the credits and return.
2098          *
2099          * The credits for removing the value tree will be extended
2100          * by ocfs2_remove_extent itself.
2101          */
2102         if (!xi->value) {
2103                 if (!ocfs2_xattr_is_local(xe))
2104                         credits += OCFS2_REMOVE_EXTENT_CREDITS;
2105
2106                 goto out;
2107         }
2108
2109         /* do cluster allocation guess first. */
2110         value_size = le64_to_cpu(xe->xe_value_size);
2111
2112         if (old_in_xb) {
2113                 /*
2114                  * In xattr set, we always try to set the xe in inode first,
2115                  * so if it can be inserted into inode successfully, the old
2116                  * one will be removed from the xattr block, and this xattr
2117                  * will be inserted into inode as a new xattr in inode.
2118                  */
2119                 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
2120                         clusters_add += new_clusters;
2121                         credits += OCFS2_REMOVE_EXTENT_CREDITS +
2122                                     OCFS2_INODE_UPDATE_CREDITS;
2123                         if (!ocfs2_xattr_is_local(xe))
2124                                 credits += ocfs2_calc_extend_credits(
2125                                                         inode->i_sb,
2126                                                         &def_xv.xv.xr_list,
2127                                                         new_clusters);
2128                         goto out;
2129                 }
2130         }
2131
2132         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2133                 /* the new values will be stored outside. */
2134                 u32 old_clusters = 0;
2135
2136                 if (!ocfs2_xattr_is_local(xe)) {
2137                         old_clusters =  ocfs2_clusters_for_bytes(inode->i_sb,
2138                                                                  value_size);
2139                         xv = (struct ocfs2_xattr_value_root *)
2140                              (base + name_offset + name_len);
2141                 } else
2142                         xv = &def_xv.xv;
2143
2144                 if (old_clusters >= new_clusters) {
2145                         credits += OCFS2_REMOVE_EXTENT_CREDITS;
2146                         goto out;
2147                 } else {
2148                         meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
2149                         clusters_add += new_clusters - old_clusters;
2150                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2151                                                              &xv->xr_list,
2152                                                              new_clusters -
2153                                                              old_clusters);
2154                         goto out;
2155                 }
2156         } else {
2157                 /*
2158                  * Now the new value will be stored inside. So if the new
2159                  * value is smaller than the size of value root or the old
2160                  * value, we don't need any allocation, otherwise we have
2161                  * to guess metadata allocation.
2162                  */
2163                 if ((ocfs2_xattr_is_local(xe) && value_size >= xi->value_len) ||
2164                     (!ocfs2_xattr_is_local(xe) &&
2165                      OCFS2_XATTR_ROOT_SIZE >= xi->value_len))
2166                         goto out;
2167         }
2168
2169 meta_guess:
2170         /* calculate metadata allocation. */
2171         if (di->i_xattr_loc) {
2172                 if (!xbs->xattr_bh) {
2173                         ret = ocfs2_read_block(inode,
2174                                                le64_to_cpu(di->i_xattr_loc),
2175                                                &bh);
2176                         if (ret) {
2177                                 mlog_errno(ret);
2178                                 goto out;
2179                         }
2180
2181                         xb = (struct ocfs2_xattr_block *)bh->b_data;
2182                 } else
2183                         xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2184
2185                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2186                         struct ocfs2_extent_list *el =
2187                                  &xb->xb_attrs.xb_root.xt_list;
2188                         meta_add += ocfs2_extend_meta_needed(el);
2189                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2190                                                              el, 1);
2191                 }
2192
2193                 /*
2194                  * This cluster will be used either for new bucket or for
2195                  * new xattr block.
2196                  * If the cluster size is the same as the bucket size, one
2197                  * more is needed since we may need to extend the bucket
2198                  * also.
2199                  */
2200                 clusters_add += 1;
2201                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2202                 if (OCFS2_XATTR_BUCKET_SIZE ==
2203                         OCFS2_SB(inode->i_sb)->s_clustersize) {
2204                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2205                         clusters_add += 1;
2206                 }
2207         } else {
2208                 meta_add += 1;
2209                 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
2210         }
2211 out:
2212         if (clusters_need)
2213                 *clusters_need = clusters_add;
2214         if (meta_need)
2215                 *meta_need = meta_add;
2216         if (credits_need)
2217                 *credits_need = credits;
2218         brelse(bh);
2219         return ret;
2220 }
2221
2222 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
2223                                      struct ocfs2_dinode *di,
2224                                      struct ocfs2_xattr_info *xi,
2225                                      struct ocfs2_xattr_search *xis,
2226                                      struct ocfs2_xattr_search *xbs,
2227                                      struct ocfs2_xattr_set_ctxt *ctxt,
2228                                      int *credits)
2229 {
2230         int clusters_add, meta_add, ret;
2231         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2232
2233         memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
2234
2235         ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
2236
2237         ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
2238                                         &clusters_add, &meta_add, credits);
2239         if (ret) {
2240                 mlog_errno(ret);
2241                 return ret;
2242         }
2243
2244         mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
2245              "credits = %d\n", xi->name, meta_add, clusters_add, *credits);
2246
2247         if (meta_add) {
2248                 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
2249                                                         &ctxt->meta_ac);
2250                 if (ret) {
2251                         mlog_errno(ret);
2252                         goto out;
2253                 }
2254         }
2255
2256         if (clusters_add) {
2257                 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
2258                 if (ret)
2259                         mlog_errno(ret);
2260         }
2261 out:
2262         if (ret) {
2263                 if (ctxt->meta_ac) {
2264                         ocfs2_free_alloc_context(ctxt->meta_ac);
2265                         ctxt->meta_ac = NULL;
2266                 }
2267
2268                 /*
2269                  * We cannot have an error and a non null ctxt->data_ac.
2270                  */
2271         }
2272
2273         return ret;
2274 }
2275
2276 static int __ocfs2_xattr_set_handle(struct inode *inode,
2277                                     struct ocfs2_dinode *di,
2278                                     struct ocfs2_xattr_info *xi,
2279                                     struct ocfs2_xattr_search *xis,
2280                                     struct ocfs2_xattr_search *xbs,
2281                                     struct ocfs2_xattr_set_ctxt *ctxt)
2282 {
2283         int ret = 0, credits;
2284
2285         if (!xi->value) {
2286                 /* Remove existing extended attribute */
2287                 if (!xis->not_found)
2288                         ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2289                 else if (!xbs->not_found)
2290                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2291         } else {
2292                 /* We always try to set extended attribute into inode first*/
2293                 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2294                 if (!ret && !xbs->not_found) {
2295                         /*
2296                          * If succeed and that extended attribute existing in
2297                          * external block, then we will remove it.
2298                          */
2299                         xi->value = NULL;
2300                         xi->value_len = 0;
2301
2302                         xis->not_found = -ENODATA;
2303                         ret = ocfs2_calc_xattr_set_need(inode,
2304                                                         di,
2305                                                         xi,
2306                                                         xis,
2307                                                         xbs,
2308                                                         NULL,
2309                                                         NULL,
2310                                                         &credits);
2311                         if (ret) {
2312                                 mlog_errno(ret);
2313                                 goto out;
2314                         }
2315
2316                         ret = ocfs2_extend_trans(ctxt->handle, credits +
2317                                         ctxt->handle->h_buffer_credits);
2318                         if (ret) {
2319                                 mlog_errno(ret);
2320                                 goto out;
2321                         }
2322                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2323                 } else if (ret == -ENOSPC) {
2324                         if (di->i_xattr_loc && !xbs->xattr_bh) {
2325                                 ret = ocfs2_xattr_block_find(inode,
2326                                                              xi->name_index,
2327                                                              xi->name, xbs);
2328                                 if (ret)
2329                                         goto out;
2330
2331                                 xis->not_found = -ENODATA;
2332                                 ret = ocfs2_calc_xattr_set_need(inode,
2333                                                                 di,
2334                                                                 xi,
2335                                                                 xis,
2336                                                                 xbs,
2337                                                                 NULL,
2338                                                                 NULL,
2339                                                                 &credits);
2340                                 if (ret) {
2341                                         mlog_errno(ret);
2342                                         goto out;
2343                                 }
2344
2345                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
2346                                         ctxt->handle->h_buffer_credits);
2347                                 if (ret) {
2348                                         mlog_errno(ret);
2349                                         goto out;
2350                                 }
2351                         }
2352                         /*
2353                          * If no space in inode, we will set extended attribute
2354                          * into external block.
2355                          */
2356                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2357                         if (ret)
2358                                 goto out;
2359                         if (!xis->not_found) {
2360                                 /*
2361                                  * If succeed and that extended attribute
2362                                  * existing in inode, we will remove it.
2363                                  */
2364                                 xi->value = NULL;
2365                                 xi->value_len = 0;
2366                                 xbs->not_found = -ENODATA;
2367                                 ret = ocfs2_calc_xattr_set_need(inode,
2368                                                                 di,
2369                                                                 xi,
2370                                                                 xis,
2371                                                                 xbs,
2372                                                                 NULL,
2373                                                                 NULL,
2374                                                                 &credits);
2375                                 if (ret) {
2376                                         mlog_errno(ret);
2377                                         goto out;
2378                                 }
2379
2380                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
2381                                                 ctxt->handle->h_buffer_credits);
2382                                 if (ret) {
2383                                         mlog_errno(ret);
2384                                         goto out;
2385                                 }
2386                                 ret = ocfs2_xattr_ibody_set(inode, xi,
2387                                                             xis, ctxt);
2388                         }
2389                 }
2390         }
2391
2392 out:
2393         return ret;
2394 }
2395
2396 /*
2397  * This function only called duing creating inode
2398  * for init security/acl xattrs of the new inode.
2399  * The xattrs could be put into ibody or extent block,
2400  * xattr bucket would not be use in this case.
2401  * transanction credits also be reserved in here.
2402  */
2403 int ocfs2_xattr_set_handle(handle_t *handle,
2404                            struct inode *inode,
2405                            struct buffer_head *di_bh,
2406                            int name_index,
2407                            const char *name,
2408                            const void *value,
2409                            size_t value_len,
2410                            int flags,
2411                            struct ocfs2_alloc_context *meta_ac,
2412                            struct ocfs2_alloc_context *data_ac)
2413 {
2414         struct ocfs2_dinode *di;
2415         int ret;
2416
2417         struct ocfs2_xattr_info xi = {
2418                 .name_index = name_index,
2419                 .name = name,
2420                 .value = value,
2421                 .value_len = value_len,
2422         };
2423
2424         struct ocfs2_xattr_search xis = {
2425                 .not_found = -ENODATA,
2426         };
2427
2428         struct ocfs2_xattr_search xbs = {
2429                 .not_found = -ENODATA,
2430         };
2431
2432         struct ocfs2_xattr_set_ctxt ctxt = {
2433                 .handle = handle,
2434                 .meta_ac = meta_ac,
2435                 .data_ac = data_ac,
2436         };
2437
2438         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2439                 return -EOPNOTSUPP;
2440
2441         xis.inode_bh = xbs.inode_bh = di_bh;
2442         di = (struct ocfs2_dinode *)di_bh->b_data;
2443
2444         down_write(&OCFS2_I(inode)->ip_xattr_sem);
2445
2446         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2447         if (ret)
2448                 goto cleanup;
2449         if (xis.not_found) {
2450                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2451                 if (ret)
2452                         goto cleanup;
2453         }
2454
2455         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2456
2457 cleanup:
2458         up_write(&OCFS2_I(inode)->ip_xattr_sem);
2459         brelse(xbs.xattr_bh);
2460
2461         return ret;
2462 }
2463
2464 /*
2465  * ocfs2_xattr_set()
2466  *
2467  * Set, replace or remove an extended attribute for this inode.
2468  * value is NULL to remove an existing extended attribute, else either
2469  * create or replace an extended attribute.
2470  */
2471 int ocfs2_xattr_set(struct inode *inode,
2472                     int name_index,
2473                     const char *name,
2474                     const void *value,
2475                     size_t value_len,
2476                     int flags)
2477 {
2478         struct buffer_head *di_bh = NULL;
2479         struct ocfs2_dinode *di;
2480         int ret, credits;
2481         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2482         struct inode *tl_inode = osb->osb_tl_inode;
2483         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2484
2485         struct ocfs2_xattr_info xi = {
2486                 .name_index = name_index,
2487                 .name = name,
2488                 .value = value,
2489                 .value_len = value_len,
2490         };
2491
2492         struct ocfs2_xattr_search xis = {
2493                 .not_found = -ENODATA,
2494         };
2495
2496         struct ocfs2_xattr_search xbs = {
2497                 .not_found = -ENODATA,
2498         };
2499
2500         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2501                 return -EOPNOTSUPP;
2502
2503         /*
2504          * Only xbs will be used on indexed trees.  xis doesn't need a
2505          * bucket.
2506          */
2507         xbs.bucket = ocfs2_xattr_bucket_new(inode);
2508         if (!xbs.bucket) {
2509                 mlog_errno(-ENOMEM);
2510                 return -ENOMEM;
2511         }
2512
2513         ret = ocfs2_inode_lock(inode, &di_bh, 1);
2514         if (ret < 0) {
2515                 mlog_errno(ret);
2516                 goto cleanup_nolock;
2517         }
2518         xis.inode_bh = xbs.inode_bh = di_bh;
2519         di = (struct ocfs2_dinode *)di_bh->b_data;
2520
2521         down_write(&OCFS2_I(inode)->ip_xattr_sem);
2522         /*
2523          * Scan inode and external block to find the same name
2524          * extended attribute and collect search infomation.
2525          */
2526         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2527         if (ret)
2528                 goto cleanup;
2529         if (xis.not_found) {
2530                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2531                 if (ret)
2532                         goto cleanup;
2533         }
2534
2535         if (xis.not_found && xbs.not_found) {
2536                 ret = -ENODATA;
2537                 if (flags & XATTR_REPLACE)
2538                         goto cleanup;
2539                 ret = 0;
2540                 if (!value)
2541                         goto cleanup;
2542         } else {
2543                 ret = -EEXIST;
2544                 if (flags & XATTR_CREATE)
2545                         goto cleanup;
2546         }
2547
2548
2549         mutex_lock(&tl_inode->i_mutex);
2550
2551         if (ocfs2_truncate_log_needs_flush(osb)) {
2552                 ret = __ocfs2_flush_truncate_log(osb);
2553                 if (ret < 0) {
2554                         mutex_unlock(&tl_inode->i_mutex);
2555                         mlog_errno(ret);
2556                         goto cleanup;
2557                 }
2558         }
2559         mutex_unlock(&tl_inode->i_mutex);
2560
2561         ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
2562                                         &xbs, &ctxt, &credits);
2563         if (ret) {
2564                 mlog_errno(ret);
2565                 goto cleanup;
2566         }
2567
2568         ctxt.handle = ocfs2_start_trans(osb, credits);
2569         if (IS_ERR(ctxt.handle)) {
2570                 ret = PTR_ERR(ctxt.handle);
2571                 mlog_errno(ret);
2572                 goto cleanup;
2573         }
2574
2575         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2576
2577         ocfs2_commit_trans(osb, ctxt.handle);
2578
2579         if (ctxt.data_ac)
2580                 ocfs2_free_alloc_context(ctxt.data_ac);
2581         if (ctxt.meta_ac)
2582                 ocfs2_free_alloc_context(ctxt.meta_ac);
2583         if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
2584                 ocfs2_schedule_truncate_log_flush(osb, 1);
2585         ocfs2_run_deallocs(osb, &ctxt.dealloc);
2586 cleanup:
2587         up_write(&OCFS2_I(inode)->ip_xattr_sem);
2588         ocfs2_inode_unlock(inode, 1);
2589 cleanup_nolock:
2590         brelse(di_bh);
2591         brelse(xbs.xattr_bh);
2592         ocfs2_xattr_bucket_free(xbs.bucket);
2593
2594         return ret;
2595 }
2596
2597 /*
2598  * Find the xattr extent rec which may contains name_hash.
2599  * e_cpos will be the first name hash of the xattr rec.
2600  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
2601  */
2602 static int ocfs2_xattr_get_rec(struct inode *inode,
2603                                u32 name_hash,
2604                                u64 *p_blkno,
2605                                u32 *e_cpos,
2606                                u32 *num_clusters,
2607                                struct ocfs2_extent_list *el)
2608 {
2609         int ret = 0, i;
2610         struct buffer_head *eb_bh = NULL;
2611         struct ocfs2_extent_block *eb;
2612         struct ocfs2_extent_rec *rec = NULL;
2613         u64 e_blkno = 0;
2614
2615         if (el->l_tree_depth) {
2616                 ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh);
2617                 if (ret) {
2618                         mlog_errno(ret);
2619                         goto out;
2620                 }
2621
2622                 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2623                 el = &eb->h_list;
2624
2625                 if (el->l_tree_depth) {
2626                         ocfs2_error(inode->i_sb,
2627                                     "Inode %lu has non zero tree depth in "
2628                                     "xattr tree block %llu\n", inode->i_ino,
2629                                     (unsigned long long)eb_bh->b_blocknr);
2630                         ret = -EROFS;
2631                         goto out;
2632                 }
2633         }
2634
2635         for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
2636                 rec = &el->l_recs[i];
2637
2638                 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
2639                         e_blkno = le64_to_cpu(rec->e_blkno);
2640                         break;
2641                 }
2642         }
2643
2644         if (!e_blkno) {
2645                 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
2646                             "record (%u, %u, 0) in xattr", inode->i_ino,
2647                             le32_to_cpu(rec->e_cpos),
2648                             ocfs2_rec_clusters(el, rec));
2649                 ret = -EROFS;
2650                 goto out;
2651         }
2652
2653         *p_blkno = le64_to_cpu(rec->e_blkno);
2654         *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
2655         if (e_cpos)
2656                 *e_cpos = le32_to_cpu(rec->e_cpos);
2657 out:
2658         brelse(eb_bh);
2659         return ret;
2660 }
2661
/*
 * Per-bucket callback type used by ocfs2_iterate_xattr_buckets().
 * A non-zero return value stops the iteration and is handed back to
 * the caller of the iterator.
 */
typedef int xattr_bucket_func(struct inode *inode,
			      struct ocfs2_xattr_bucket *bucket,
			      void *para);
2665
2666 static int ocfs2_find_xe_in_bucket(struct inode *inode,
2667                                    struct ocfs2_xattr_bucket *bucket,
2668                                    int name_index,
2669                                    const char *name,
2670                                    u32 name_hash,
2671                                    u16 *xe_index,
2672                                    int *found)
2673 {
2674         int i, ret = 0, cmp = 1, block_off, new_offset;
2675         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
2676         size_t name_len = strlen(name);
2677         struct ocfs2_xattr_entry *xe = NULL;
2678         char *xe_name;
2679
2680         /*
2681          * We don't use binary search in the bucket because there
2682          * may be multiple entries with the same name hash.
2683          */
2684         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
2685                 xe = &xh->xh_entries[i];
2686
2687                 if (name_hash > le32_to_cpu(xe->xe_name_hash))
2688                         continue;
2689                 else if (name_hash < le32_to_cpu(xe->xe_name_hash))
2690                         break;
2691
2692                 cmp = name_index - ocfs2_xattr_get_type(xe);
2693                 if (!cmp)
2694                         cmp = name_len - xe->xe_name_len;
2695                 if (cmp)
2696                         continue;
2697
2698                 ret = ocfs2_xattr_bucket_get_name_value(inode,
2699                                                         xh,
2700                                                         i,
2701                                                         &block_off,
2702                                                         &new_offset);
2703                 if (ret) {
2704                         mlog_errno(ret);
2705                         break;
2706                 }
2707
2708                 xe_name = bucket_block(bucket, block_off) + new_offset;
2709                 if (!memcmp(name, xe_name, name_len)) {
2710                         *xe_index = i;
2711                         *found = 1;
2712                         ret = 0;
2713                         break;
2714                 }
2715         }
2716
2717         return ret;
2718 }
2719
2720 /*
2721  * Find the specified xattr entry in a series of buckets.
2722  * This series start from p_blkno and last for num_clusters.
2723  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
2724  * the num of the valid buckets.
2725  *
2726  * Return the buffer_head this xattr should reside in. And if the xattr's
2727  * hash is in the gap of 2 buckets, return the lower bucket.
2728  */
2729 static int ocfs2_xattr_bucket_find(struct inode *inode,
2730                                    int name_index,
2731                                    const char *name,
2732                                    u32 name_hash,
2733                                    u64 p_blkno,
2734                                    u32 first_hash,
2735                                    u32 num_clusters,
2736                                    struct ocfs2_xattr_search *xs)
2737 {
2738         int ret, found = 0;
2739         struct ocfs2_xattr_header *xh = NULL;
2740         struct ocfs2_xattr_entry *xe = NULL;
2741         u16 index = 0;
2742         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2743         int low_bucket = 0, bucket, high_bucket;
2744         struct ocfs2_xattr_bucket *search;
2745         u32 last_hash;
2746         u64 blkno, lower_blkno = 0;
2747
2748         search = ocfs2_xattr_bucket_new(inode);
2749         if (!search) {
2750                 ret = -ENOMEM;
2751                 mlog_errno(ret);
2752                 goto out;
2753         }
2754
2755         ret = ocfs2_read_xattr_bucket(search, p_blkno);
2756         if (ret) {
2757                 mlog_errno(ret);
2758                 goto out;
2759         }
2760
2761         xh = bucket_xh(search);
2762         high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
2763         while (low_bucket <= high_bucket) {
2764                 ocfs2_xattr_bucket_relse(search);
2765
2766                 bucket = (low_bucket + high_bucket) / 2;
2767                 blkno = p_blkno + bucket * blk_per_bucket;
2768                 ret = ocfs2_read_xattr_bucket(search, blkno);
2769                 if (ret) {
2770                         mlog_errno(ret);
2771                         goto out;
2772                 }
2773
2774                 xh = bucket_xh(search);
2775                 xe = &xh->xh_entries[0];
2776                 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
2777                         high_bucket = bucket - 1;
2778                         continue;
2779                 }
2780
2781                 /*
2782                  * Check whether the hash of the last entry in our
2783                  * bucket is larger than the search one. for an empty
2784                  * bucket, the last one is also the first one.
2785                  */
2786                 if (xh->xh_count)
2787                         xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
2788
2789                 last_hash = le32_to_cpu(xe->xe_name_hash);
2790
2791                 /* record lower_blkno which may be the insert place. */
2792                 lower_blkno = blkno;
2793
2794                 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
2795                         low_bucket = bucket + 1;
2796                         continue;
2797                 }
2798
2799                 /* the searched xattr should reside in this bucket if exists. */
2800                 ret = ocfs2_find_xe_in_bucket(inode, search,
2801                                               name_index, name, name_hash,
2802                                               &index, &found);
2803                 if (ret) {
2804                         mlog_errno(ret);
2805                         goto out;
2806                 }
2807                 break;
2808         }
2809
2810         /*
2811          * Record the bucket we have found.
2812          * When the xattr's hash value is in the gap of 2 buckets, we will
2813          * always set it to the previous bucket.
2814          */
2815         if (!lower_blkno)
2816                 lower_blkno = p_blkno;
2817
2818         /* This should be in cache - we just read it during the search */
2819         ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
2820         if (ret) {
2821                 mlog_errno(ret);
2822                 goto out;
2823         }
2824
2825         xs->header = bucket_xh(xs->bucket);
2826         xs->base = bucket_block(xs->bucket, 0);
2827         xs->end = xs->base + inode->i_sb->s_blocksize;
2828
2829         if (found) {
2830                 xs->here = &xs->header->xh_entries[index];
2831                 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
2832                      (unsigned long long)bucket_blkno(xs->bucket), index);
2833         } else
2834                 ret = -ENODATA;
2835
2836 out:
2837         ocfs2_xattr_bucket_free(search);
2838         return ret;
2839 }
2840
2841 static int ocfs2_xattr_index_block_find(struct inode *inode,
2842                                         struct buffer_head *root_bh,
2843                                         int name_index,
2844                                         const char *name,
2845                                         struct ocfs2_xattr_search *xs)
2846 {
2847         int ret;
2848         struct ocfs2_xattr_block *xb =
2849                         (struct ocfs2_xattr_block *)root_bh->b_data;
2850         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
2851         struct ocfs2_extent_list *el = &xb_root->xt_list;
2852         u64 p_blkno = 0;
2853         u32 first_hash, num_clusters = 0;
2854         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
2855
2856         if (le16_to_cpu(el->l_next_free_rec) == 0)
2857                 return -ENODATA;
2858
2859         mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
2860              name, name_hash, name_index);
2861
2862         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
2863                                   &num_clusters, el);
2864         if (ret) {
2865                 mlog_errno(ret);
2866                 goto out;
2867         }
2868
2869         BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
2870
2871         mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
2872              "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
2873              first_hash);
2874
2875         ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
2876                                       p_blkno, first_hash, num_clusters, xs);
2877
2878 out:
2879         return ret;
2880 }
2881
2882 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
2883                                        u64 blkno,
2884                                        u32 clusters,
2885                                        xattr_bucket_func *func,
2886                                        void *para)
2887 {
2888         int i, ret = 0;
2889         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
2890         u32 num_buckets = clusters * bpc;
2891         struct ocfs2_xattr_bucket *bucket;
2892
2893         bucket = ocfs2_xattr_bucket_new(inode);
2894         if (!bucket) {
2895                 mlog_errno(-ENOMEM);
2896                 return -ENOMEM;
2897         }
2898
2899         mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
2900              clusters, (unsigned long long)blkno);
2901
2902         for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
2903                 ret = ocfs2_read_xattr_bucket(bucket, blkno);
2904                 if (ret) {
2905                         mlog_errno(ret);
2906                         break;
2907                 }
2908
2909                 /*
2910                  * The real bucket num in this series of blocks is stored
2911                  * in the 1st bucket.
2912                  */
2913                 if (i == 0)
2914                         num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
2915
2916                 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
2917                      (unsigned long long)blkno,
2918                      le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
2919                 if (func) {
2920                         ret = func(inode, bucket, para);
2921                         if (ret)
2922                                 mlog_errno(ret);
2923                         /* Fall through to bucket_relse() */
2924                 }
2925
2926                 ocfs2_xattr_bucket_relse(bucket);
2927                 if (ret)
2928                         break;
2929         }
2930
2931         ocfs2_xattr_bucket_free(bucket);
2932         return ret;
2933 }
2934
/*
 * State carried through ocfs2_iterate_xattr_buckets() while listing the
 * xattr names stored in an xattr tree.
 */
struct ocfs2_xattr_tree_list {
	/* Output buffer handed to ocfs2_xattr_list_entry(). */
	char *buffer;
	/* Size passed to ocfs2_xattr_list_entry() together with buffer. */
	size_t buffer_size;
	/*
	 * Running result updated by ocfs2_xattr_list_entry() and finally
	 * returned by ocfs2_xattr_tree_list_index_block().
	 * NOTE(review): presumably the number of bytes listed so far -
	 * confirm against ocfs2_xattr_list_entry().
	 */
	size_t result;
};
2940
2941 static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
2942                                              struct ocfs2_xattr_header *xh,
2943                                              int index,
2944                                              int *block_off,
2945                                              int *new_offset)
2946 {
2947         u16 name_offset;
2948
2949         if (index < 0 || index >= le16_to_cpu(xh->xh_count))
2950                 return -EINVAL;
2951
2952         name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
2953
2954         *block_off = name_offset >> inode->i_sb->s_blocksize_bits;
2955         *new_offset = name_offset % inode->i_sb->s_blocksize;
2956
2957         return 0;
2958 }
2959
2960 static int ocfs2_list_xattr_bucket(struct inode *inode,
2961                                    struct ocfs2_xattr_bucket *bucket,
2962                                    void *para)
2963 {
2964         int ret = 0, type;
2965         struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
2966         int i, block_off, new_offset;
2967         const char *prefix, *name;
2968
2969         for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
2970                 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
2971                 type = ocfs2_xattr_get_type(entry);
2972                 prefix = ocfs2_xattr_prefix(type);
2973
2974                 if (prefix) {
2975                         ret = ocfs2_xattr_bucket_get_name_value(inode,
2976                                                                 bucket_xh(bucket),
2977                                                                 i,
2978                                                                 &block_off,
2979                                                                 &new_offset);
2980                         if (ret)
2981                                 break;
2982
2983                         name = (const char *)bucket_block(bucket, block_off) +
2984                                 new_offset;
2985                         ret = ocfs2_xattr_list_entry(xl->buffer,
2986                                                      xl->buffer_size,
2987                                                      &xl->result,
2988                                                      prefix, name,
2989                                                      entry->xe_name_len);
2990                         if (ret)
2991                                 break;
2992                 }
2993         }
2994
2995         return ret;
2996 }
2997
2998 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
2999                                              struct ocfs2_xattr_tree_root *xt,
3000                                              char *buffer,
3001                                              size_t buffer_size)
3002 {
3003         struct ocfs2_extent_list *el = &xt->xt_list;
3004         int ret = 0;
3005         u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
3006         u64 p_blkno = 0;
3007         struct ocfs2_xattr_tree_list xl = {
3008                 .buffer = buffer,
3009                 .buffer_size = buffer_size,
3010                 .result = 0,
3011         };
3012
3013         if (le16_to_cpu(el->l_next_free_rec) == 0)
3014                 return 0;
3015
3016         while (name_hash > 0) {
3017                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
3018                                           &e_cpos, &num_clusters, el);
3019                 if (ret) {
3020                         mlog_errno(ret);
3021                         goto out;
3022                 }
3023
3024                 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
3025                                                   ocfs2_list_xattr_bucket,
3026                                                   &xl);
3027                 if (ret) {
3028                         mlog_errno(ret);
3029                         goto out;
3030                 }
3031
3032                 if (e_cpos == 0)
3033                         break;
3034
3035                 name_hash = e_cpos - 1;
3036         }
3037
3038         ret = xl.result;
3039 out:
3040         return ret;
3041 }
3042
3043 static int cmp_xe(const void *a, const void *b)
3044 {
3045         const struct ocfs2_xattr_entry *l = a, *r = b;
3046         u32 l_hash = le32_to_cpu(l->xe_name_hash);
3047         u32 r_hash = le32_to_cpu(r->xe_name_hash);
3048
3049         if (l_hash > r_hash)
3050                 return 1;
3051         if (l_hash < r_hash)
3052                 return -1;
3053         return 0;
3054 }
3055
3056 static void swap_xe(void *a, void *b, int size)
3057 {
3058         struct ocfs2_xattr_entry *l = a, *r = b, tmp;
3059
3060         tmp = *l;
3061         memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
3062         memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
3063 }
3064
3065 /*
3066  * When the ocfs2_xattr_block is filled up, new bucket will be created
3067  * and all the xattr entries will be moved to the new bucket.
3068  * The header goes at the start of the bucket, and the names+values are
3069  * filled from the end.  This is why *target starts as the last buffer.
3070  * Note: we need to sort the entries since they are not saved in order
3071  * in the ocfs2_xattr_block.
3072  */
3073 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
3074                                            struct buffer_head *xb_bh,
3075                                            struct ocfs2_xattr_bucket *bucket)
3076 {
3077         int i, blocksize = inode->i_sb->s_blocksize;
3078         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3079         u16 offset, size, off_change;
3080         struct ocfs2_xattr_entry *xe;
3081         struct ocfs2_xattr_block *xb =
3082                                 (struct ocfs2_xattr_block *)xb_bh->b_data;
3083         struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
3084         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3085         u16 count = le16_to_cpu(xb_xh->xh_count);
3086         char *src = xb_bh->b_data;
3087         char *target = bucket_block(bucket, blks - 1);
3088
3089         mlog(0, "cp xattr from block %llu to bucket %llu\n",
3090              (unsigned long long)xb_bh->b_blocknr,
3091              (unsigned long long)bucket_blkno(bucket));
3092
3093         for (i = 0; i < blks; i++)
3094                 memset(bucket_block(bucket, i), 0, blocksize);
3095
3096         /*
3097          * Since the xe_name_offset is based on ocfs2_xattr_header,
3098          * there is a offset change corresponding to the change of
3099          * ocfs2_xattr_header's position.
3100          */
3101         off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3102         xe = &xb_xh->xh_entries[count - 1];
3103         offset = le16_to_cpu(xe->xe_name_offset) + off_change;
3104         size = blocksize - offset;
3105
3106         /* copy all the names and values. */
3107         memcpy(target + offset, src + offset, size);
3108
3109         /* Init new header now. */
3110         xh->xh_count = xb_xh->xh_count;
3111         xh->xh_num_buckets = cpu_to_le16(1);
3112         xh->xh_name_value_len = cpu_to_le16(size);
3113         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
3114
3115         /* copy all the entries. */
3116         target = bucket_block(bucket, 0);
3117         offset = offsetof(struct ocfs2_xattr_header, xh_entries);
3118         size = count * sizeof(struct ocfs2_xattr_entry);
3119         memcpy(target + offset, (char *)xb_xh + offset, size);
3120
3121         /* Change the xe offset for all the xe because of the move. */
3122         off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
3123                  offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3124         for (i = 0; i < count; i++)
3125                 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
3126
3127         mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
3128              offset, size, off_change);
3129
3130         sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
3131              cmp_xe, swap_xe);
3132 }
3133
3134 /*
3135  * After we move xattr from block to index btree, we have to
3136  * update ocfs2_xattr_search to the new xe and base.
3137  *
3138  * When the entry is in xattr block, xattr_bh indicates the storage place.
3139  * While if the entry is in index b-tree, "bucket" indicates the
3140  * real place of the xattr.
      *
      * @inode:  inode owning the xattrs; only its superblock block size is read.
      * @xs:     search state to repoint at xs->bucket.
      * @old_bh: buffer of the xattr block that xs->here currently points into.
      *
      * xs->header, xs->base and xs->end are always refreshed from the bucket.
      * xs->here is only translated when the search had found an entry
      * (xs->not_found is clear).
3141  */
3142 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
3143                                             struct ocfs2_xattr_search *xs,
3144                                             struct buffer_head *old_bh)
3145 {
3146         char *buf = old_bh->b_data;
3147         struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
3148         struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
3149         int i;
3150
             /* The header lives in the first block of the bucket. */
3151         xs->header = bucket_xh(xs->bucket);
3152         xs->base = bucket_block(xs->bucket, 0);
             /* end is set one filesystem block past base. */
3153         xs->end = xs->base + inode->i_sb->s_blocksize;
3154
3155         if (xs->not_found)
3156                 return;
3157
             /*
              * Carry the entry index over from the old header to the new one.
              * NOTE(review): this assumes the entry kept its index during the
              * copy; ocfs2_cp_xattr_block_to_bucket() re-sorts the copied
              * entries with cmp_xe -- confirm the block was already in that
              * order.
              */
3158         i = xs->here - old_xh->xh_entries;
3159         xs->here = &xs->header->xh_entries[i];
3160 }
3161
     /*
      * Convert a non-indexed xattr block into an indexed one.
      *
      * One cluster is claimed from ctxt->data_ac, xs->bucket is initialised on
      * it and the entries held in xs->xattr_bh are copied into that bucket.
      * The body of the xattr block is then rewritten as an
      * ocfs2_xattr_tree_root whose single extent record points at the new
      * cluster, and the block is flagged OCFS2_XATTR_INDEXED.  xs is repointed
      * at the bucket copy on the way.
      *
      * Everything runs with oi->ip_alloc_sem held for write and inside the
      * transaction ctxt->handle.
      *
      * Returns 0 on success, otherwise the error of the first failing step.
      */
3162 static int ocfs2_xattr_create_index_block(struct inode *inode,
3163                                           struct ocfs2_xattr_search *xs,
3164                                           struct ocfs2_xattr_set_ctxt *ctxt)
3165 {
3166         int ret;
3167         u32 bit_off, len;
3168         u64 blkno;
3169         handle_t *handle = ctxt->handle;
3170         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3171         struct ocfs2_inode_info *oi = OCFS2_I(inode);
3172         struct buffer_head *xb_bh = xs->xattr_bh;
3173         struct ocfs2_xattr_block *xb =
3174                         (struct ocfs2_xattr_block *)xb_bh->b_data;
3175         struct ocfs2_xattr_tree_root *xr;
3176         u16 xb_flags = le16_to_cpu(xb->xb_flags);
3177
3178         mlog(0, "create xattr index block for %llu\n",
3179              (unsigned long long)xb_bh->b_blocknr);
3180
             /* The block must not be indexed yet and xs must carry a bucket. */
3181         BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
3182         BUG_ON(!xs->bucket);
3183
3184         /*
3185          * XXX:
3186          * We can use this lock for now, and maybe move to a dedicated mutex
3187          * if performance becomes a problem later.
3188          */
3189         down_write(&oi->ip_alloc_sem);
3190
3191         ret = ocfs2_journal_access(handle, inode, xb_bh,
3192                                    OCFS2_JOURNAL_ACCESS_WRITE);
3193         if (ret) {
3194                 mlog_errno(ret);
3195                 goto out;
3196         }
3197
             /* Ask for exactly one cluster (minimum 1, maximum 1). */
3198         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
3199                                      1, 1, &bit_off, &len);
3200         if (ret) {
3201                 mlog_errno(ret);
3202                 goto out;
3203         }
3204
3205         /*
3206          * The bucket may spread in many blocks, and
3207          * we will only touch the 1st block and the last block
3208          * in the whole bucket(one for entry and one for data).
3209          */
3210         blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
3211
3212         mlog(0, "allocate 1 cluster from %llu to xattr block\n",
3213              (unsigned long long)blkno);
3214
3215         ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
3216         if (ret) {
3217                 mlog_errno(ret);
3218                 goto out;
3219         }
3220
             /* The bucket blocks are brand new, hence ACCESS_CREATE. */
3221         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
3222                                                 OCFS2_JOURNAL_ACCESS_CREATE);
3223         if (ret) {
3224                 mlog_errno(ret);
3225                 goto out;
3226         }
3227
3228         ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
3229         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
3230
             /* Must run before the memset below wipes the old entries. */
3231         ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
3232
3233         /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
3234         memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
3235                offsetof(struct ocfs2_xattr_block, xb_attrs));
3236
             /* Depth-0 tree with a single extent record. */
3237         xr = &xb->xb_attrs.xb_root;
3238         xr->xt_clusters = cpu_to_le32(1);
3239         xr->xt_last_eb_blk = 0;
3240         xr->xt_list.l_tree_depth = 0;
3241         xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
3242         xr->xt_list.l_next_free_rec = cpu_to_le16(1);
3243
             /* That record covers the one cluster claimed above. */
3244         xr->xt_list.l_recs[0].e_cpos = 0;
3245         xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
3246         xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
3247
3248         xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
3249
3250         ocfs2_journal_dirty(handle, xb_bh);
3251
3252 out:
3253         up_write(&oi->ip_alloc_sem);
3254
3255         return ret;
3256 }
3257
3258 static int cmp_xe_offset(const void *a, const void *b)
3259 {
3260         /* sort() comparator: orders entries by descending xe_name_offset. */
3261         const struct ocfs2_xattr_entry *lhs = a;
3262         const struct ocfs2_xattr_entry *rhs = b;
3263         u32 lhs_off = le16_to_cpu(lhs->xe_name_offset);
3264         u32 rhs_off = le16_to_cpu(rhs->xe_name_offset);
3265
3266         if (lhs_off == rhs_off)
3267                 return 0;
3268         return (lhs_off < rhs_off) ? 1 : -1;
3269 }
3270
3271 /*
3272  * defrag a xattr bucket if we find that the bucket has some
3273  * holes between name/value pairs.
3274  * We will move all the name/value pairs to the end of the bucket
3275  * so that we can spare some space for insertion.
      *
      * @inode:  inode owning the bucket; supplies the filesystem block size.
      * @handle: running journal transaction used to access/dirty the bucket.
      * @bucket: bucket to compact.
      *
      * All work is done on a private copy of the bucket; the real blocks are
      * only overwritten (and journal-dirtied) once the compaction succeeded
      * and actually moved something.
      *
      * Returns 0 on success (including when nothing needed moving), -ENOMEM
      * if the scratch buffer cannot be allocated, or the error returned by
      * ocfs2_xattr_bucket_journal_access().
3276  */
3277 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
3278                                      handle_t *handle,
3279                                      struct ocfs2_xattr_bucket *bucket)
3280 {
3281         int ret, i;
3282         size_t end, offset, len, value_len;
3283         struct ocfs2_xattr_header *xh;
3284         char *entries, *buf, *bucket_buf = NULL;
3285         u64 blkno = bucket_blkno(bucket);
3286         u16 xh_free_start;
3287         size_t blocksize = inode->i_sb->s_blocksize;
3288         struct ocfs2_xattr_entry *xe;
3289
3290         /*
3291          * In order to make the operation more efficient and generic,
3292          * we copy all the blocks into a contiguous memory and do the
3293          * defragment there, so if anything is error, we will not touch
3294          * the real block.
3295          */
3296         bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
3297         if (!bucket_buf) {
                     /* Allocation failure is out-of-memory, not an I/O error. */
3298                 ret = -ENOMEM;
3299                 goto out;
3300         }
3301
             /* Flatten the bucket's blocks into the scratch buffer. */
3302         buf = bucket_buf;
3303         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3304                 memcpy(buf, bucket_block(bucket, i), blocksize);
3305
3306         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
3307                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3308         if (ret < 0) {
3309                 mlog_errno(ret);
3310                 goto out;
3311         }
3312
             /* From here on xh/entries refer to the scratch copy only. */
3313         xh = (struct ocfs2_xattr_header *)bucket_buf;
3314         entries = (char *)xh->xh_entries;
3315         xh_free_start = le16_to_cpu(xh->xh_free_start);
3316
3317         mlog(0, "adjust xattr bucket in %llu, count = %u, "
3318              "xh_free_start = %u, xh_name_value_len = %u.\n",
3319              (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
3320              xh_free_start, le16_to_cpu(xh->xh_name_value_len));
3321
3322         /*
3323          * sort all the entries by their offset.
3324          * the largest will be the first, so that we can
3325          * move them to the end one by one.
3326          */
3327         sort(entries, le16_to_cpu(xh->xh_count),
3328              sizeof(struct ocfs2_xattr_entry),
3329              cmp_xe_offset, swap_xe);
3330
3331         /* Move all name/values to the end of the bucket. */
3332         xe = xh->xh_entries;
3333         end = OCFS2_XATTR_BUCKET_SIZE;
3334         for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
3335                 offset = le16_to_cpu(xe->xe_name_offset);
                     /* Non-local values keep only a value root in the bucket. */
3336                 if (ocfs2_xattr_is_local(xe))
3337                         value_len = OCFS2_XATTR_SIZE(
3338                                         le64_to_cpu(xe->xe_value_size));
3339                 else
3340                         value_len = OCFS2_XATTR_ROOT_SIZE;
3341                 len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;
3342
3343                 /*
3344                  * We must make sure that the name/value pair
3345                  * exist in the same block. So adjust end to
3346                  * the previous block end if needed.
3347                  */
3348                 if (((end - len) / blocksize !=
3349                         (end - 1) / blocksize))
3350                         end = end - end % blocksize;
3351
                     /* Regions may overlap, hence memmove rather than memcpy. */
3352                 if (end > offset + len) {
3353                         memmove(bucket_buf + end - len,
3354                                 bucket_buf + offset, len);
3355                         xe->xe_name_offset = cpu_to_le16(end - len);
3356                 }
3357
3358                 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
3359                                 "bucket %llu\n", (unsigned long long)blkno);
3360
3361                 end -= len;
3362         }
3363
3364         mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
3365                         "bucket %llu\n", (unsigned long long)blkno);
3366
             /* Nothing moved: leave the real blocks untouched (ret is 0). */
3367         if (xh_free_start == end)
3368                 goto out;
3369
             /* Clear the space that was freed up and publish the new start. */
3370         memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
3371         xh->xh_free_start = cpu_to_le16(end);
3372
3373         /* sort the entries by their name_hash. */
3374         sort(entries, le16_to_cpu(xh->xh_count),
3375              sizeof(struct ocfs2_xattr_entry),
3376              cmp_xe, swap_xe);
3377
             /* Write the compacted image back into the real bucket blocks. */
3378         buf = bucket_buf;
3379         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3380                 memcpy(bucket_block(bucket, i), buf, blocksize);
3381         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
3382
3383 out:
3384         kfree(bucket_buf);
3385         return ret;
3386 }
3387
3388 /*
3389  * Move half nums of the xattr bucket in the previous cluster to this new
3390  * cluster. We only touch the last cluster of the previous extent record.
3391  *
3392  * first_bh is the first buffer_head of a series of bucket in the same
3393  * extent rec and header_bh is the header of one bucket in this cluster.
3394  * They will be updated if we move the data header_bh contains to the new
3395  * cluster. first_hash will be set as the 1st xe's name_hash of the new cluster.
      *
      * @new_blkno:    first block of the new cluster.
      * @prev_blkno:   first block of the previous extent record; advanced here
      *                to the middle of that record's last cluster.
      * @num_clusters: number of clusters in the previous extent record.
      * @first_hash:   optional (may be NULL) output, see above.
      *
      * Must not be called when the bucket size equals the cluster size (see
      * the BUG_ON below).  Returns 0 on success or the error of the first
      * failing step.
3396  */
3397 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
3398                                                handle_t *handle,
3399                                                struct buffer_head **first_bh,
3400                                                struct buffer_head **header_bh,
3401                                                u64 new_blkno,
3402                                                u64 prev_blkno,
3403                                                u32 num_clusters,
3404                                                u32 *first_hash)
3405 {
3406         int i, ret, credits;
3407         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3408         int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3409         int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3410         int blocksize = inode->i_sb->s_blocksize;
3411         struct buffer_head *old_bh, *new_bh, *prev_bh, *new_first_bh = NULL;
3412         struct ocfs2_xattr_header *new_xh;
3413         struct ocfs2_xattr_header *xh =
3414                         (struct ocfs2_xattr_header *)((*first_bh)->b_data);
3415
3416         BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets);
3417         BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize);
3418
             /*
              * Hold our own reference on the old first block: *first_bh may be
              * replaced inside the loop but xh is still updated afterwards.
              */
3419         prev_bh = *first_bh;
3420         get_bh(prev_bh);
3421         xh = (struct ocfs2_xattr_header *)prev_bh->b_data;
3422
             /* Skip to the second half of the record's last cluster. */
3423         prev_blkno += (num_clusters - 1) * bpc + bpc / 2;
3424
3425         mlog(0, "move half of xattrs in cluster %llu to %llu\n",
3426              (unsigned long long)prev_blkno, (unsigned long long)new_blkno);
3427
3428         /*
3429          * We need to update the 1st half of the new cluster and
3430          * 1 more for the update of the 1st bucket of the previous
3431          * extent record.
3432          */
3433         credits = bpc / 2 + 1 + handle->h_buffer_credits;
3434         ret = ocfs2_extend_trans(handle, credits);
3435         if (ret) {
3436                 mlog_errno(ret);
3437                 goto out;
3438         }
3439
3440         ret = ocfs2_journal_access(handle, inode, prev_bh,
3441                                    OCFS2_JOURNAL_ACCESS_WRITE);
3442         if (ret) {
3443                 mlog_errno(ret);
3444                 goto out;
3445         }
3446
             /* Copy bpc / 2 blocks, one at a time, old half -> new cluster. */
3447         for (i = 0; i < bpc / 2; i++, prev_blkno++, new_blkno++) {
3448                 old_bh = new_bh = NULL;
3449                 new_bh = sb_getblk(inode->i_sb, new_blkno);
3450                 if (!new_bh) {
                             /*
                              * NOTE(review): -EIO is reported for a failed
                              * sb_getblk(); -ENOMEM may describe the failure
                              * better -- confirm against the rest of ocfs2.
                              */
3451                         ret = -EIO;
3452                         mlog_errno(ret);
3453                         goto out;
3454                 }
3455
3456                 ocfs2_set_new_buffer_uptodate(inode, new_bh);
3457
3458                 ret = ocfs2_journal_access(handle, inode, new_bh,
3459                                            OCFS2_JOURNAL_ACCESS_CREATE);
3460                 if (ret < 0) {
3461                         mlog_errno(ret);
3462                         brelse(new_bh);
3463                         goto out;
3464                 }
3465
3466                 ret = ocfs2_read_block(inode, prev_blkno, &old_bh);
3467                 if (ret < 0) {
3468                         mlog_errno(ret);
3469                         brelse(new_bh);
3470                         goto out;
3471                 }
3472
3473                 memcpy(new_bh->b_data, old_bh->b_data, blocksize);
3474
                     /*
                      * The first copied block becomes the header of the new
                      * cluster: it owns half of the buckets and supplies
                      * first_hash.  An extra reference is kept in new_first_bh
                      * and dropped at "out".
                      */
3475                 if (i == 0) {
3476                         new_xh = (struct ocfs2_xattr_header *)new_bh->b_data;
3477                         new_xh->xh_num_buckets = cpu_to_le16(num_buckets / 2);
3478
3479                         if (first_hash)
3480                                 *first_hash = le32_to_cpu(
3481                                         new_xh->xh_entries[0].xe_name_hash);
3482                         new_first_bh = new_bh;
3483                         get_bh(new_first_bh);
3484                 }
3485
3486                 ocfs2_journal_dirty(handle, new_bh);
3487
                     /*
                      * The caller's insert bucket was in the moved half: swap
                      * both of the caller's references over to the new cluster.
                      */
3488                 if (*header_bh == old_bh) {
3489                         brelse(*header_bh);
3490                         *header_bh = new_bh;
3491                         get_bh(*header_bh);
3492
3493                         brelse(*first_bh);
3494                         *first_bh = new_first_bh;
3495                         get_bh(*first_bh);
3496                 }
3497                 brelse(new_bh);
3498                 brelse(old_bh);
3499         }
3500
             /* The old record gave away half a cluster's worth of buckets. */
3501         le16_add_cpu(&xh->xh_num_buckets, -(num_buckets / 2));
3502
3503         ocfs2_journal_dirty(handle, prev_bh);
3504 out:
3505         brelse(prev_bh);
3506         brelse(new_first_bh);
3507         return ret;
3508 }
3509
3510 /*
3511  * Find the suitable pos when we divide a bucket into 2.
3512  * We have to make sure the xattrs with the same hash value exist
3513  * in the same bucket.
3514  *
3515  * If this ocfs2_xattr_header covers more than one hash value, find a
3516  * place where the hash value changes.  Try to find the most even split.
3517  * The most common case is that all entries have different hash values,
3518  * and the first check we make will find a place to split.
      *
      * Returns the index of the first entry of the second half, or xh_count
      * when no adjacent pair of entries compares as different.
      *
      * NOTE(review): cmp_xe() is defined outside this chunk; a non-zero result
      * is taken here to mean "the two entries have different hashes".
3519  */
3520 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
3521 {
3522         struct ocfs2_xattr_entry *entries = xh->xh_entries;
3523         int count = le16_to_cpu(xh->xh_count);
3524         int delta, middle = count / 2;
3525
3526         /*
3527          * We start at the middle.  Each step gets farther away in both
3528          * directions.  We therefore hit the change in hash value
3529          * nearest to the middle.  Note that this loop does not execute for
3530          * count < 2.
3531          */
3532         for (delta = 0; delta < middle; delta++) {
3533                 /* Let's check delta earlier than middle */
3534                 if (cmp_xe(&entries[middle - delta - 1],
3535                            &entries[middle - delta]))
3536                         return middle - delta;
3537
3538                 /* For even counts, don't walk off the end */
3539                 if ((middle + delta + 1) == count)
3540                         continue;
3541
3542                 /* Now try delta past middle */
3543                 if (cmp_xe(&entries[middle + delta],
3544                            &entries[middle + delta + 1]))
3545                         return middle + delta + 1;
3546         }
3547
3548         /* Every entry had the same hash */
3549         return count;
3550 }
3551
3552 /*
3553  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
3554  * first_hash will record the 1st hash of the new bucket.
3555  *
3556  * Normally half of the xattrs will be moved.  But we have to make
3557  * sure that the xattrs with the same hash value are stored in the
3558  * same bucket. If all the xattrs in this bucket have the same hash
3559  * value, the new bucket will be initialized as an empty one and the
3560  * first_hash will be initialized as (hash_value+1).
      *
      * @blk:             first block of the bucket to split.
      * @new_blk:         first block of the bucket receiving the upper part.
      * @first_hash:      optional (may be NULL) output, see above.
      * @new_bucket_head: non-zero when the new bucket starts a new extent; it
      *                   is then journaled with ACCESS_CREATE and gets
      *                   xh_num_buckets = 1, otherwise ACCESS_WRITE and 0.
      *
      * Returns 0 on success, -ENOMEM if a bucket object cannot be allocated,
      * or the error of the first failing read/init/journal step.
3561  */
3562 static int ocfs2_divide_xattr_bucket(struct inode *inode,
3563                                     handle_t *handle,
3564                                     u64 blk,
3565                                     u64 new_blk,
3566                                     u32 *first_hash,
3567                                     int new_bucket_head)
3568 {
3569         int ret, i;
3570         int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
3571         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
3572         struct ocfs2_xattr_header *xh;
3573         struct ocfs2_xattr_entry *xe;
3574         int blocksize = inode->i_sb->s_blocksize;
3575
3576         mlog(0, "move some of xattrs from bucket %llu to %llu\n",
3577              (unsigned long long)blk, (unsigned long long)new_blk);
3578
             /* s_bucket is the source (old) bucket, t_bucket the target. */
3579         s_bucket = ocfs2_xattr_bucket_new(inode);
3580         t_bucket = ocfs2_xattr_bucket_new(inode);
3581         if (!s_bucket || !t_bucket) {
3582                 ret = -ENOMEM;
3583                 mlog_errno(ret);
3584                 goto out;
3585         }
3586
3587         ret = ocfs2_read_xattr_bucket(s_bucket, blk);
3588         if (ret) {
3589                 mlog_errno(ret);
3590                 goto out;
3591         }
3592
3593         ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
3594                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3595         if (ret) {
3596                 mlog_errno(ret);
3597                 goto out;
3598         }
3599
3600         /*
3601          * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
3602          * there's no need to read it.
3603          */
3604         ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
3605         if (ret) {
3606                 mlog_errno(ret);
3607                 goto out;
3608         }
3609
3610         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
3611                                                 new_bucket_head ?
3612                                                 OCFS2_JOURNAL_ACCESS_CREATE :
3613                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3614         if (ret) {
3615                 mlog_errno(ret);
3616                 goto out;
3617         }
3618
             /* Entries [0, start) stay in the old bucket, [start, count) move. */
3619         xh = bucket_xh(s_bucket);
3620         count = le16_to_cpu(xh->xh_count);
3621         start = ocfs2_xattr_find_divide_pos(xh);
3622
             /* No split point: every entry stays, the new bucket is empty. */
3623         if (start == count) {
3624                 xe = &xh->xh_entries[start-1];
3625
3626                 /*
3627                  * initialized a new empty bucket here.
3628                  * The hash value is set as one larger than
3629                  * that of the last entry in the previous bucket.
3630                  */
3631                 for (i = 0; i < t_bucket->bu_blocks; i++)
3632                         memset(bucket_block(t_bucket, i), 0, blocksize);
3633
3634                 xh = bucket_xh(t_bucket);
3635                 xh->xh_free_start = cpu_to_le16(blocksize);
3636                 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
3637                 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
3638
3639                 goto set_num_buckets;
3640         }
3641
3642         /* copy the whole bucket to the new first. */
3643         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
3644
3645         /* update the new bucket. */
3646         xh = bucket_xh(t_bucket);
3647
3648         /*
3649          * Calculate the total name/value len and xh_free_start for
3650          * the old bucket first.
              * (xh points at the copy in t_bucket, which at this point still
              * holds the same entries as s_bucket.)
3651          */
3652         name_offset = OCFS2_XATTR_BUCKET_SIZE;
3653         name_value_len = 0;
3654         for (i = 0; i < start; i++) {
3655                 xe = &xh->xh_entries[i];
3656                 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3657                 if (ocfs2_xattr_is_local(xe))
3658                         xe_len +=
3659                            OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3660                 else
3661                         xe_len += OCFS2_XATTR_ROOT_SIZE;
3662                 name_value_len += xe_len;
3663                 if (le16_to_cpu(xe->xe_name_offset) < name_offset)
3664                         name_offset = le16_to_cpu(xe->xe_name_offset);
3665         }
3666
3667         /*
3668          * Now begin the modification to the new bucket.
3669          *
3670          * In the new bucket, We just move the xattr entry to the beginning
3671          * and don't touch the name/value. So there will be some holes in the
3672          * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
3673          * called.
3674          */
3675         xe = &xh->xh_entries[start];
3676         len = sizeof(struct ocfs2_xattr_entry) * (count - start);
3677         mlog(0, "mv xattr entry len %d from %d to %d\n", len,
3678              (int)((char *)xe - (char *)xh),
3679              (int)((char *)xh->xh_entries - (char *)xh));
3680         memmove((char *)xh->xh_entries, (char *)xe, len);
             /* Zero the now-stale tail of the entry array. */
3681         xe = &xh->xh_entries[count - start];
3682         len = sizeof(struct ocfs2_xattr_entry) * start;
3683         memset((char *)xe, 0, len);
3684
             /* Subtract what stayed behind in the old bucket. */
3685         le16_add_cpu(&xh->xh_count, -start);
3686         le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
3687
3688         /* Calculate xh_free_start for the new bucket. */
3689         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
3690         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3691                 xe = &xh->xh_entries[i];
                     /*
                      * NOTE(review): xe_len is computed below but never read
                      * in this loop; only the smallest xe_name_offset is used.
                      */
3692                 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3693                 if (ocfs2_xattr_is_local(xe))
3694                         xe_len +=
3695                            OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3696                 else
3697                         xe_len += OCFS2_XATTR_ROOT_SIZE;
3698                 if (le16_to_cpu(xe->xe_name_offset) <
3699                     le16_to_cpu(xh->xh_free_start))
3700                         xh->xh_free_start = xe->xe_name_offset;
3701         }
3702
3703 set_num_buckets:
3704         /* set xh->xh_num_buckets for the new xh. */
3705         if (new_bucket_head)
3706                 xh->xh_num_buckets = cpu_to_le16(1);
3707         else
3708                 xh->xh_num_buckets = 0;
3709
3710         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
3711
3712         /* store the first_hash of the new bucket. */
3713         if (first_hash)
3714                 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3715
3716         /*
3717          * Now only update the 1st block of the old bucket.  If we
3718          * just added a new empty bucket, there is no need to modify
3719          * it.
3720          */
3721         if (start == count)
3722                 goto out;
3723
             /* Drop the moved entries from the old header and fix its totals. */
3724         xh = bucket_xh(s_bucket);
3725         memset(&xh->xh_entries[start], 0,
3726                sizeof(struct ocfs2_xattr_entry) * (count - start));
3727         xh->xh_count = cpu_to_le16(start);
3728         xh->xh_free_start = cpu_to_le16(name_offset);
3729         xh->xh_name_value_len = cpu_to_le16(name_value_len);
3730
3731         ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
3732
3733 out:
3734         ocfs2_xattr_bucket_free(s_bucket);
3735         ocfs2_xattr_bucket_free(t_bucket);
3736
3737         return ret;
3738 }
3739
3740 /*
3741  * Copy xattr from one bucket to another bucket.
3742  *
3743  * The caller must make sure that the journal transaction
3744  * has enough space for journaling.
      *
      * @s_blkno:  first block of the source bucket (only read).
      * @t_blkno:  first block of the target bucket; must differ from s_blkno.
      * @t_is_new: non-zero to journal the target with ACCESS_CREATE, zero for
      *            ACCESS_WRITE.
      *
      * Returns 0 on success, -ENOMEM if a bucket object cannot be allocated,
      * or the error of the failing read/init/journal-access call.  Only the
      * allocation failure is logged here.
3745  */
3746 static int ocfs2_cp_xattr_bucket(struct inode *inode,
3747                                  handle_t *handle,
3748                                  u64 s_blkno,
3749                                  u64 t_blkno,
3750                                  int t_is_new)
3751 {
3752         int ret;
3753         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
3754
3755         BUG_ON(s_blkno == t_blkno);
3756
3757         mlog(0, "cp bucket %llu to %llu, target is %d\n",
3758              (unsigned long long)s_blkno, (unsigned long long)t_blkno,
3759              t_is_new);
3760
3761         s_bucket = ocfs2_xattr_bucket_new(inode);
3762         t_bucket = ocfs2_xattr_bucket_new(inode);
3763         if (!s_bucket || !t_bucket) {
3764                 ret = -ENOMEM;
3765                 mlog_errno(ret);
3766                 goto out;
3767         }
3768
3769         ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
3770         if (ret)
3771                 goto out;
3772
3773         /*
3774          * Even if !t_is_new, we're overwriting t_bucket.  Thus,
3775          * there's no need to read it.
3776          */
3777         ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
3778         if (ret)
3779                 goto out;
3780
3781         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
3782                                                 t_is_new ?
3783                                                 OCFS2_JOURNAL_ACCESS_CREATE :
3784                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3785         if (ret)
3786                 goto out;
3787
             /* The source is not modified, so only the target is dirtied. */
3788         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
3789         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
3790
3791 out:
3792         ocfs2_xattr_bucket_free(t_bucket);
3793         ocfs2_xattr_bucket_free(s_bucket);
3794
3795         return ret;
3796 }
3797
3798 /*
3799  * Copy one xattr cluster from src_blk to to_blk.
3800  * The to_blk will become the first bucket header of the cluster, so its
3801  * xh_num_buckets will be initialized as the bucket num in the cluster.
      *
      * @first_bh:   first bucket header of the old extent record; its
      *              xh_num_buckets is reduced by one cluster's worth of buckets.
      * @src_blk:    first block of the cluster to copy from.
      * @to_blk:     first block of the cluster to copy to.
      * @first_hash: optional (may be NULL) output: name hash of the first
      *              entry in the new cluster.
      *
      * The transaction is extended here by bpc + 1 credits.  Returns 0 on
      * success or the error of the first failing step.
3802  */
3803 static int ocfs2_cp_xattr_cluster(struct inode *inode,
3804                                   handle_t *handle,
3805                                   struct buffer_head *first_bh,
3806                                   u64 src_blk,
3807                                   u64 to_blk,
3808                                   u32 *first_hash)
3809 {
3810         int i, ret, credits;
3811         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3812         int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3813         int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3814         struct buffer_head *bh = NULL;
3815         struct ocfs2_xattr_header *xh;
             /* to_blk is advanced in the loop; remember where the copy begins. */
3816         u64 to_blk_start = to_blk;
3817
3818         mlog(0, "cp xattrs from cluster %llu to %llu\n",
3819              (unsigned long long)src_blk, (unsigned long long)to_blk);
3820
3821         /*
3822          * We need to update the new cluster and 1 more for the update of
3823          * the 1st bucket of the previous extent rec.
3824          */
3825         credits = bpc + 1 + handle->h_buffer_credits;
3826         ret = ocfs2_extend_trans(handle, credits);
3827         if (ret) {
3828                 mlog_errno(ret);
3829                 goto out;
3830         }
3831
3832         ret = ocfs2_journal_access(handle, inode, first_bh,
3833                                    OCFS2_JOURNAL_ACCESS_WRITE);
3834         if (ret) {
3835                 mlog_errno(ret);
3836                 goto out;
3837         }
3838
             /* Copy bucket by bucket; every target bucket is treated as new. */
3839         for (i = 0; i < num_buckets; i++) {
3840                 ret = ocfs2_cp_xattr_bucket(inode, handle,
3841                                             src_blk, to_blk, 1);
3842                 if (ret) {
3843                         mlog_errno(ret);
3844                         goto out;
3845                 }
3846
3847                 src_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3848                 to_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3849         }
3850
3851         /* update the old bucket header. */
3852         xh = (struct ocfs2_xattr_header *)first_bh->b_data;
3853         le16_add_cpu(&xh->xh_num_buckets, -num_buckets);
3854
3855         ocfs2_journal_dirty(handle, first_bh);
3856
3857         /* update the new bucket header. */
3858         ret = ocfs2_read_block(inode, to_blk_start, &bh);
3859         if (ret < 0) {
3860                 mlog_errno(ret);
3861                 goto out;
3862         }
3863
3864         ret = ocfs2_journal_access(handle, inode, bh,
3865                                    OCFS2_JOURNAL_ACCESS_WRITE);
3866         if (ret) {
3867                 mlog_errno(ret);
3868                 goto out;
3869         }
3870
3871         xh = (struct ocfs2_xattr_header *)bh->b_data;
3872         xh->xh_num_buckets = cpu_to_le16(num_buckets);
3873
3874         ocfs2_journal_dirty(handle, bh);
3875
3876         if (first_hash)
3877                 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3878 out:
3879         brelse(bh);
3880         return ret;
3881 }
3882
3883 /*
3884  * Move some xattrs in this cluster to the new cluster.
3885  * This function should only be called when bucket size == cluster size.
3886  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
      *
      * @prev_blk:   first block of the existing bucket/cluster.
      * @new_blk:    first block of the new bucket/cluster.
      * @first_hash: passed through to ocfs2_divide_xattr_bucket().
      *
      * Extends the transaction by two buckets' worth of blocks, then splits
      * the bucket with the new one marked as a bucket head.  Returns the error
      * from ocfs2_extend_trans() or the result of ocfs2_divide_xattr_bucket().
3887  */
3888 static int ocfs2_divide_xattr_cluster(struct inode *inode,
3889                                       handle_t *handle,
3890                                       u64 prev_blk,
3891                                       u64 new_blk,
3892                                       u32 *first_hash)
3893 {
3894         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3895         int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
3896
             /* Trips when the bucket size is smaller than the cluster size. */
3897         BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
3898
3899         ret = ocfs2_extend_trans(handle, credits);
3900         if (ret) {
3901                 mlog_errno(ret);
3902                 return ret;
3903         }
3904
3905         /* Move half of the xattr in prev_blk to the next bucket. */
3906         return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
3907                                           new_blk, first_hash, 1);
3908 }
3909
3910 /*
3911  * Move some xattrs from the old cluster to the new one since they are not
3912  * contiguous in ocfs2 xattr tree.
3913  *
3914  * new_blk starts a new separate cluster, and we will move some xattrs from
3915  * prev_blk to it. v_start will be set as the first name hash value in this
3916  * new cluster so that it can be used as e_cpos during tree insertion and
3917  * don't collide with our original b-tree operations. first_bh and header_bh
3918  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
3919  * to extend the insert bucket.
3920  *
3921  * The problem is how much xattr should we move to the new one and when should
3922  * we update first_bh and header_bh?
3923  * 1. If cluster size > bucket size, that means the previous cluster has more
3924  *    than 1 bucket, so just move half nums of bucket into the new cluster and
3925  *    update the first_bh and header_bh if the insert bucket has been moved
3926  *    to the new cluster.
3927  * 2. If cluster_size == bucket_size:
3928  *    a) If the previous extent rec has more than one cluster and the insert
3929  *       place isn't in the last cluster, copy the entire last cluster to the
3930  *       new one. This time, we don't need to upate the first_bh and header_bh
3931  *       since they will not be moved into the new cluster.
3932  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
3933  *       the new one. And we set the extend flag to zero if the insert place is
3934  *       moved into the new allocated cluster since no extend is needed.
3935  */
3936 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
3937                                             handle_t *handle,
3938                                             struct buffer_head **first_bh,
3939                                             struct buffer_head **header_bh,
3940                                             u64 new_blk,
3941                                             u64 prev_blk,
3942                                             u32 prev_clusters,
3943                                             u32 *v_start,
3944                                             int *extend)
3945 {
3946         int ret = 0;
3947         int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3948
3949         mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
3950              (unsigned long long)prev_blk, prev_clusters,
3951              (unsigned long long)new_blk);
3952
3953         if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1)
3954                 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
3955                                                           handle,
3956                                                           first_bh,
3957                                                           header_bh,
3958                                                           new_blk,
3959                                                           prev_blk,
3960                                                           prev_clusters,
3961                                                           v_start);
3962         else {
3963                 u64 last_blk = prev_blk + bpc * (prev_clusters - 1);
3964
3965                 if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk)
3966                         ret = ocfs2_cp_xattr_cluster(inode, handle, *first_bh,
3967                                                      last_blk, new_blk,
3968                                                      v_start);
3969                 else {
3970                         ret = ocfs2_divide_xattr_cluster(inode, handle,
3971                                                          last_blk, new_blk,
3972                                                          v_start);
3973
3974                         if ((*header_bh)->b_blocknr == last_blk && extend)
3975                                 *extend = 0;
3976                 }
3977         }
3978
3979         return ret;
3980 }
3981
3982 /*
3983  * Add a new cluster for xattr storage.
3984  *
3985  * If the new cluster is contiguous with the previous one, it will be
3986  * appended to the same extent record, and num_clusters will be updated.
3987  * If not, we will insert a new extent for it and move some xattrs in
3988  * the last cluster into the new allocated one.
3989  * We also need to limit the maximum size of a btree leaf, otherwise we'll
3990  * lose the benefits of hashing because we'll have to search large leaves.
3991  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
3992  * if it's bigger).
3993  *
3994  * first_bh is the first block of the previous extent rec and header_bh
3995  * indicates the bucket we will insert the new xattrs. They will be updated
3996  * when the header_bh is moved into the new cluster.
3997  */
3998 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3999                                        struct buffer_head *root_bh,
4000                                        struct buffer_head **first_bh,
4001                                        struct buffer_head **header_bh,
4002                                        u32 *num_clusters,
4003                                        u32 prev_cpos,
4004                                        u64 prev_blkno,
4005                                        int *extend,
4006                                        struct ocfs2_xattr_set_ctxt *ctxt)
4007 {
4008         int ret;
4009         u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
4010         u32 prev_clusters = *num_clusters;
4011         u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
4012         u64 block;
4013         handle_t *handle = ctxt->handle;
4014         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4015         struct ocfs2_extent_tree et;
4016
4017         mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
4018              "previous xattr blkno = %llu\n",
4019              (unsigned long long)OCFS2_I(inode)->ip_blkno,
4020              prev_cpos, (unsigned long long)prev_blkno);
4021
4022         ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
4023
4024         ret = ocfs2_journal_access(handle, inode, root_bh,
4025                                    OCFS2_JOURNAL_ACCESS_WRITE);
4026         if (ret < 0) {
4027                 mlog_errno(ret);
4028                 goto leave;
4029         }
4030
4031         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
4032                                      clusters_to_add, &bit_off, &num_bits);
4033         if (ret < 0) {
4034                 if (ret != -ENOSPC)
4035                         mlog_errno(ret);
4036                 goto leave;
4037         }
4038
4039         BUG_ON(num_bits > clusters_to_add);
4040
4041         block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
4042         mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
4043              num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
4044
4045         if (prev_blkno + prev_clusters * bpc == block &&
4046             (prev_clusters + num_bits) << osb->s_clustersize_bits <=
4047              OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
4048                 /*
4049                  * If this cluster is contiguous with the old one and
4050                  * adding this new cluster, we don't surpass the limit of
4051                  * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
4052                  * initialized and used like other buckets in the previous
4053                  * cluster.
4054                  * So add it as a contiguous one. The caller will handle
4055                  * its init process.
4056                  */
4057                 v_start = prev_cpos + prev_clusters;
4058                 *num_clusters = prev_clusters + num_bits;
4059                 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
4060                      num_bits);
4061         } else {
4062                 ret = ocfs2_adjust_xattr_cross_cluster(inode,
4063                                                        handle,
4064                                                        first_bh,
4065                                                        header_bh,
4066                                                        block,
4067                                                        prev_blkno,
4068                                                        prev_clusters,
4069                                                        &v_start,
4070                                                        extend);
4071                 if (ret) {
4072                         mlog_errno(ret);
4073                         goto leave;
4074                 }
4075         }
4076
4077         mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
4078              num_bits, (unsigned long long)block, v_start);
4079         ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
4080                                   num_bits, 0, ctxt->meta_ac);
4081         if (ret < 0) {
4082                 mlog_errno(ret);
4083                 goto leave;
4084         }
4085
4086         ret = ocfs2_journal_dirty(handle, root_bh);
4087         if (ret < 0)
4088                 mlog_errno(ret);
4089
4090 leave:
4091         return ret;
4092 }
4093
4094 /*
4095  * Extend a new xattr bucket and move xattrs to the end one by one until
4096  * We meet with start_bh. Only move half of the xattrs to the bucket after it.
4097  */
4098 static int ocfs2_extend_xattr_bucket(struct inode *inode,
4099                                      handle_t *handle,
4100                                      struct buffer_head *first_bh,
4101                                      struct buffer_head *start_bh,
4102                                      u32 num_clusters)
4103 {
4104         int ret, credits;
4105         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4106         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4107         u64 start_blk = start_bh->b_blocknr, end_blk;
4108         u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb);
4109         struct ocfs2_xattr_header *first_xh =
4110                                 (struct ocfs2_xattr_header *)first_bh->b_data;
4111         u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);
4112
4113         mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
4114              "from %llu, len = %u\n", (unsigned long long)start_blk,
4115              (unsigned long long)first_bh->b_blocknr, num_clusters);
4116
4117         BUG_ON(bucket >= num_buckets);
4118
4119         end_blk = first_bh->b_blocknr + (bucket - 1) * blk_per_bucket;
4120
4121         /*
4122          * We will touch all the buckets after the start_bh(include it).
4123          * Then we add one more bucket.
4124          */
4125         credits = end_blk - start_blk + 3 * blk_per_bucket + 1 +
4126                   handle->h_buffer_credits;
4127         ret = ocfs2_extend_trans(handle, credits);
4128         if (ret) {
4129                 mlog_errno(ret);
4130                 goto out;
4131         }
4132
4133         ret = ocfs2_journal_access(handle, inode, first_bh,
4134                                    OCFS2_JOURNAL_ACCESS_WRITE);
4135         if (ret) {
4136                 mlog_errno(ret);
4137                 goto out;
4138         }
4139
4140         while (end_blk != start_blk) {
4141                 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
4142                                             end_blk + blk_per_bucket, 0);
4143                 if (ret)
4144                         goto out;
4145                 end_blk -= blk_per_bucket;
4146         }
4147
4148         /* Move half of the xattr in start_blk to the next bucket. */
4149         ret = ocfs2_divide_xattr_bucket(inode, handle, start_blk,
4150                                         start_blk + blk_per_bucket, NULL, 0);
4151
4152         le16_add_cpu(&first_xh->xh_num_buckets, 1);
4153         ocfs2_journal_dirty(handle, first_bh);
4154
4155 out:
4156         return ret;
4157 }
4158
4159 /*
4160  * Add new xattr bucket in an extent record and adjust the buckets accordingly.
4161  * xb_bh is the ocfs2_xattr_block.
4162  * We will move all the buckets starting from header_bh to the next place. As
4163  * for this one, half num of its xattrs will be moved to the next one.
4164  *
4165  * We will allocate a new cluster if current cluster is full and adjust
4166  * header_bh and first_bh if the insert place is moved to the new cluster.
4167  */
4168 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
4169                                       struct buffer_head *xb_bh,
4170                                       struct buffer_head *header_bh,
4171                                       struct ocfs2_xattr_set_ctxt *ctxt)
4172 {
4173         struct ocfs2_xattr_header *first_xh = NULL;
4174         struct buffer_head *first_bh = NULL;
4175         struct ocfs2_xattr_block *xb =
4176                         (struct ocfs2_xattr_block *)xb_bh->b_data;
4177         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
4178         struct ocfs2_extent_list *el = &xb_root->xt_list;
4179         struct ocfs2_xattr_header *xh =
4180                         (struct ocfs2_xattr_header *)header_bh->b_data;
4181         u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4182         struct super_block *sb = inode->i_sb;
4183         struct ocfs2_super *osb = OCFS2_SB(sb);
4184         int ret, num_buckets, extend = 1;
4185         u64 p_blkno;
4186         u32 e_cpos, num_clusters;
4187
4188         mlog(0, "Add new xattr bucket starting form %llu\n",
4189              (unsigned long long)header_bh->b_blocknr);
4190
4191         /*
4192          * Add refrence for header_bh here because it may be
4193          * changed in ocfs2_add_new_xattr_cluster and we need
4194          * to free it in the end.
4195          */
4196         get_bh(header_bh);
4197
4198         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
4199                                   &num_clusters, el);
4200         if (ret) {
4201                 mlog_errno(ret);
4202                 goto out;
4203         }
4204
4205         ret = ocfs2_read_block(inode, p_blkno, &first_bh);
4206         if (ret) {
4207                 mlog_errno(ret);
4208                 goto out;
4209         }
4210
4211         num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
4212         first_xh = (struct ocfs2_xattr_header *)first_bh->b_data;
4213
4214         if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) {
4215                 ret = ocfs2_add_new_xattr_cluster(inode,
4216                                                   xb_bh,
4217                                                   &first_bh,
4218                                                   &header_bh,
4219                                                   &num_clusters,
4220                                                   e_cpos,
4221                                                   p_blkno,
4222                                                   &extend,
4223                                                   ctxt);
4224                 if (ret) {
4225                         mlog_errno(ret);
4226                         goto out;
4227                 }
4228         }
4229
4230         if (extend)
4231                 ret = ocfs2_extend_xattr_bucket(inode,
4232                                                 ctxt->handle,
4233                                                 first_bh,
4234                                                 header_bh,
4235                                                 num_clusters);
4236         if (ret)
4237                 mlog_errno(ret);
4238 out:
4239         brelse(first_bh);
4240         brelse(header_bh);
4241         return ret;
4242 }
4243
4244 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
4245                                         struct ocfs2_xattr_bucket *bucket,
4246                                         int offs)
4247 {
4248         int block_off = offs >> inode->i_sb->s_blocksize_bits;
4249
4250         offs = offs % inode->i_sb->s_blocksize;
4251         return bucket_block(bucket, block_off) + offs;
4252 }
4253
4254 /*
4255  * Handle the normal xattr set, including replace, delete and new.
4256  *
4257  * Note: "local" indicates the real data's locality. So we can't
4258  * just its bucket locality by its length.
4259  */
4260 static void ocfs2_xattr_set_entry_normal(struct inode *inode,
4261                                          struct ocfs2_xattr_info *xi,
4262                                          struct ocfs2_xattr_search *xs,
4263                                          u32 name_hash,
4264                                          int local)
4265 {
4266         struct ocfs2_xattr_entry *last, *xe;
4267         int name_len = strlen(xi->name);
4268         struct ocfs2_xattr_header *xh = xs->header;
4269         u16 count = le16_to_cpu(xh->xh_count), start;
4270         size_t blocksize = inode->i_sb->s_blocksize;
4271         char *val;
4272         size_t offs, size, new_size;
4273
4274         last = &xh->xh_entries[count];
4275         if (!xs->not_found) {
4276                 xe = xs->here;
4277                 offs = le16_to_cpu(xe->xe_name_offset);
4278                 if (ocfs2_xattr_is_local(xe))
4279                         size = OCFS2_XATTR_SIZE(name_len) +
4280                         OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4281                 else
4282                         size = OCFS2_XATTR_SIZE(name_len) +
4283                         OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4284
4285                 /*
4286                  * If the new value will be stored outside, xi->value has been
4287                  * initalized as an empty ocfs2_xattr_value_root, and the same
4288                  * goes with xi->value_len, so we can set new_size safely here.
4289                  * See ocfs2_xattr_set_in_bucket.
4290                  */
4291                 new_size = OCFS2_XATTR_SIZE(name_len) +
4292                            OCFS2_XATTR_SIZE(xi->value_len);
4293
4294                 le16_add_cpu(&xh->xh_name_value_len, -size);
4295                 if (xi->value) {
4296                         if (new_size > size)
4297                                 goto set_new_name_value;
4298
4299                         /* Now replace the old value with new one. */
4300                         if (local)
4301                                 xe->xe_value_size = cpu_to_le64(xi->value_len);
4302                         else
4303                                 xe->xe_value_size = 0;
4304
4305                         val = ocfs2_xattr_bucket_get_val(inode,
4306                                                          xs->bucket, offs);
4307                         memset(val + OCFS2_XATTR_SIZE(name_len), 0,
4308                                size - OCFS2_XATTR_SIZE(name_len));
4309                         if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
4310                                 memcpy(val + OCFS2_XATTR_SIZE(name_len),
4311                                        xi->value, xi->value_len);
4312
4313                         le16_add_cpu(&xh->xh_name_value_len, new_size);
4314                         ocfs2_xattr_set_local(xe, local);
4315                         return;
4316                 } else {
4317                         /*
4318                          * Remove the old entry if there is more than one.
4319                          * We don't remove the last entry so that we can
4320                          * use it to indicate the hash value of the empty
4321                          * bucket.
4322                          */
4323                         last -= 1;
4324                         le16_add_cpu(&xh->xh_count, -1);
4325                         if (xh->xh_count) {
4326                                 memmove(xe, xe + 1,
4327                                         (void *)last - (void *)xe);
4328                                 memset(last, 0,
4329                                        sizeof(struct ocfs2_xattr_entry));
4330                         } else
4331                                 xh->xh_free_start =
4332                                         cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4333
4334                         return;
4335                 }
4336         } else {
4337                 /* find a new entry for insert. */
4338                 int low = 0, high = count - 1, tmp;
4339                 struct ocfs2_xattr_entry *tmp_xe;
4340
4341                 while (low <= high && count) {
4342                         tmp = (low + high) / 2;
4343                         tmp_xe = &xh->xh_entries[tmp];
4344
4345                         if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
4346                                 low = tmp + 1;
4347                         else if (name_hash <
4348                                  le32_to_cpu(tmp_xe->xe_name_hash))
4349                                 high = tmp - 1;
4350                         else {
4351                                 low = tmp;
4352                                 break;
4353                         }
4354                 }
4355
4356                 xe = &xh->xh_entries[low];
4357                 if (low != count)
4358                         memmove(xe + 1, xe, (void *)last - (void *)xe);
4359
4360                 le16_add_cpu(&xh->xh_count, 1);
4361                 memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
4362                 xe->xe_name_hash = cpu_to_le32(name_hash);
4363                 xe->xe_name_len = name_len;
4364                 ocfs2_xattr_set_type(xe, xi->name_index);
4365         }
4366
4367 set_new_name_value:
4368         /* Insert the new name+value. */
4369         size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);
4370
4371         /*
4372          * We must make sure that the name/value pair
4373          * exists in the same block.
4374          */
4375         offs = le16_to_cpu(xh->xh_free_start);
4376         start = offs - size;
4377
4378         if (start >> inode->i_sb->s_blocksize_bits !=
4379             (offs - 1) >> inode->i_sb->s_blocksize_bits) {
4380                 offs = offs - offs % blocksize;
4381                 xh->xh_free_start = cpu_to_le16(offs);
4382         }
4383
4384         val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size);
4385         xe->xe_name_offset = cpu_to_le16(offs - size);
4386
4387         memset(val, 0, size);
4388         memcpy(val, xi->name, name_len);
4389         memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);
4390
4391         xe->xe_value_size = cpu_to_le64(xi->value_len);
4392         ocfs2_xattr_set_local(xe, local);
4393         xs->here = xe;
4394         le16_add_cpu(&xh->xh_free_start, -size);
4395         le16_add_cpu(&xh->xh_name_value_len, size);
4396
4397         return;
4398 }
4399
4400 /*
4401  * Set the xattr entry in the specified bucket.
4402  * The bucket is indicated by xs->bucket and it should have the enough
4403  * space for the xattr insertion.
4404  */
4405 static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4406                                            handle_t *handle,
4407                                            struct ocfs2_xattr_info *xi,
4408                                            struct ocfs2_xattr_search *xs,
4409                                            u32 name_hash,
4410                                            int local)
4411 {
4412         int ret;
4413         u64 blkno;
4414
4415         mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
4416              (unsigned long)xi->value_len, xi->name_index,
4417              (unsigned long long)bucket_blkno(xs->bucket));
4418
4419         if (!xs->bucket->bu_bhs[1]) {
4420                 blkno = bucket_blkno(xs->bucket);
4421                 ocfs2_xattr_bucket_relse(xs->bucket);
4422                 ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
4423                 if (ret) {
4424                         mlog_errno(ret);
4425                         goto out;
4426                 }
4427         }
4428
4429         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4430                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4431         if (ret < 0) {
4432                 mlog_errno(ret);
4433                 goto out;
4434         }
4435
4436         ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
4437         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4438
4439 out:
4440         return ret;
4441 }
4442
4443 static int ocfs2_xattr_value_update_size(struct inode *inode,
4444                                          handle_t *handle,
4445                                          struct buffer_head *xe_bh,
4446                                          struct ocfs2_xattr_entry *xe,
4447                                          u64 new_size)
4448 {
4449         int ret;
4450
4451         ret = ocfs2_journal_access(handle, inode, xe_bh,
4452                                    OCFS2_JOURNAL_ACCESS_WRITE);
4453         if (ret < 0) {
4454                 mlog_errno(ret);
4455                 goto out;
4456         }
4457
4458         xe->xe_value_size = cpu_to_le64(new_size);
4459
4460         ret = ocfs2_journal_dirty(handle, xe_bh);
4461         if (ret < 0)
4462                 mlog_errno(ret);
4463
4464 out:
4465         return ret;
4466 }
4467
4468 /*
4469  * Truncate the specified xe_off entry in xattr bucket.
4470  * bucket is indicated by header_bh and len is the new length.
4471  * Both the ocfs2_xattr_value_root and the entry will be updated here.
4472  *
4473  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4474  */
4475 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4476                                              struct buffer_head *header_bh,
4477                                              int xe_off,
4478                                              int len,
4479                                              struct ocfs2_xattr_set_ctxt *ctxt)
4480 {
4481         int ret, offset;
4482         u64 value_blk;
4483         struct buffer_head *value_bh = NULL;
4484         struct ocfs2_xattr_value_root *xv;
4485         struct ocfs2_xattr_entry *xe;
4486         struct ocfs2_xattr_header *xh =
4487                         (struct ocfs2_xattr_header *)header_bh->b_data;
4488         size_t blocksize = inode->i_sb->s_blocksize;
4489
4490         xe = &xh->xh_entries[xe_off];
4491
4492         BUG_ON(!xe || ocfs2_xattr_is_local(xe));
4493
4494         offset = le16_to_cpu(xe->xe_name_offset) +
4495                  OCFS2_XATTR_SIZE(xe->xe_name_len);
4496
4497         value_blk = offset / blocksize;
4498
4499         /* We don't allow ocfs2_xattr_value to be stored in different block. */
4500         BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4501         value_blk += header_bh->b_blocknr;
4502
4503         ret = ocfs2_read_block(inode, value_blk, &value_bh);
4504         if (ret) {
4505                 mlog_errno(ret);
4506                 goto out;
4507         }
4508
4509         xv = (struct ocfs2_xattr_value_root *)
4510                 (value_bh->b_data + offset % blocksize);
4511
4512         mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4513              xe_off, (unsigned long long)header_bh->b_blocknr, len);
4514         ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len, ctxt);
4515         if (ret) {
4516                 mlog_errno(ret);
4517                 goto out;
4518         }
4519
4520         ret = ocfs2_xattr_value_update_size(inode, ctxt->handle,
4521                                             header_bh, xe, len);
4522         if (ret) {
4523                 mlog_errno(ret);
4524                 goto out;
4525         }
4526
4527 out:
4528         brelse(value_bh);
4529         return ret;
4530 }
4531
4532 static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4533                                         struct ocfs2_xattr_search *xs,
4534                                         int len,
4535                                         struct ocfs2_xattr_set_ctxt *ctxt)
4536 {
4537         int ret, offset;
4538         struct ocfs2_xattr_entry *xe = xs->here;
4539         struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
4540
4541         BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
4542
4543         offset = xe - xh->xh_entries;
4544         ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket->bu_bhs[0],
4545                                                 offset, len, ctxt);
4546         if (ret)
4547                 mlog_errno(ret);
4548
4549         return ret;
4550 }
4551
4552 static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4553                                                 handle_t *handle,
4554                                                 struct ocfs2_xattr_search *xs,
4555                                                 char *val,
4556                                                 int value_len)
4557 {
4558         int offset;
4559         struct ocfs2_xattr_value_root *xv;
4560         struct ocfs2_xattr_entry *xe = xs->here;
4561
4562         BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
4563
4564         offset = le16_to_cpu(xe->xe_name_offset) +
4565                  OCFS2_XATTR_SIZE(xe->xe_name_len);
4566
4567         xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
4568
4569         return __ocfs2_xattr_set_value_outside(inode, handle,
4570                                                xv, val, value_len);
4571 }
4572
4573 static int ocfs2_rm_xattr_cluster(struct inode *inode,
4574                                   struct buffer_head *root_bh,
4575                                   u64 blkno,
4576                                   u32 cpos,
4577                                   u32 len)
4578 {
4579         int ret;
4580         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4581         struct inode *tl_inode = osb->osb_tl_inode;
4582         handle_t *handle;
4583         struct ocfs2_xattr_block *xb =
4584                         (struct ocfs2_xattr_block *)root_bh->b_data;
4585         struct ocfs2_alloc_context *meta_ac = NULL;
4586         struct ocfs2_cached_dealloc_ctxt dealloc;
4587         struct ocfs2_extent_tree et;
4588
4589         ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
4590
4591         ocfs2_init_dealloc_ctxt(&dealloc);
4592
4593         mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
4594              cpos, len, (unsigned long long)blkno);
4595
4596         ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len);
4597
4598         ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
4599         if (ret) {
4600                 mlog_errno(ret);
4601                 return ret;
4602         }
4603
4604         mutex_lock(&tl_inode->i_mutex);
4605
4606         if (ocfs2_truncate_log_needs_flush(osb)) {
4607                 ret = __ocfs2_flush_truncate_log(osb);
4608                 if (ret < 0) {
4609                         mlog_errno(ret);
4610                         goto out;
4611                 }
4612         }
4613
4614         handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
4615         if (IS_ERR(handle)) {
4616                 ret = -ENOMEM;
4617                 mlog_errno(ret);
4618                 goto out;
4619         }
4620
4621         ret = ocfs2_journal_access(handle, inode, root_bh,
4622                                    OCFS2_JOURNAL_ACCESS_WRITE);
4623         if (ret) {
4624                 mlog_errno(ret);
4625                 goto out_commit;
4626         }
4627
4628         ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
4629                                   &dealloc);
4630         if (ret) {
4631                 mlog_errno(ret);
4632                 goto out_commit;
4633         }
4634
4635         le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
4636
4637         ret = ocfs2_journal_dirty(handle, root_bh);
4638         if (ret) {
4639                 mlog_errno(ret);
4640                 goto out_commit;
4641         }
4642
4643         ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
4644         if (ret)
4645                 mlog_errno(ret);
4646
4647 out_commit:
4648         ocfs2_commit_trans(osb, handle);
4649 out:
4650         ocfs2_schedule_truncate_log_flush(osb, 1);
4651
4652         mutex_unlock(&tl_inode->i_mutex);
4653
4654         if (meta_ac)
4655                 ocfs2_free_alloc_context(meta_ac);
4656
4657         ocfs2_run_deallocs(osb, &dealloc);
4658
4659         return ret;
4660 }
4661
4662 static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
4663                                          handle_t *handle,
4664                                          struct ocfs2_xattr_search *xs)
4665 {
4666         struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
4667         struct ocfs2_xattr_entry *last = &xh->xh_entries[
4668                                                 le16_to_cpu(xh->xh_count) - 1];
4669         int ret = 0;
4670
4671         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4672                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4673         if (ret) {
4674                 mlog_errno(ret);
4675                 return;
4676         }
4677
4678         /* Remove the old entry. */
4679         memmove(xs->here, xs->here + 1,
4680                 (void *)last - (void *)xs->here);
4681         memset(last, 0, sizeof(struct ocfs2_xattr_entry));
4682         le16_add_cpu(&xh->xh_count, -1);
4683
4684         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4685 }
4686
4687 /*
4688  * Set the xattr name/value in the bucket specified in xs.
4689  *
4690  * As the new value in xi may be stored in the bucket or in an outside cluster,
4691  * we divide the whole process into 3 steps:
4692  * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
4693  * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
4694  * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
4695  * 4. If the clusters for the new outside value can't be allocated, we need
4696  *    to free the xattr we allocated in set.
4697  */
4698 static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4699                                      struct ocfs2_xattr_info *xi,
4700                                      struct ocfs2_xattr_search *xs,
4701                                      struct ocfs2_xattr_set_ctxt *ctxt)
4702 {
4703         int ret, local = 1;
4704         size_t value_len;
4705         char *val = (char *)xi->value;
4706         struct ocfs2_xattr_entry *xe = xs->here;
4707         u32 name_hash = ocfs2_xattr_name_hash(inode, xi->name,
4708                                               strlen(xi->name));
4709
4710         if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
4711                 /*
4712                  * We need to truncate the xattr storage first.
4713                  *
4714                  * If both the old and new value are stored to
4715                  * outside block, we only need to truncate
4716                  * the storage and then set the value outside.
4717                  *
4718                  * If the new value should be stored within block,
4719                  * we should free all the outside block first and
4720                  * the modification to the xattr block will be done
4721                  * by following steps.
4722                  */
4723                 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4724                         value_len = xi->value_len;
4725                 else
4726                         value_len = 0;
4727
4728                 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4729                                                            value_len,
4730                                                            ctxt);
4731                 if (ret)
4732                         goto out;
4733
4734                 if (value_len)
4735                         goto set_value_outside;
4736         }
4737
4738         value_len = xi->value_len;
4739         /* So we have to handle the inside block change now. */
4740         if (value_len > OCFS2_XATTR_INLINE_SIZE) {
4741                 /*
4742                  * If the new value will be stored outside of block,
4743                  * initalize a new empty value root and insert it first.
4744                  */
4745                 local = 0;
4746                 xi->value = &def_xv;
4747                 xi->value_len = OCFS2_XATTR_ROOT_SIZE;
4748         }
4749
4750         ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs,
4751                                               name_hash, local);
4752         if (ret) {
4753                 mlog_errno(ret);
4754                 goto out;
4755         }
4756
4757         if (value_len <= OCFS2_XATTR_INLINE_SIZE)
4758                 goto out;
4759
4760         /* allocate the space now for the outside block storage. */
4761         ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4762                                                    value_len, ctxt);
4763         if (ret) {
4764                 mlog_errno(ret);
4765
4766                 if (xs->not_found) {
4767                         /*
4768                          * We can't allocate enough clusters for outside
4769                          * storage and we have allocated xattr already,
4770                          * so need to remove it.
4771                          */
4772                         ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs);
4773                 }
4774                 goto out;
4775         }
4776
4777 set_value_outside:
4778         ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle,
4779                                                    xs, val, value_len);
4780 out:
4781         return ret;
4782 }
4783
4784 /*
4785  * check whether the xattr bucket is filled up with the same hash value.
4786  * If we want to insert the xattr with the same hash, return -ENOSPC.
4787  * If we want to insert a xattr with different hash value, go ahead
4788  * and ocfs2_divide_xattr_bucket will handle this.
4789  */
4790 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
4791                                               struct ocfs2_xattr_bucket *bucket,
4792                                               const char *name)
4793 {
4794         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4795         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
4796
4797         if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
4798                 return 0;
4799
4800         if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
4801             xh->xh_entries[0].xe_name_hash) {
4802                 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
4803                      "hash = %u\n",
4804                      (unsigned long long)bucket_blkno(bucket),
4805                      le32_to_cpu(xh->xh_entries[0].xe_name_hash));
4806                 return -ENOSPC;
4807         }
4808
4809         return 0;
4810 }
4811
4812 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
4813                                              struct ocfs2_xattr_info *xi,
4814                                              struct ocfs2_xattr_search *xs,
4815                                              struct ocfs2_xattr_set_ctxt *ctxt)
4816 {
4817         struct ocfs2_xattr_header *xh;
4818         struct ocfs2_xattr_entry *xe;
4819         u16 count, header_size, xh_free_start;
4820         int free, max_free, need, old;
4821         size_t value_size = 0, name_len = strlen(xi->name);
4822         size_t blocksize = inode->i_sb->s_blocksize;
4823         int ret, allocation = 0;
4824
4825         mlog_entry("Set xattr %s in xattr index block\n", xi->name);
4826
4827 try_again:
4828         xh = xs->header;
4829         count = le16_to_cpu(xh->xh_count);
4830         xh_free_start = le16_to_cpu(xh->xh_free_start);
4831         header_size = sizeof(struct ocfs2_xattr_header) +
4832                         count * sizeof(struct ocfs2_xattr_entry);
4833         max_free = OCFS2_XATTR_BUCKET_SIZE -
4834                 le16_to_cpu(xh->xh_name_value_len) - header_size;
4835
4836         mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
4837                         "of %u which exceed block size\n",
4838                         (unsigned long long)bucket_blkno(xs->bucket),
4839                         header_size);
4840
4841         if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4842                 value_size = OCFS2_XATTR_ROOT_SIZE;
4843         else if (xi->value)
4844                 value_size = OCFS2_XATTR_SIZE(xi->value_len);
4845
4846         if (xs->not_found)
4847                 need = sizeof(struct ocfs2_xattr_entry) +
4848                         OCFS2_XATTR_SIZE(name_len) + value_size;
4849         else {
4850                 need = value_size + OCFS2_XATTR_SIZE(name_len);
4851
4852                 /*
4853                  * We only replace the old value if the new length is smaller
4854                  * than the old one. Otherwise we will allocate new space in the
4855                  * bucket to store it.
4856                  */
4857                 xe = xs->here;
4858                 if (ocfs2_xattr_is_local(xe))
4859                         old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4860                 else
4861                         old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4862
4863                 if (old >= value_size)
4864                         need = 0;
4865         }
4866
4867         free = xh_free_start - header_size;
4868         /*
4869          * We need to make sure the new name/value pair
4870          * can exist in the same block.
4871          */
4872         if (xh_free_start % blocksize < need)
4873                 free -= xh_free_start % blocksize;
4874
4875         mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
4876              "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
4877              " %u\n", xs->not_found,
4878              (unsigned long long)bucket_blkno(xs->bucket),
4879              free, need, max_free, le16_to_cpu(xh->xh_free_start),
4880              le16_to_cpu(xh->xh_name_value_len));
4881
4882         if (free < need ||
4883             (xs->not_found &&
4884              count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
4885                 if (need <= max_free &&
4886                     count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
4887                         /*
4888                          * We can create the space by defragment. Since only the
4889                          * name/value will be moved, the xe shouldn't be changed
4890                          * in xs.
4891                          */
4892                         ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
4893                                                         xs->bucket);
4894                         if (ret) {
4895                                 mlog_errno(ret);
4896                                 goto out;
4897                         }
4898
4899                         xh_free_start = le16_to_cpu(xh->xh_free_start);
4900                         free = xh_free_start - header_size;
4901                         if (xh_free_start % blocksize < need)
4902                                 free -= xh_free_start % blocksize;
4903
4904                         if (free >= need)
4905                                 goto xattr_set;
4906
4907                         mlog(0, "Can't get enough space for xattr insert by "
4908                              "defragment. Need %u bytes, but we have %d, so "
4909                              "allocate new bucket for it.\n", need, free);
4910                 }
4911
4912                 /*
4913                  * We have to add new buckets or clusters and one
4914                  * allocation should leave us enough space for insert.
4915                  */
4916                 BUG_ON(allocation);
4917
4918                 /*
4919                  * We do not allow for overlapping ranges between buckets. And
4920                  * the maximum number of collisions we will allow for then is
4921                  * one bucket's worth, so check it here whether we need to
4922                  * add a new bucket for the insert.
4923                  */
4924                 ret = ocfs2_check_xattr_bucket_collision(inode,
4925                                                          xs->bucket,
4926                                                          xi->name);
4927                 if (ret) {
4928                         mlog_errno(ret);
4929                         goto out;
4930                 }
4931
4932                 ret = ocfs2_add_new_xattr_bucket(inode,
4933                                                  xs->xattr_bh,
4934                                                  xs->bucket->bu_bhs[0],
4935                                                  ctxt);
4936                 if (ret) {
4937                         mlog_errno(ret);
4938                         goto out;
4939                 }
4940
4941                 ocfs2_xattr_bucket_relse(xs->bucket);
4942
4943                 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
4944                                                    xi->name_index,
4945                                                    xi->name, xs);
4946                 if (ret && ret != -ENODATA)
4947                         goto out;
4948                 xs->not_found = ret;
4949                 allocation = 1;
4950                 goto try_again;
4951         }
4952
4953 xattr_set:
4954         ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
4955 out:
4956         mlog_exit(ret);
4957         return ret;
4958 }
4959
4960 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
4961                                         struct ocfs2_xattr_bucket *bucket,
4962                                         void *para)
4963 {
4964         int ret = 0;
4965         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4966         u16 i;
4967         struct ocfs2_xattr_entry *xe;
4968         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4969         struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
4970
4971         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
4972
4973         ctxt.handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
4974         if (IS_ERR(ctxt.handle)) {
4975                 ret = PTR_ERR(ctxt.handle);
4976                 mlog_errno(ret);
4977                 goto out;
4978         }
4979
4980         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4981                 xe = &xh->xh_entries[i];
4982                 if (ocfs2_xattr_is_local(xe))
4983                         continue;
4984
4985                 ret = ocfs2_xattr_bucket_value_truncate(inode,
4986                                                         bucket->bu_bhs[0],
4987                                                         i, 0, &ctxt);
4988                 if (ret) {
4989                         mlog_errno(ret);
4990                         break;
4991                 }
4992         }
4993
4994         ret = ocfs2_commit_trans(osb, ctxt.handle);
4995         ocfs2_schedule_truncate_log_flush(osb, 1);
4996         ocfs2_run_deallocs(osb, &ctxt.dealloc);
4997 out:
4998         return ret;
4999 }
5000
5001 static int ocfs2_delete_xattr_index_block(struct inode *inode,
5002                                           struct buffer_head *xb_bh)
5003 {
5004         struct ocfs2_xattr_block *xb =
5005                         (struct ocfs2_xattr_block *)xb_bh->b_data;
5006         struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
5007         int ret = 0;
5008         u32 name_hash = UINT_MAX, e_cpos, num_clusters;
5009         u64 p_blkno;
5010
5011         if (le16_to_cpu(el->l_next_free_rec) == 0)
5012                 return 0;
5013
5014         while (name_hash > 0) {
5015                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
5016                                           &e_cpos, &num_clusters, el);
5017                 if (ret) {
5018                         mlog_errno(ret);
5019                         goto out;
5020                 }
5021
5022                 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
5023                                                   ocfs2_delete_xattr_in_bucket,
5024                                                   NULL);
5025                 if (ret) {
5026                         mlog_errno(ret);
5027                         goto out;
5028                 }
5029
5030                 ret = ocfs2_rm_xattr_cluster(inode, xb_bh,
5031                                              p_blkno, e_cpos, num_clusters);
5032                 if (ret) {
5033                         mlog_errno(ret);
5034                         break;
5035                 }
5036
5037                 if (e_cpos == 0)
5038                         break;
5039
5040                 name_hash = e_cpos - 1;
5041         }
5042
5043 out:
5044         return ret;
5045 }
5046
5047 /*
5048  * 'security' attributes support
5049  */
5050 static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
5051                                         size_t list_size, const char *name,
5052                                         size_t name_len)
5053 {
5054         const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
5055         const size_t total_len = prefix_len + name_len + 1;
5056
5057         if (list && total_len <= list_size) {
5058                 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
5059                 memcpy(list + prefix_len, name, name_len);
5060                 list[prefix_len + name_len] = '\0';
5061         }
5062         return total_len;
5063 }
5064
5065 static int ocfs2_xattr_security_get(struct inode *inode, const char *name,
5066                                     void *buffer, size_t size)
5067 {
5068         if (strcmp(name, "") == 0)
5069                 return -EINVAL;
5070         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name,
5071                                buffer, size);
5072 }
5073
5074 static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
5075                                     const void *value, size_t size, int flags)
5076 {
5077         if (strcmp(name, "") == 0)
5078                 return -EINVAL;
5079
5080         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value,
5081                                size, flags);
5082 }
5083
5084 int ocfs2_init_security_get(struct inode *inode,
5085                             struct inode *dir,
5086                             struct ocfs2_security_xattr_info *si)
5087 {
5088         return security_inode_init_security(inode, dir, &si->name, &si->value,
5089                                             &si->value_len);
5090 }
5091
5092 int ocfs2_init_security_set(handle_t *handle,
5093                             struct inode *inode,
5094                             struct buffer_head *di_bh,
5095                             struct ocfs2_security_xattr_info *si,
5096                             struct ocfs2_alloc_context *xattr_ac,
5097                             struct ocfs2_alloc_context *data_ac)
5098 {
5099         return ocfs2_xattr_set_handle(handle, inode, di_bh,
5100                                      OCFS2_XATTR_INDEX_SECURITY,
5101                                      si->name, si->value, si->value_len, 0,
5102                                      xattr_ac, data_ac);
5103 }
5104
5105 struct xattr_handler ocfs2_xattr_security_handler = {
5106         .prefix = XATTR_SECURITY_PREFIX,
5107         .list   = ocfs2_xattr_security_list,
5108         .get    = ocfs2_xattr_security_get,
5109         .set    = ocfs2_xattr_security_set,
5110 };
5111
5112 /*
5113  * 'trusted' attributes support
5114  */
5115 static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
5116                                        size_t list_size, const char *name,
5117                                        size_t name_len)
5118 {
5119         const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
5120         const size_t total_len = prefix_len + name_len + 1;
5121
5122         if (list && total_len <= list_size) {
5123                 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
5124                 memcpy(list + prefix_len, name, name_len);
5125                 list[prefix_len + name_len] = '\0';
5126         }
5127         return total_len;
5128 }
5129
5130 static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name,
5131                                    void *buffer, size_t size)
5132 {
5133         if (strcmp(name, "") == 0)
5134                 return -EINVAL;
5135         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name,
5136                                buffer, size);
5137 }
5138
5139 static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name,
5140                                    const void *value, size_t size, int flags)
5141 {
5142         if (strcmp(name, "") == 0)
5143                 return -EINVAL;
5144
5145         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value,
5146                                size, flags);
5147 }
5148
5149 struct xattr_handler ocfs2_xattr_trusted_handler = {
5150         .prefix = XATTR_TRUSTED_PREFIX,
5151         .list   = ocfs2_xattr_trusted_list,
5152         .get    = ocfs2_xattr_trusted_get,
5153         .set    = ocfs2_xattr_trusted_set,
5154 };
5155
5156 /*
5157  * 'user' attributes support
5158  */
5159 static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
5160                                     size_t list_size, const char *name,
5161                                     size_t name_len)
5162 {
5163         const size_t prefix_len = XATTR_USER_PREFIX_LEN;
5164         const size_t total_len = prefix_len + name_len + 1;
5165         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5166
5167         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5168                 return 0;
5169
5170         if (list && total_len <= list_size) {
5171                 memcpy(list, XATTR_USER_PREFIX, prefix_len);
5172                 memcpy(list + prefix_len, name, name_len);
5173                 list[prefix_len + name_len] = '\0';
5174         }
5175         return total_len;
5176 }
5177
5178 static int ocfs2_xattr_user_get(struct inode *inode, const char *name,
5179                                 void *buffer, size_t size)
5180 {
5181         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5182
5183         if (strcmp(name, "") == 0)
5184                 return -EINVAL;
5185         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5186                 return -EOPNOTSUPP;
5187         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
5188                                buffer, size);
5189 }
5190
5191 static int ocfs2_xattr_user_set(struct inode *inode, const char *name,
5192                                 const void *value, size_t size, int flags)
5193 {
5194         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5195
5196         if (strcmp(name, "") == 0)
5197                 return -EINVAL;
5198         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5199                 return -EOPNOTSUPP;
5200
5201         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value,
5202                                size, flags);
5203 }
5204
5205 struct xattr_handler ocfs2_xattr_user_handler = {
5206         .prefix = XATTR_USER_PREFIX,
5207         .list   = ocfs2_xattr_user_list,
5208         .get    = ocfs2_xattr_user_get,
5209         .set    = ocfs2_xattr_user_set,
5210 };