Merge branch 'upstream' of git://ftp.linux-mips.org/pub/scm/upstream-linus
authorLinus Torvalds <torvalds@linux-foundation.org>
Tue, 29 Jan 2008 11:48:03 +0000 (22:48 +1100)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 29 Jan 2008 11:48:03 +0000 (22:48 +1100)
* 'upstream' of git://ftp.linux-mips.org/pub/scm/upstream-linus: (68 commits)
  [MIPS] remove Documentation/mips/GT64120.README
  [MIPS] Malta: remaining bits of the board support code cleanup
  [MIPS] Malta: make the helper function static
  [MIPS] Malta: fix braces at single statement blocks
  [MIPS] Malta, Atlas: move an extern function declaration to the header file
  [MIPS] Malta: Use C89 style for comments
  [MIPS] Malta: else should follow close brace in malta_int.c
  [MIPS] Malta: remove a superfluous comment
  [MIPS] Malta: include <linux/cpu.h> instead of <asm/cpu.h>
  [MIPS] Malta, Atlas, Sead: remove an extern from .c files
  [MIPS] Malta: fix oversized lines in malta_int.c
  [MIPS] Malta: remove a dead function declaration
  [MIPS] Malta: use tabs not spaces
  [MIPS] Malta: set up the screen info in a separate function
  [MIPS] Malta: check the PCI clock frequency in a separate function
  [MIPS] Malta: use the KERN_ facility level in printk()
  [MIPS] Malta: use Linux kernel style for structure initialization
  [MIPS]: constify function pointer tables
  [MIPS] compat: handle argument endianess of sys32_(f)truncate64 with merge_64
  [MIPS] Cobalt 64-bits kernels can be safely unmarked experimental
  ...

161 files changed:
.gitignore
Documentation/filesystems/ext4.txt
Documentation/filesystems/proc.txt
Documentation/kbuild/kconfig-language.txt
Makefile
arch/alpha/kernel/vmlinux.lds.S
arch/alpha/lib/dec_and_lock.c
arch/arm/kernel/vmlinux.lds.S
arch/arm/mach-imx/Makefile
arch/arm/mach-netx/Makefile
arch/avr32/kernel/vmlinux.lds.S
arch/blackfin/kernel/vmlinux.lds.S
arch/cris/arch-v10/vmlinux.lds.S
arch/cris/arch-v32/boot/compressed/Makefile
arch/cris/arch-v32/vmlinux.lds.S
arch/frv/boot/Makefile
arch/frv/kernel/gdb-stub.c
arch/frv/kernel/vmlinux.lds.S
arch/h8300/kernel/vmlinux.lds.S
arch/ia64/kernel/vmlinux.lds.S
arch/m32r/kernel/vmlinux.lds.S
arch/m68k/kernel/vmlinux-std.lds
arch/m68k/kernel/vmlinux-sun3.lds
arch/m68knommu/kernel/vmlinux.lds.S
arch/mips/kernel/vmlinux.lds.S
arch/mips/tx4927/common/Makefile
arch/mips/tx4938/common/Makefile
arch/mips/tx4938/toshiba_rbtx4938/Makefile
arch/parisc/kernel/vmlinux.lds.S
arch/powerpc/boot/Makefile
arch/powerpc/kernel/sysfs.c
arch/powerpc/kernel/vmlinux.lds.S
arch/powerpc/oprofile/op_model_power4.c
arch/ppc/kernel/vmlinux.lds.S
arch/s390/kernel/vmlinux.lds.S
arch/sh/kernel/vmlinux_32.lds.S
arch/sh/kernel/vmlinux_64.lds.S
arch/sparc/kernel/vmlinux.lds.S
arch/sparc64/kernel/unaligned.c
arch/sparc64/kernel/vmlinux.lds.S
arch/um/include/init.h
arch/um/kernel/dyn.lds.S
arch/um/kernel/uml.lds.S
arch/v850/kernel/vmlinux.lds.S
arch/x86/kernel/vmlinux_32.lds.S
arch/x86/kernel/vmlinux_64.lds.S
arch/xtensa/kernel/vmlinux.lds.S
arch/xtensa/mm/Makefile
arch/xtensa/platform-iss/Makefile
drivers/base/power/Makefile
drivers/infiniband/hw/cxgb3/Makefile
drivers/rapidio/rio.h
fs/Kconfig
fs/afs/dir.c
fs/afs/inode.c
fs/buffer.c
fs/compat_ioctl.c
fs/ext2/super.c
fs/ext3/super.c
fs/ext4/Makefile
fs/ext4/balloc.c
fs/ext4/dir.c
fs/ext4/extents.c
fs/ext4/file.c
fs/ext4/group.h
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c [new file with mode: 0644]
fs/ext4/migrate.c [new file with mode: 0644]
fs/ext4/namei.c
fs/ext4/resize.c
fs/ext4/super.c
fs/ext4/xattr.c
fs/inode.c
fs/jbd2/checkpoint.c
fs/jbd2/commit.c
fs/jbd2/journal.c
fs/jbd2/recovery.c
fs/jbd2/revoke.c
fs/jbd2/transaction.c
fs/ocfs2/cluster/sys.c
fs/read_write.c
fs/smbfs/Makefile
include/asm-arm/bitops.h
include/asm-avr32/setup.h
include/asm-generic/bitops/ext2-non-atomic.h
include/asm-generic/bitops/le.h
include/asm-generic/vmlinux.lds.h
include/asm-ia64/gcc_intrin.h
include/asm-m68k/bitops.h
include/asm-m68knommu/bitops.h
include/asm-powerpc/bitops.h
include/asm-s390/bitops.h
include/asm-sh/machvec.h
include/asm-sh/thread_info.h
include/asm-x86/thread_info_32.h
include/linux/Kbuild
include/linux/buffer_head.h
include/linux/compiler-gcc3.h
include/linux/compiler-gcc4.h
include/linux/compiler.h
include/linux/elfnote.h
include/linux/ext4_fs.h
include/linux/ext4_fs_extents.h
include/linux/ext4_fs_i.h
include/linux/ext4_fs_sb.h
include/linux/fs.h
include/linux/init.h
include/linux/jbd2.h
include/linux/module.h
include/linux/moduleparam.h
include/linux/pci.h
init/Kconfig
kernel/extable.c
kernel/kallsyms.c
kernel/module.c
kernel/params.c
lib/Kconfig.debug
lib/find_next_bit.c
scripts/Makefile.build
scripts/Makefile.lib
scripts/Makefile.modinst
scripts/Makefile.modpost
scripts/basic/docproc.c
scripts/decodecode
scripts/gcc-version.sh
scripts/genksyms/genksyms.c
scripts/kconfig/Makefile
scripts/kconfig/POTFILES.in
scripts/kconfig/conf.c
scripts/kconfig/confdata.c
scripts/kconfig/expr.c
scripts/kconfig/expr.h
scripts/kconfig/gconf.c
scripts/kconfig/lex.zconf.c_shipped
scripts/kconfig/lkc.h
scripts/kconfig/lxdialog/check-lxdialog.sh
scripts/kconfig/lxdialog/checklist.c
scripts/kconfig/lxdialog/dialog.h
scripts/kconfig/lxdialog/inputbox.c
scripts/kconfig/lxdialog/menubox.c
scripts/kconfig/lxdialog/textbox.c
scripts/kconfig/lxdialog/util.c
scripts/kconfig/lxdialog/yesno.c
scripts/kconfig/mconf.c
scripts/kconfig/menu.c
scripts/kconfig/qconf.cc
scripts/kconfig/symbol.c
scripts/kconfig/util.c
scripts/kconfig/zconf.gperf
scripts/kconfig/zconf.hash.c_shipped
scripts/kconfig/zconf.l
scripts/kernel-doc
scripts/mkmakefile
scripts/mod/modpost.c
scripts/mod/modpost.h
scripts/package/Makefile
scripts/package/buildtar
scripts/patch-kernel
scripts/setlocalversion

index 8d14531846b95bfa3564b58ccfb7913a034323b8..8363e48cdcdc67bad8834a08ec8dd57b8cb41635 100644 (file)
@@ -17,6 +17,7 @@
 *.i
 *.lst
 *.symtypes
+*.order
 
 #
 # Top-level generic files
index 6a4adcae9f9a699d624a0bb114f8e0d5f86161c5..560f88dc7090dadcfd7c2db4fc6821c64b8da3fe 100644 (file)
@@ -86,9 +86,21 @@ Alex is working on a new set of patches right now.
 When mounting an ext4 filesystem, the following option are accepted:
 (*) == default
 
-extents                        ext4 will use extents to address file data.  The
+extents                (*)     ext4 will use extents to address file data.  The
                        file system will no longer be mountable by ext3.
 
+noextents              ext4 will not use extents for newly created files
+
+journal_checksum       Enable checksumming of the journal transactions.
+                       This will allow the recovery code in e2fsck and the
+                       kernel to detect corruption in the kernel.  It is a
+                       compatible change and will be ignored by older kernels.
+
+journal_async_commit   Commit block can be written to disk without waiting
+                       for descriptor blocks. If enabled older kernels cannot
+                       mount the device. This will enable 'journal_checksum'
+                       internally.
+
 journal=update         Update the ext4 file system's journal to the current
                        format.
 
@@ -196,6 +208,12 @@ nobh                       (a) cache disk block mapping information
                        "nobh" option tries to avoid associating buffer
                        heads (supported only for "writeback" mode).
 
+mballoc                (*)     Use the multiple block allocator for block allocation
+nomballoc              disabled multiple block allocator for block allocation.
+stripe=n               Number of filesystem blocks that mballoc will try
+                       to use for allocation size and alignment. For RAID5/6
+                       systems this should be the number of data
+                       disks *  RAID chunk size in file system blocks.
 
 Data Mode
 ---------
index dec99455321fdf86018ca5b2b62936103fd73031..4413a2d4646fc4fd5305cb60e06a57bd5243bd16 100644 (file)
@@ -857,6 +857,45 @@ CPUs.
 The   "procs_blocked" line gives  the  number of  processes currently blocked,
 waiting for I/O to complete.
 
+1.9 Ext4 file system parameters
+------------------------------
+Ext4 file system have one directory per partition under /proc/fs/ext4/
+# ls /proc/fs/ext4/hdc/
+group_prealloc  max_to_scan  mb_groups  mb_history  min_to_scan  order2_req
+stats  stream_req
+
+mb_groups:
+This file gives the details of mutiblock allocator buddy cache of free blocks
+
+mb_history:
+Multiblock allocation history.
+
+stats:
+This file indicate whether the multiblock allocator should start collecting
+statistics. The statistics are shown during unmount
+
+group_prealloc:
+The multiblock allocator normalize the block allocation request to
+group_prealloc filesystem blocks if we don't have strip value set.
+The stripe value can be specified at mount time or during mke2fs.
+
+max_to_scan:
+How long multiblock allocator can look for a best extent (in found extents)
+
+min_to_scan:
+How long multiblock allocator  must look for a best extent
+
+order2_req:
+Multiblock allocator use  2^N search using buddies only for requests greater
+than or equal to order2_req. The request size is specfied in file system
+blocks. A value of 2 indicate only if the requests are greater than or equal
+to 4 blocks.
+
+stream_req:
+Files smaller than stream_req are served by the stream allocator, whose
+purpose is to pack requests as close each to other as possible to
+produce smooth I/O traffic. Avalue of 16 indicate that file smaller than 16
+filesystem block size will use group based preallocation.
 
 ------------------------------------------------------------------------------
 Summary
index 616043a6da99a0afd945e3cb5acb93b26bbb3f17..649cb87998900e235afe6b83c5e9f10077be2085 100644 (file)
@@ -24,7 +24,7 @@ visible if its parent entry is also visible.
 Menu entries
 ------------
 
-Most entries define a config option, all other entries help to organize
+Most entries define a config option; all other entries help to organize
 them. A single configuration option is defined like this:
 
 config MODVERSIONS
@@ -50,7 +50,7 @@ applicable everywhere (see syntax).
 
 - type definition: "bool"/"tristate"/"string"/"hex"/"int"
   Every config option must have a type. There are only two basic types:
-  tristate and string, the other types are based on these two. The type
+  tristate and string; the other types are based on these two. The type
   definition optionally accepts an input prompt, so these two examples
   are equivalent:
 
@@ -108,7 +108,7 @@ applicable everywhere (see syntax).
        equal to 'y' without visiting the dependencies. So abusing
        select you are able to select a symbol FOO even if FOO depends
        on BAR that is not set. In general use select only for
-       non-visible symbols (no promts anywhere) and for symbols with
+       non-visible symbols (no prompts anywhere) and for symbols with
        no dependencies. That will limit the usefulness but on the
        other hand avoid the illegal configurations all over. kconfig
        should one day warn about such things.
@@ -127,6 +127,27 @@ applicable everywhere (see syntax).
   used to help visually separate configuration logic from help within
   the file as an aid to developers.
 
+- misc options: "option" <symbol>[=<value>]
+  Various less common options can be defined via this option syntax,
+  which can modify the behaviour of the menu entry and its config
+  symbol. These options are currently possible:
+
+  - "defconfig_list"
+    This declares a list of default entries which can be used when
+    looking for the default configuration (which is used when the main
+    .config doesn't exists yet.)
+
+  - "modules"
+    This declares the symbol to be used as the MODULES symbol, which
+    enables the third modular state for all config symbols.
+
+  - "env"=<value>
+    This imports the environment variable into Kconfig. It behaves like
+    a default, except that the value comes from the environment, this
+    also means that the behaviour when mixing it with normal defaults is
+    undefined at this point. The symbol is currently not exported back
+    to the build environment (if this is desired, it can be done via
+    another symbol).
 
 Menu dependencies
 -----------------
@@ -162,9 +183,9 @@ An expression can have a value of 'n', 'm' or 'y' (or 0, 1, 2
 respectively for calculations). A menu entry becomes visible when it's
 expression evaluates to 'm' or 'y'.
 
-There are two types of symbols: constant and nonconstant symbols.
-Nonconstant symbols are the most common ones and are defined with the
-'config' statement. Nonconstant symbols consist entirely of alphanumeric
+There are two types of symbols: constant and non-constant symbols.
+Non-constant symbols are the most common ones and are defined with the
+'config' statement. Non-constant symbols consist entirely of alphanumeric
 characters or underscores.
 Constant symbols are only part of expressions. Constant symbols are
 always surrounded by single or double quotes. Within the quote, any
@@ -301,3 +322,81 @@ mainmenu:
 
 This sets the config program's title bar if the config program chooses
 to use it.
+
+
+Kconfig hints
+-------------
+This is a collection of Kconfig tips, most of which aren't obvious at
+first glance and most of which have become idioms in several Kconfig
+files.
+
+Adding common features and make the usage configurable
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+It is a common idiom to implement a feature/functionality that are
+relevant for some architectures but not all.
+The recommended way to do so is to use a config variable named HAVE_*
+that is defined in a common Kconfig file and selected by the relevant
+architectures.
+An example is the generic IOMAP functionality.
+
+We would in lib/Kconfig see:
+
+# Generic IOMAP is used to ...
+config HAVE_GENERIC_IOMAP
+
+config GENERIC_IOMAP
+       depends on HAVE_GENERIC_IOMAP && FOO
+
+And in lib/Makefile we would see:
+obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
+
+For each architecture using the generic IOMAP functionality we would see:
+
+config X86
+       select ...
+       select HAVE_GENERIC_IOMAP
+       select ...
+
+Note: we use the existing config option and avoid creating a new
+config variable to select HAVE_GENERIC_IOMAP.
+
+Note: the use of the internal config variable HAVE_GENERIC_IOMAP, it is
+introduced to overcome the limitation of select which will force a
+config option to 'y' no matter the dependencies.
+The dependencies are moved to the symbol GENERIC_IOMAP and we avoid the
+situation where select forces a symbol equals to 'y'.
+
+Build as module only
+~~~~~~~~~~~~~~~~~~~~
+To restrict a component build to module-only, qualify its config symbol
+with "depends on m".  E.g.:
+
+config FOO
+       depends on BAR && m
+
+limits FOO to module (=m) or disabled (=n).
+
+
+Build limited by a third config symbol which may be =y or =m
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+A common idiom that we see (and sometimes have problems with) is this:
+
+When option C in B (module or subsystem) uses interfaces from A (module
+or subsystem), and both A and B are tristate (could be =y or =m if they
+were independent of each other, but they aren't), then we need to limit
+C such that it cannot be built statically if A is built as a loadable
+module.  (C already depends on B, so there is no dependency issue to
+take care of here.)
+
+If A is linked statically into the kernel image, C can be built
+statically or as loadable module(s).  However, if A is built as loadable
+module(s), then C must be restricted to loadable module(s) also.  This
+can be expressed in kconfig language as:
+
+config C
+       depends on A = y || A = B
+
+or for real examples, use this command in a kernel tree:
+
+$ find . -name Kconfig\* | xargs grep -ns "depends on.*=.*||.*=" | grep -v orig
+
index 6d419f67939c29f28a2b003272ce70d9881bc769..0f84c742ed0e1c8363d27121fb2ce563fb582bc7 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -520,6 +520,11 @@ KBUILD_CFLAGS      += -g
 KBUILD_AFLAGS  += -gdwarf-2
 endif
 
+# We trigger additional mismatches with less inlining
+ifdef CONFIG_DEBUG_SECTION_MISMATCH
+KBUILD_CFLAGS += $(call cc-option, -fno-inline-functions-called-once)
+endif
+
 # Force gcc to behave correct even for buggy distributions
 KBUILD_CFLAGS         += $(call cc-option, -fno-stack-protector)
 
@@ -793,7 +798,7 @@ define rule_vmlinux-modpost
 endef
 
 # vmlinux image - including updated kernel symbols
-vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) vmlinux.o FORCE
+vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE
 ifdef CONFIG_HEADERS_CHECK
        $(Q)$(MAKE) -f $(srctree)/Makefile headers_check
 endif
@@ -804,7 +809,9 @@ endif
        $(call if_changed_rule,vmlinux__)
        $(Q)rm -f .old_version
 
-vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE
+# build vmlinux.o first to catch section mismatch errors early
+$(kallsyms.o): vmlinux.o
+vmlinux.o: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE
        $(call if_changed_rule,vmlinux-modpost)
 
 # The actual objects are generated when descending, 
@@ -1021,9 +1028,14 @@ ifdef CONFIG_MODULES
 all: modules
 
 #      Build modules
+#
+#      A module can be listed more than once in obj-m resulting in
+#      duplicate lines in modules.order files.  Those are removed
+#      using awk while concatenating to the final file.
 
 PHONY += modules
 modules: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),vmlinux)
+       $(Q)$(AWK) '!x[$$0]++' $(vmlinux-dirs:%=$(objtree)/%/modules.order) > $(objtree)/modules.order
        @echo '  Building modules, stage 2.';
        $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
 
@@ -1051,6 +1063,7 @@ _modinst_:
                rm -f $(MODLIB)/build ; \
                ln -s $(objtree) $(MODLIB)/build ; \
        fi
+       @cp -f $(objtree)/modules.order $(MODLIB)/
        $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
 
 # This depmod is only for convenience to give the initial
@@ -1110,7 +1123,7 @@ clean: archclean $(clean-dirs)
        @find . $(RCS_FIND_IGNORE) \
                \( -name '*.[oas]' -o -name '*.ko' -o -name '.*.cmd' \
                -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \
-               -o -name '*.symtypes' \) \
+               -o -name '*.symtypes' -o -name 'modules.order' \) \
                -type f -print | xargs rm -f
 
 # mrproper - Delete all generated files, including .config
@@ -1175,7 +1188,7 @@ help:
        @echo  '  dir/            - Build all files in dir and below'
        @echo  '  dir/file.[ois]  - Build specified target only'
        @echo  '  dir/file.ko     - Build module including final link'
-       @echo  '  rpm             - Build a kernel as an RPM package'
+       @echo  '  prepare         - Set up for building external modules'
        @echo  '  tags/TAGS       - Generate tags file for editors'
        @echo  '  cscope          - Generate cscope index'
        @echo  '  kernelrelease   - Output the release version string'
@@ -1188,6 +1201,8 @@ help:
        @echo  'Static analysers'
        @echo  '  checkstack      - Generate a list of stack hogs'
        @echo  '  namespacecheck  - Name space analysis on compiled kernel'
+       @echo  '  versioncheck    - Sanity check on version.h usage'
+       @echo  '  includecheck    - Check for duplicate included header files'
        @echo  '  export_report   - List the usages of all exported symbols'
        @if [ -r $(srctree)/include/asm-$(SRCARCH)/Kbuild ]; then \
         echo  '  headers_check   - Sanity check on exported headers'; \
@@ -1371,6 +1386,7 @@ define xtags
        if $1 --version 2>&1 | grep -iq exuberant; then \
            $(all-sources) | xargs $1 -a \
                -I __initdata,__exitdata,__acquires,__releases \
+               -I __read_mostly,____cacheline_aligned,____cacheline_aligned_in_smp,____cacheline_internodealigned_in_smp \
                -I EXPORT_SYMBOL,EXPORT_SYMBOL_GPL \
                --extra=+f --c-kinds=+px \
                --regex-asm='/^ENTRY\(([^)]*)\).*/\1/'; \
@@ -1428,12 +1444,12 @@ tags: FORCE
 includecheck:
        find * $(RCS_FIND_IGNORE) \
                -name '*.[hcS]' -type f -print | sort \
-               | xargs $(PERL) -w scripts/checkincludes.pl
+               | xargs $(PERL) -w $(srctree)/scripts/checkincludes.pl
 
 versioncheck:
        find * $(RCS_FIND_IGNORE) \
                -name '*.[hcS]' -type f -print | sort \
-               | xargs $(PERL) -w scripts/checkversion.pl
+               | xargs $(PERL) -w $(srctree)/scripts/checkversion.pl
 
 namespacecheck:
        $(PERL) $(srctree)/scripts/namespace.pl
index 55c05b511f4c060d5a8cab8f7140d6f07563bd0f..f13249be17c50736e23686158854799164582aeb 100644 (file)
@@ -46,11 +46,11 @@ SECTIONS
        __init_begin = .;
        .init.text : {
                _sinittext = .;
-               *(.init.text)
+               INIT_TEXT
                _einittext = .;
        }
        .init.data : {
-               *(.init.data)
+               INIT_DATA
        }
 
        . = ALIGN(16);
@@ -136,8 +136,8 @@ SECTIONS
 
        /* Sections to be discarded */
        /DISCARD/ : {
-               *(.exit.text)
-               *(.exit.data)
+               EXIT_TEXT
+               EXIT_DATA
                *(.exitcall.exit)
        }
 
index 6ae2500a9d9e8dec21fd4fb05302ef1c6dae11ca..0f5520d2f45fc41d72d2d333fb291c50517bfa3c 100644 (file)
@@ -30,8 +30,7 @@ _atomic_dec_and_lock:                         \n\
        .previous                               \n\
        .end _atomic_dec_and_lock");
 
-static int __attribute_used__
-atomic_dec_and_lock_1(atomic_t *atomic, spinlock_t *lock)
+static int __used atomic_dec_and_lock_1(atomic_t *atomic, spinlock_t *lock)
 {
        /* Slow path */
        spin_lock(lock);
index 30f732c7fdb505b9762b6d12f4fbc6ebf5ebb5ae..4898bdcfe7dd638b5c4d9962f3932244a1249a38 100644 (file)
@@ -30,7 +30,7 @@ SECTIONS
        }
 
        .init : {                       /* Init code and data           */
-                       *(.init.text)
+                       INIT_TEXT
                _einittext = .;
                __proc_info_begin = .;
                        *(.proc.info.init)
@@ -70,15 +70,15 @@ SECTIONS
                __per_cpu_end = .;
 #ifndef CONFIG_XIP_KERNEL
                __init_begin = _stext;
-               *(.init.data)
+               INIT_DATA
                . = ALIGN(4096);
                __init_end = .;
 #endif
        }
 
        /DISCARD/ : {                   /* Exit code and data           */
-               *(.exit.text)
-               *(.exit.data)
+               EXIT_TEXT
+               EXIT_DATA
                *(.exitcall.exit)
 #ifndef CONFIG_MMU
                *(.fixup)
@@ -130,7 +130,7 @@ SECTIONS
 #ifdef CONFIG_XIP_KERNEL
                . = ALIGN(4096);
                __init_begin = .;
-               *(.init.data)
+               INIT_DATA
                . = ALIGN(4096);
                __init_end = .;
 #endif
index 02272aa36e90c69e433b465ab799f08574478a5d..88d5e61a2e13d6f85a90d77d8219c0f11474392d 100644 (file)
@@ -1,9 +1,6 @@
 #
 # Makefile for the linux kernel.
 #
-# Note! Dependencies are done automagically by 'make dep', which also
-# removes any old dependencies. DON'T put your own dependencies here
-# unless it's something special (ie not a .c file).
 
 # Object file lists.
 
index 18785ff37657f71a272d33e2b5057ec371ae1e3d..7ce4ba9eb242b1ec13855d918d625368d5d7220b 100644 (file)
@@ -1,9 +1,6 @@
 #
 # Makefile for the linux kernel.
 #
-# Note! Dependencies are done automagically by 'make dep', which also
-# removes any old dependencies. DON'T put your own dependencies here
-# unless it's something special (ie not a .c file).
 
 # Object file lists.
 
index 11f08e35a2eb36df6ee235549a0f9779f3120f91..481cfd40c0539e85297ed33366ec579041d95680 100644 (file)
@@ -27,19 +27,19 @@ SECTIONS
                __init_begin = .;
                        _sinittext = .;
                        *(.text.reset)
-                       *(.init.text)
+                       INIT_TEXT
                        /*
                         * .exit.text is discarded at runtime, not
                         * link time, to deal with references from
                         * __bug_table
                         */
-                       *(.exit.text)
+                       EXIT_TEXT
                        _einittext = .;
                . = ALIGN(4);
                __tagtable_begin = .;
                        *(.taglist.init)
                __tagtable_end = .;
-                       *(.init.data)
+                       INIT_DATA
                . = ALIGN(16);
                __setup_start = .;
                        *(.init.setup)
@@ -135,7 +135,7 @@ SECTIONS
         * thrown away, as cleanup code is never called unless it's a module.
         */
        /DISCARD/               : {
-               *(.exit.data)
+               EXIT_DATA
                *(.exitcall.exit)
        }
 
index 9b75bc83c71fac9847cd8e0f578183dbfb87115c..858722421b40db4f693dcdc33122e60b47b8801f 100644 (file)
@@ -91,13 +91,13 @@ SECTIONS
        {
                . = ALIGN(PAGE_SIZE);
                __sinittext = .;
-               *(.init.text)
+               INIT_TEXT
                __einittext = .;
        }
        .init.data :
        {
                . = ALIGN(16);
-               *(.init.data)
+               INIT_DATA
        }
        .init.setup :
        {
@@ -198,8 +198,8 @@ SECTIONS
 
        /DISCARD/ :
        {
-               *(.exit.text)
-               *(.exit.data)
+               EXIT_TEXT
+               EXIT_DATA
                *(.exitcall.exit)
        }
 }
index 97a7876ed6819061fec0a9be042f60be1d260d77..93c9f0ea286b8f23d5445f154f7e783a298b86c2 100644 (file)
@@ -57,10 +57,10 @@ SECTIONS
        __init_begin = .;
        .init.text : { 
                   _sinittext = .;
-                  *(.init.text)
+                  INIT_TEXT
                   _einittext = .;
        }
-       .init.data : { *(.init.data) }
+       .init.data : { INIT_DATA }
        . = ALIGN(16);
        __setup_start = .;
        .init.setup : { *(.init.setup) }
@@ -109,8 +109,8 @@ SECTIONS
 
        /* Sections to be discarded */
        /DISCARD/ : {
-               *(.text.exit)
-               *(.data.exit)
+               EXIT_TEXT
+               EXIT_DATA
                *(.exitcall.exit)
         }
 
index 9f77eda914ba7448a9f38d515876566d0f338f29..609692f9d5eb424bd906abdb9248701938b188be 100644 (file)
@@ -7,7 +7,7 @@
 target = $(target_compressed_dir)
 src    = $(src_compressed_dir)
 
-CC = gcc-cris -mlinux -march=v32 -I $(TOPDIR)/include
+CC = gcc-cris -mlinux -march=v32 $(LINUXINCLUDE)
 CFLAGS = -O2
 LD = gcc-cris -mlinux -march=v32 -nostdlib
 OBJCOPY = objcopy-cris
index b076c134c0bbd797d9c28940a651523abadf3a63..fead8c59ea63bfcf727091a774982b55b91d131a 100644 (file)
@@ -61,10 +61,10 @@ SECTIONS
        __init_begin = .;
        .init.text : {
                   _sinittext = .;
-                  *(.init.text)
+                  INIT_TEXT
                   _einittext = .;
        }
-       .init.data : { *(.init.data) }
+       .init.data : { INIT_DATA }
        . = ALIGN(16);
        __setup_start = .;
        .init.setup : { *(.init.setup) }
@@ -124,8 +124,8 @@ SECTIONS
 
        /* Sections to be discarded */
        /DISCARD/ : {
-               *(.text.exit)
-               *(.data.exit)
+               EXIT_TEXT
+               EXIT_DATA
                *(.exitcall.exit)
         }
 
index dc6f03824423c76f378c1cc384671e70e2b1503b..6ae3254da01976b6fdaa374f588c3c58a081058c 100644 (file)
@@ -10,7 +10,7 @@
 
 targets := Image zImage bootpImage
 
-SYSTEM =$(TOPDIR)/$(LINUX)
+SYSTEM =$(LINUX)
 
 ZTEXTADDR       = 0x02080000
 PARAMS_PHYS     = 0x0207c000
@@ -45,7 +45,7 @@ zImage:       $(CONFIGURE) compressed/$(LINUX)
 bootpImage: bootp/bootp
        $(OBJCOPY) -O binary -R .note -R .comment -S bootp/bootp $@
 
-compressed/$(LINUX): $(TOPDIR)/$(LINUX) dep
+compressed/$(LINUX): $(LINUX) dep
        @$(MAKE) -C compressed $(LINUX)
 
 bootp/bootp: zImage initrd
@@ -59,10 +59,10 @@ initrd:
 # installation
 #
 install: $(CONFIGURE) Image
-       sh ./install.sh $(KERNELRELEASE) Image $(TOPDIR)/System.map "$(INSTALL_PATH)"
+       sh ./install.sh $(KERNELRELEASE) Image System.map "$(INSTALL_PATH)"
 
 zinstall: $(CONFIGURE) zImage
-       sh ./install.sh $(KERNELRELEASE) zImage $(TOPDIR)/System.map "$(INSTALL_PATH)"
+       sh ./install.sh $(KERNELRELEASE) zImage System.map "$(INSTALL_PATH)"
 
 #
 # miscellany
index e89cad1192a99e5949e12f01020f76a7053671dd..48a0393e7cee1cd0e14fb15a7c20bbee9008fde3 100644 (file)
@@ -87,7 +87,7 @@
  *  Example:
  *    $ cd ~/linux
  *    $ make menuconfig <go to "Kernel Hacking" and turn on remote debugging>
- *    $ make dep; make vmlinux
+ *    $ make vmlinux
  *
  *  Step 3:
  *  Download the kernel to the remote target and start
index a17a81d58bf69386638f89b66a01548e22a4b61e..f42b328b1dd0475e1af868308cdc000c89d40942 100644 (file)
@@ -28,14 +28,14 @@ SECTIONS
   .init.text : {
        *(.text.head)
 #ifndef CONFIG_DEBUG_INFO
-       *(.init.text)
-       *(.exit.text)
-       *(.exit.data)
+       INIT_TEXT
+       EXIT_TEXT
+       EXIT_DATA
        *(.exitcall.exit)
 #endif
   }
   _einittext = .;
-  .init.data : { *(.init.data) }
+  .init.data : { INIT_DATA }
 
   . = ALIGN(8);
   __setup_start = .;
@@ -106,8 +106,8 @@ SECTIONS
        LOCK_TEXT
 #ifdef CONFIG_DEBUG_INFO
        *(
-       .init.text
-       .exit.text
+       INIT_TEXT
+       EXIT_TEXT
        .exitcall.exit
        )
 #endif
@@ -138,7 +138,7 @@ SECTIONS
   .data : {                    /* Data */
        DATA_DATA
        *(.data.*)
-       *(.exit.data)
+       EXIT_DATA
        CONSTRUCTORS
        }
 
index a2e72d495551cd370454556898a9bf687fa869aa..43a87b9085b6b449cb7fbfb64bfb5d715551185c 100644 (file)
@@ -110,9 +110,9 @@ SECTIONS
        . = ALIGN(0x4) ;
        ___init_begin = .;
        __sinittext = .; 
-               *(.init.text)
+               INIT_TEXT
        __einittext = .; 
-               *(.init.data)
+               INIT_DATA
        . = ALIGN(0x4) ;
        ___setup_start = .;
                *(.init.setup)
@@ -124,8 +124,8 @@ SECTIONS
        ___con_initcall_start = .;
                *(.con_initcall.init)
        ___con_initcall_end = .;
-               *(.exit.text)
-               *(.exit.data)
+               EXIT_TEXT
+               EXIT_DATA
 #if defined(CONFIG_BLK_DEV_INITRD)
                . = ALIGN(4);
        ___initramfs_start = .;
index 757e419ebcf81f0ea1da02916e82d7302dc8395f..80622acc95deea98c83f5e2ce97cc11cc4bc883a 100644 (file)
@@ -27,8 +27,8 @@ SECTIONS
 {
   /* Sections to be discarded */
   /DISCARD/ : {
-       *(.exit.text)
-       *(.exit.data)
+       EXIT_TEXT
+       EXIT_DATA
        *(.exitcall.exit)
        *(.IA_64.unwind.exit.text)
        *(.IA_64.unwind_info.exit.text)
@@ -119,12 +119,12 @@ SECTIONS
   .init.text : AT(ADDR(.init.text) - LOAD_OFFSET)
        {
          _sinittext = .;
-         *(.init.text)
+         INIT_TEXT
          _einittext = .;
        }
 
   .init.data : AT(ADDR(.init.data) - LOAD_OFFSET)
-       { *(.init.data) }
+       { INIT_DATA }
 
 #ifdef CONFIG_BLK_DEV_INITRD
   .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET)
index 942a8c7a44174b0c010d88c10694523ec93c248f..41b07854fcc609c5d12c2254e1e09a807f1e9417 100644 (file)
@@ -76,10 +76,10 @@ SECTIONS
   __init_begin = .;
   .init.text : {
        _sinittext = .;
-       *(.init.text)
+       INIT_TEXT
        _einittext = .;
   }
-  .init.data : { *(.init.data) }
+  .init.data : { INIT_DATA }
   . = ALIGN(16);
   __setup_start = .;
   .init.setup : { *(.init.setup) }
@@ -100,8 +100,8 @@ SECTIONS
   .altinstr_replacement : { *(.altinstr_replacement) }
   /* .exit.text is discard at runtime, not link time, to deal with references
      from .altinstructions and .eh_frame */
-  .exit.text : { *(.exit.text) }
-  .exit.data : { *(.exit.data) }
+  .exit.text : { EXIT_TEXT }
+  .exit.data : { EXIT_DATA }
 
 #ifdef CONFIG_BLK_DEV_INITRD
   . = ALIGN(4096);
@@ -124,8 +124,8 @@ SECTIONS
 
   /* Sections to be discarded */
   /DISCARD/ : {
-       *(.exit.text)
-       *(.exit.data)
+       EXIT_TEXT
+       EXIT_DATA
        *(.exitcall.exit)
        }
 
index 59fe285865ec050b958545a94007568d94c7f707..7537cc5e61592ac87c0af127850b214d6d56f977 100644 (file)
@@ -45,10 +45,10 @@ SECTIONS
   __init_begin = .;
   .init.text : {
        _sinittext = .;
-       *(.init.text)
+       INIT_TEXT
        _einittext = .;
   }
-  .init.data : { *(.init.data) }
+  .init.data : { INIT_DATA }
   . = ALIGN(16);
   __setup_start = .;
   .init.setup : { *(.init.setup) }
@@ -82,8 +82,8 @@ SECTIONS
 
   /* Sections to be discarded */
   /DISCARD/ : {
-       *(.exit.text)
-       *(.exit.data)
+       EXIT_TEXT
+       EXIT_DATA
        *(.exitcall.exit)
        }
 
index 4adffefb5c48c673b5f349a6c0142b4a10a26ada..cdc313e7c299a66c2d7700526f00bcd0463142cf 100644 (file)
@@ -38,10 +38,10 @@ SECTIONS
 __init_begin = .;
        .init.text : {
                _sinittext = .;
-               *(.init.text)
+               INIT_TEXT
                _einittext = .;
        }
-       .init.data : { *(.init.data) }
+       .init.data : { INIT_DATA }
        . = ALIGN(16);
        __setup_start = .;
        .init.setup : { *(.init.setup) }
@@ -77,8 +77,8 @@ __init_begin = .;
 
   /* Sections to be discarded */
   /DISCARD/ : {
-       *(.exit.text)
-       *(.exit.data)
+       EXIT_TEXT
+       EXIT_DATA
        *(.exitcall.exit)
        }
 
index 07a0055602f447b4466442c7bedb125d3a149e7f..b44edb08e21276f3dce89e8b938c4e6a5252596c 100644 (file)
@@ -143,9 +143,9 @@ SECTIONS {
                . = ALIGN(4096);
                __init_begin = .;
                _sinittext = .;
-               *(.init.text)
+               INIT_TEXT
                _einittext = .;
-               *(.init.data)
+               INIT_DATA
                . = ALIGN(16);
                __setup_start = .;
                *(.init.setup)
@@ -170,8 +170,8 @@ SECTIONS {
        } > INIT
 
        /DISCARD/ : {
-               *(.exit.text)
-               *(.exit.data)
+               EXIT_TEXT
+               EXIT_DATA
                *(.exitcall.exit)
        }
 
index 5fc2398bdb76673ac8ebf92cbd21db0a5d34b7b9..b5470ceb418b301847f385d72e306eff0b1bf6dc 100644 (file)
@@ -114,11 +114,11 @@ SECTIONS
        __init_begin = .;
        .init.text : {
                _sinittext = .;
-               *(.init.text)
+               INIT_TEXT
                _einittext = .;
        }
        .init.data : {
-               *(.init.data)
+               INIT_DATA
        }
        . = ALIGN(16);
        .init.setup : {
@@ -144,10 +144,10 @@ SECTIONS
         * references from .rodata
         */
        .exit.text : {
-               *(.exit.text)
+               EXIT_TEXT
        }
        .exit.data : {
-               *(.exit.data)
+               EXIT_DATA
        }
 #if defined(CONFIG_BLK_DEV_INITRD)
        . = ALIGN(_PAGE_SIZE);
index e4a5e464b376614139f0119ba8bbc39a0c57f757..a7fe76a64964db66d66ab960c7a96b5c0d1ca3f9 100644 (file)
@@ -1,10 +1,6 @@
 #
 # Makefile for common code for Toshiba TX4927 based systems
 #
-# Note! Dependencies are done automagically by 'make dep', which also
-# removes any old dependencies. DON'T put your own dependencies here
-# unless it's something special (ie not a .c file).
-#
 
 obj-y  += tx4927_prom.o tx4927_irq.o
 
index c5c6ceaa71ca55ebfbfb0f3063599991942fc627..56aa1ed1ee0c947cd8419a349d1476d6921ea728 100644 (file)
@@ -1,10 +1,6 @@
 #
 # Makefile for common code for Toshiba TX4927 based systems
 #
-# Note! Dependencies are done automagically by 'make dep', which also
-# removes any old dependencies. DON'T put your own dependencies here
-# unless it's something special (ie not a .c file).
-#
 
 obj-y  += prom.o irq.o
 obj-$(CONFIG_KGDB) += dbgio.o
index 675bb1c3e40c72cc4776b906c05dd2abb8840904..2316dd7dd1bdf91ba62e0034a4a3b322ef54698c 100644 (file)
@@ -1,10 +1,6 @@
 #
 # Makefile for common code for Toshiba TX4927 based systems
 #
-# Note! Dependencies are done automagically by 'make dep', which also
-# removes any old dependencies. DON'T put your own dependencies here
-# unless it's something special (ie not a .c file).
-#
 
 obj-y  += prom.o setup.o irq.o spi_eeprom.o
 
index 40d0ff9b81ab685acfd88dd5ca1a04158a25b1dd..50b4a3a25d0af70dbfb59134aae12940cbd03f12 100644 (file)
@@ -172,11 +172,11 @@ SECTIONS
        __init_begin = .;
        .init.text : { 
                _sinittext = .;
-               *(.init.text)
+               INIT_TEXT
                _einittext = .;
        }
        .init.data : {
-               *(.init.data)
+               INIT_DATA
        }
        . = ALIGN(16);
        .init.setup : {
@@ -215,10 +215,10 @@ SECTIONS
         *  from .altinstructions and .eh_frame
         */
        .exit.text : {
-               *(.exit.text)
+               EXIT_TEXT
        }
        .exit.data : {
-               *(.exit.data)
+               EXIT_DATA
        }
 #ifdef CONFIG_BLK_DEV_INITRD
        . = ALIGN(PAGE_SIZE);
index 18e32719d0ed4544202bae47cc9c1361e6116649..4b1d98b8135ef5db6da95654bf4f430e4704e4f3 100644 (file)
@@ -65,7 +65,7 @@ obj-wlib := $(addsuffix .o, $(basename $(addprefix $(obj)/, $(src-wlib))))
 obj-plat := $(addsuffix .o, $(basename $(addprefix $(obj)/, $(src-plat))))
 
 quiet_cmd_copy_zlib = COPY    $@
-      cmd_copy_zlib = sed "s@__attribute_used__@@;s@<linux/\([^>]*\).*@\"\1\"@" $< > $@
+      cmd_copy_zlib = sed "s@__used@@;s@<linux/\([^>]*\).*@\"\1\"@" $< > $@
 
 quiet_cmd_copy_zlibheader = COPY    $@
       cmd_copy_zlibheader = sed "s@<linux/\([^>]*\).*@\"\1\"@" $< > $@
index 25d9a96484ddeb914ff0d7fe63aa1b0c3baf7e8e..c8127f832df0f6d28d8efb0ee3a2097f8933dcce 100644 (file)
@@ -158,7 +158,7 @@ static ssize_t show_##NAME(struct sys_device *dev, char *buf) \
        unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \
        return sprintf(buf, "%lx\n", val); \
 } \
-static ssize_t __attribute_used__ \
+static ssize_t __used \
        store_##NAME(struct sys_device *dev, const char *buf, size_t count) \
 { \
        struct cpu *cpu = container_of(dev, struct cpu, sysdev); \
index f66fa5d966b0d75ebecf99d53ac4723e8f2622ce..0afb9e31d2a008a79f1b5a4d2ff70d8dc798a458 100644 (file)
@@ -23,7 +23,7 @@ SECTIONS
        /* Sections to be discarded. */
        /DISCARD/ : {
        *(.exitcall.exit)
-       *(.exit.data)
+       EXIT_DATA
        }
 
        . = KERNELBASE;
@@ -76,17 +76,19 @@ SECTIONS
 
        .init.text : {
                _sinittext = .;
-               *(.init.text)
+               INIT_TEXT
                _einittext = .;
        }
 
        /* .exit.text is discarded at runtime, not link time,
         * to deal with references from __bug_table
         */
-       .exit.text : { *(.exit.text) }
+       .exit.text : {
+               EXIT_TEXT
+       }
 
        .init.data : {
-               *(.init.data);
+               INIT_DATA
                __vtop_table_begin = .;
                *(.vtop_fixup);
                __vtop_table_end = .;
index cddc250a6a5cf13325556175cbbed3473c61ec02..446a8bbb847b0f2dcd679ac74eaece531cee4476 100644 (file)
@@ -172,15 +172,15 @@ static void power4_stop(void)
 }
 
 /* Fake functions used by canonicalize_pc */
-static void __attribute_used__ hypervisor_bucket(void)
+static void __used hypervisor_bucket(void)
 {
 }
 
-static void __attribute_used__ rtas_bucket(void)
+static void __used rtas_bucket(void)
 {
 }
 
-static void __attribute_used__ kernel_unknown_bucket(void)
+static void __used kernel_unknown_bucket(void)
 {
 }
 
index 98c1212674f6e76c2953c5a9a8a5344bb6b94ca1..52b64fcbdfc50d493a76bef799400e2365cdd15e 100644 (file)
@@ -97,14 +97,14 @@ SECTIONS
   __init_begin = .;
   .init.text : {
        _sinittext = .;
-       *(.init.text)
+       INIT_TEXT
        _einittext = .;
   }
   /* .exit.text is discarded at runtime, not link time,
      to deal with references from __bug_table */
-  .exit.text : { *(.exit.text) }
+  .exit.text : { EXIT_TEXT }
   .init.data : {
-    *(.init.data);
+    INIT_DATA
     __vtop_table_begin = .;
     *(.vtop_fixup);
     __vtop_table_end = .;
@@ -164,6 +164,6 @@ SECTIONS
   /* Sections to be discarded. */
   /DISCARD/ : {
     *(.exitcall.exit)
-    *(.exit.data)
+    EXIT_DATA
   }
 }
index 936159199346520387f895c7fec5f6ffcba33345..7d43c3cd3ef35880b5b0fd5a5fbb08b33a51e605 100644 (file)
@@ -97,7 +97,7 @@ SECTIONS
        __init_begin = .;
        .init.text : {
                _sinittext = .;
-               *(.init.text)
+               INIT_TEXT
                _einittext = .;
        }
        /*
@@ -105,11 +105,11 @@ SECTIONS
         * to deal with references from __bug_table
        */
        .exit.text : {
-               *(.exit.text)
+               EXIT_TEXT
        }
 
        .init.data : {
-               *(.init.data)
+               INIT_DATA
        }
        . = ALIGN(0x100);
        .init.setup : {
@@ -156,7 +156,7 @@ SECTIONS
 
        /* Sections to be discarded */
        /DISCARD/ : {
-               *(.exit.data)
+               EXIT_DATA
                *(.exitcall.exit)
        }
 
index d549fac6d3e7dc5cb1692122ad890d670c1a4b9a..c7113786ecd4c1158c461128880cfef650a31857 100644 (file)
@@ -84,9 +84,9 @@ SECTIONS
        . = ALIGN(PAGE_SIZE);           /* Init code and data */
        __init_begin = .;
        _sinittext = .;
-       .init.text : { *(.init.text) }
+       .init.text : { INIT_TEXT }
        _einittext = .;
-       .init.data : { *(.init.data) }
+       .init.data : { INIT_DATA }
 
        . = ALIGN(16);
        __setup_start = .;
@@ -122,8 +122,8 @@ SECTIONS
         * .exit.text is discarded at runtime, not link time, to deal with
         * references from __bug_table
         */
-       .exit.text : { *(.exit.text) }
-       .exit.data : { *(.exit.data) }
+       .exit.text : { EXIT_TEXT }
+       .exit.data : { EXIT_DATA }
 
        . = ALIGN(PAGE_SIZE);
        .bss : {
index 2fd0f74014846ac5c920ea58fcbc3c124a6ae66a..3f1bd6392bb343c8699c309c5d98da640ec839d3 100644 (file)
@@ -96,9 +96,9 @@ SECTIONS
        . = ALIGN(PAGE_SIZE);           /* Init code and data */
        __init_begin = .;
        _sinittext = .;
-       .init.text : C_PHYS(.init.text) { *(.init.text) }
+       .init.text : C_PHYS(.init.text) { INIT_TEXT }
        _einittext = .;
-       .init.data : C_PHYS(.init.data) { *(.init.data) }
+       .init.data : C_PHYS(.init.data) { INIT_DATA }
        . = ALIGN(L1_CACHE_BYTES);      /* Better if Cache Line aligned */
        __setup_start = .;
        .init.setup : C_PHYS(.init.setup) { *(.init.setup) }
@@ -134,8 +134,8 @@ SECTIONS
         * .exit.text is discarded at runtime, not link time, to deal with
         * references from __bug_table
         */
-       .exit.text : C_PHYS(.exit.text) { *(.exit.text) }
-       .exit.data : C_PHYS(.exit.data) { *(.exit.data) }
+       .exit.text : C_PHYS(.exit.text) { EXIT_TEXT }
+       .exit.data : C_PHYS(.exit.data) { EXIT_DATA }
 
        . = ALIGN(PAGE_SIZE);
        .bss : C_PHYS(.bss) {
index a8b4200f9cc379e0bc6319e6f2c786ae3df2f4b6..216147d6e61f6882d8050c161d356c4bd6f15bec 100644 (file)
@@ -48,12 +48,12 @@ SECTIONS
        __init_begin = .;
        .init.text : {
                _sinittext = .;
-               *(.init.text)
+               INIT_TEXT
                _einittext = .;
        }
        __init_text_end = .;
        .init.data : {
-               *(.init.data)
+               INIT_DATA
        }
        . = ALIGN(16);
        .init.setup : {
@@ -102,8 +102,8 @@ SECTIONS
        _end = . ;
        PROVIDE (end = .);
        /DISCARD/ : {
-               *(.exit.text)
-               *(.exit.data)
+               EXIT_TEXT
+               EXIT_DATA
                *(.exitcall.exit)
        }
 
index 953be816fa2564b6692a7f3a25d884337958f017..dc7bf1b6321ceb89fbe658cce07f08f7df513808 100644 (file)
@@ -175,7 +175,7 @@ unsigned long compute_effective_address(struct pt_regs *regs,
 }
 
 /* This is just to make gcc think die_if_kernel does return... */
-static void __attribute_used__ unaligned_panic(char *str, struct pt_regs *regs)
+static void __used unaligned_panic(char *str, struct pt_regs *regs)
 {
        die_if_kernel(str, regs);
 }
index 9fcd503bc04ad5574488379dc43787cf4cabfec3..01f809617e5e708df97764794de91b3a6bad8c81 100644 (file)
@@ -56,11 +56,11 @@ SECTIONS
        .init.text : {
                __init_begin = .;
                _sinittext = .;
-               *(.init.text)
+               INIT_TEXT
                _einittext = .;
        }
        .init.data : {
-               *(.init.data)
+               INIT_DATA
        }
        . = ALIGN(16);
        .init.setup : {
@@ -137,8 +137,8 @@ SECTIONS
        PROVIDE (end = .);
 
        /DISCARD/ : {
-               *(.exit.text)
-               *(.exit.data)
+               EXIT_TEXT
+               EXIT_DATA
                *(.exitcall.exit)
        }
 
index d4de7c0120ced888209122f47424d9754c1ed50f..cebc6cae91903442235e8f4b4e782b71a5feb5c9 100644 (file)
@@ -42,15 +42,15 @@ typedef void (*exitcall_t)(void);
 
 /* These are for everybody (although not all archs will actually
    discard it in modules) */
-#define __init         __attribute__ ((__section__ (".init.text")))
-#define __initdata     __attribute__ ((__section__ (".init.data")))
-#define __exitdata     __attribute__ ((__section__(".exit.data")))
-#define __exit_call    __attribute_used__ __attribute__ ((__section__ (".exitcall.exit")))
+#define __init         __section(.init.text)
+#define __initdata     __section(.init.data)
+#define __exitdata     __section(.exit.data)
+#define __exit_call    __used __section(.exitcall.exit)
 
 #ifdef MODULE
-#define __exit         __attribute__ ((__section__(".exit.text")))
+#define __exit         __section(.exit.text)
 #else
-#define __exit         __attribute_used__ __attribute__ ((__section__(".exit.text")))
+#define __exit         __used __section(.exit.text)
 #endif
 
 #endif
@@ -103,16 +103,16 @@ extern struct uml_param __uml_setup_start, __uml_setup_end;
  * Mark functions and data as being only used at initialization
  * or exit time.
  */
-#define __uml_init_setup       __attribute_used__ __attribute__ ((__section__ (".uml.setup.init")))
-#define __uml_setup_help       __attribute_used__ __attribute__ ((__section__ (".uml.help.init")))
-#define __uml_init_call                __attribute_used__ __attribute__ ((__section__ (".uml.initcall.init")))
-#define __uml_postsetup_call   __attribute_used__ __attribute__ ((__section__ (".uml.postsetup.init")))
-#define __uml_exit_call                __attribute_used__ __attribute__ ((__section__ (".uml.exitcall.exit")))
+#define __uml_init_setup       __used __section(.uml.setup.init)
+#define __uml_setup_help       __used __section(.uml.help.init)
+#define __uml_init_call                __used __section(.uml.initcall.init)
+#define __uml_postsetup_call   __used __section(.uml.postsetup.init)
+#define __uml_exit_call                __used __section(.uml.exitcall.exit)
 
 #ifndef __KERNEL__
 
 #define __define_initcall(level,fn) \
-       static initcall_t __initcall_##fn __attribute_used__ \
+       static initcall_t __initcall_##fn __used \
        __attribute__((__section__(".initcall" level ".init"))) = fn
 
 /* Userspace initcalls shouldn't depend on anything in the kernel, so we'll
@@ -122,7 +122,7 @@ extern struct uml_param __uml_setup_start, __uml_setup_end;
 
 #define __exitcall(fn) static exitcall_t __exitcall_##fn __exit_call = fn
 
-#define __init_call    __attribute_used__ __attribute__ ((__section__ (".initcall.init")))
+#define __init_call    __used __section(.initcall.init)
 
 #endif
 
index 3866f4960f04519de02a1ab4c6cdff3452d29ffc..26090b7f323ecf3806f0a86668250ebad253bc6f 100644 (file)
@@ -17,7 +17,7 @@ SECTIONS
   __init_begin = .;
   .init.text : {
        _sinittext = .;
-       *(.init.text)
+       INIT_TEXT
        _einittext = .;
   }
 
@@ -84,7 +84,7 @@ SECTIONS
 
   #include "asm/common.lds.S"
 
-  init.data : { *(.init.data) }
+  init.data : { INIT_DATA }
 
   /* Ensure the __preinit_array_start label is properly aligned.  We
      could instead move the label definition inside the section, but
index 13df191e2b41e6b66fe6c3668b5daa689dada71a..5828c1d54505f7fa0603c86a1b819fd52e528e7a 100644 (file)
@@ -23,7 +23,7 @@ SECTIONS
   __init_begin = .;
   .init.text : {
        _sinittext = .;
-       *(.init.text)
+       INIT_TEXT
        _einittext = .;
   }
   . = ALIGN(4096);
@@ -48,7 +48,7 @@ SECTIONS
 
   #include "asm/common.lds.S"
 
-  init.data : { *(init.data) }
+  init.data : { INIT_DATA }
   .data    :
   {
     . = ALIGN(KERNEL_STACK_SIZE);              /* init_task */
index 6172599b4ce2aab5e3fe3551cb351b44ecd5ff0c..d08cd1d27f27a77cfdf5375bd14d54534acf27ea 100644 (file)
 #define DATA_CONTENTS                                                        \
                __sdata = . ;                                                 \
                DATA_DATA                                                     \
-                       *(.exit.data)   /* 2.5 convention */                  \
+                       EXIT_DATA       /* 2.5 convention */                  \
                        *(.data.exit)   /* 2.4 convention */                  \
                . = ALIGN (16) ;                                              \
                *(.data.cacheline_aligned)                                    \
                . = ALIGN (4096) ;                                            \
                __init_start = . ;                                            \
                        __sinittext = .;                                      \
-                       *(.init.text)   /* 2.5 convention */                  \
+                       INIT_TEXT       /* 2.5 convention */                  \
                        __einittext = .;                                      \
-                       *(.init.data)                                         \
+                       INIT_DATA                                             \
                        *(.text.init)   /* 2.4 convention */                  \
                        *(.data.init)                                         \
                INITCALL_CONTENTS                                             \
 #define ROMK_INIT_RAM_CONTENTS                                               \
                . = ALIGN (4096) ;                                            \
                __init_start = . ;                                            \
-                       *(.init.data)   /* 2.5 convention */                  \
+                       INIT_DATA       /* 2.5 convention */                  \
                        *(.data.init)   /* 2.4 convention */                  \
                __init_end = . ;                                              \
                . = ALIGN (4096) ;
    should go into ROM.  */     
 #define ROMK_INIT_ROM_CONTENTS                                               \
                        _sinittext = .;                                       \
-                       *(.init.text)   /* 2.5 convention */                  \
+                       INIT_TEXT       /* 2.5 convention */                  \
                        _einittext = .;                                       \
                        *(.text.init)   /* 2.4 convention */                  \
                INITCALL_CONTENTS                                             \
index 7d72cce0052946c93446eaa279c0d476da80af51..84c913f38f980b621e6549ccca86f3a01a8666af 100644 (file)
@@ -131,10 +131,12 @@ SECTIONS
   .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
        __init_begin = .;
        _sinittext = .;
-       *(.init.text)
+       INIT_TEXT
        _einittext = .;
   }
-  .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) }
+  .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
+       INIT_DATA
+  }
   . = ALIGN(16);
   .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
        __setup_start = .;
@@ -169,8 +171,12 @@ SECTIONS
   }
   /* .exit.text is discard at runtime, not link time, to deal with references
      from .altinstructions and .eh_frame */
-  .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
-  .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) }
+  .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
+       EXIT_TEXT
+  }
+  .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
+       EXIT_DATA
+  }
 #if defined(CONFIG_BLK_DEV_INITRD)
   . = ALIGN(4096);
   .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
index ba8ea97abd219359d6e7c68541e375dbe97f5c7a..ea5386944e67e75da4e88712272ed251cad93761 100644 (file)
@@ -155,12 +155,15 @@ SECTIONS
   __init_begin = .;
   .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
        _sinittext = .;
-       *(.init.text)
+       INIT_TEXT
        _einittext = .;
   }
-  __initdata_begin = .;
-  .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) }
-  __initdata_end = .;
+  .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
+       __initdata_begin = .;
+       INIT_DATA
+       __initdata_end = .;
+   }
+
   . = ALIGN(16);
   __setup_start = .;
   .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) }
@@ -187,8 +190,12 @@ SECTIONS
   }
   /* .exit.text is discard at runtime, not link time, to deal with references
      from .altinstructions and .eh_frame */
-  .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
-  .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) }
+  .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
+       EXIT_TEXT
+  }
+  .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
+       EXIT_DATA
+  }
 
 /* vdso blob that is mapped into user space */
   vdso_start = . ;
index ac4ed52034dbfb44c00564e9af7020750f0a75aa..7d0f55a4982d41ba997e4ec557807d4a768fde23 100644 (file)
@@ -136,13 +136,13 @@ SECTIONS
   __init_begin = .;
   .init.text : {
        _sinittext = .;
-       *(.init.literal) *(.init.text)
+       *(.init.literal) INIT_TEXT
        _einittext = .;
   }
 
   .init.data :
   {
-    *(.init.data)
+    INIT_DATA
     . = ALIGN(0x4);
     __tagtable_begin = .;
     *(.taglist)
@@ -278,8 +278,9 @@ SECTIONS
   /* Sections to be discarded */
   /DISCARD/ :
   {
-       *(.exit.literal .exit.text)
-       *(.exit.data)
+       *(.exit.literal)
+       EXIT_TEXT
+       EXIT_DATA
         *(.exitcall.exit)
   }
 
index 10aec22a8f98b2478fcc7c8d01b8b55bce88c035..64e304a2f884e4395fdff94d46da28d717a6c79b 100644 (file)
@@ -1,9 +1,5 @@
 #
 # Makefile for the Linux/Xtensa-specific parts of the memory manager.
 #
-# Note! Dependencies are done automagically by 'make dep', which also
-# removes any old dependencies. DON'T put your own dependencies here
-# unless it's something special (ie not a .c file).
-#
 
 obj-y   := init.o fault.o tlb.o misc.o cache.o
index 5b394e9620e5c70a6b104bbc818c23b039711eca..af96e314d71fa7a228abcb371ff862246410f9c9 100644 (file)
@@ -3,11 +3,6 @@
 # Makefile for the Xtensa Instruction Set Simulator (ISS)
 # "prom monitor" library routines under Linux.
 #
-# Note! Dependencies are done automagically by 'make dep', which also
-# removes any old dependencies. DON'T put your own dependencies here
-# unless it's something special (ie not a .c file).
-#
-# Note 2! The CFLAGS definitions are in the main makefile...
 
 obj-y                  = io.o console.o setup.o network.o
 
index 06a86fe6a78d77a1c2d0a3b7e84003456c430933..de28dfd3b96c6ff4db97ed85320f017a8542e799 100644 (file)
@@ -2,9 +2,5 @@ obj-$(CONFIG_PM)        += sysfs.o
 obj-$(CONFIG_PM_SLEEP) += main.o
 obj-$(CONFIG_PM_TRACE) += trace.o
 
-ifeq ($(CONFIG_DEBUG_DRIVER),y)
-EXTRA_CFLAGS += -DDEBUG
-endif
-ifeq ($(CONFIG_PM_VERBOSE),y)
-EXTRA_CFLAGS += -DDEBUG
-endif
+ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
+ccflags-$(CONFIG_PM_VERBOSE)   += -DDEBUG
index 36b98989b15ecaf79274a547a0733a42859fb0d5..7e7b5a66f042ad096b9960cdb6e6ebf66e759d6a 100644 (file)
@@ -1,5 +1,4 @@
-EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/cxgb3 \
-               -I$(TOPDIR)/drivers/infiniband/hw/cxgb3/core
+EXTRA_CFLAGS += -Idrivers/net/cxgb3
 
 obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o
 
index b242cee656e77e5bf1a57069e15db0517b389ee2..80e3f03b5041e0348f766b14caca2fa601efa82f 100644 (file)
@@ -31,8 +31,8 @@ extern struct rio_route_ops __end_rio_route_ops[];
 
 /* Helpers internal to the RIO core code */
 #define DECLARE_RIO_ROUTE_SECTION(section, vid, did, add_hook, get_hook)  \
-        static struct rio_route_ops __rio_route_ops __attribute_used__   \
-               __attribute__((__section__(#section))) = { vid, did, add_hook, get_hook };
+       static struct rio_route_ops __rio_route_ops __used   \
+       __section(section)= { vid, did, add_hook, get_hook };
 
 /**
  * DECLARE_RIO_ROUTE_OPS - Registers switch routing operations
index 9656139d2e990ce8c11e6ed34a2b61182da332ff..219ec06a8c7e2cd057a28898c4827776a1abf0c3 100644 (file)
@@ -236,6 +236,7 @@ config JBD_DEBUG
 
 config JBD2
        tristate
+       select CRC32
        help
          This is a generic journaling layer for block devices that support
          both 32-bit and 64-bit block numbers.  It is currently used by
index 33fe39ad4e0327a91e925c10ad54629c5f7c763a..0cc3597c11971380716d10083eee9fa7660f82dd 100644 (file)
@@ -546,11 +546,11 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
        dentry->d_op = &afs_fs_dentry_operations;
 
        d_add(dentry, inode);
-       _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%lu }",
+       _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }",
               fid.vnode,
               fid.unique,
               dentry->d_inode->i_ino,
-              dentry->d_inode->i_version);
+              (unsigned long long)dentry->d_inode->i_version);
 
        return NULL;
 }
@@ -630,9 +630,10 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
                 * been deleted and replaced, and the original vnode ID has
                 * been reused */
                if (fid.unique != vnode->fid.unique) {
-                       _debug("%s: file deleted (uq %u -> %u I:%lu)",
+                       _debug("%s: file deleted (uq %u -> %u I:%llu)",
                               dentry->d_name.name, fid.unique,
-                              vnode->fid.unique, dentry->d_inode->i_version);
+                              vnode->fid.unique,
+                              (unsigned long long)dentry->d_inode->i_version);
                        spin_lock(&vnode->lock);
                        set_bit(AFS_VNODE_DELETED, &vnode->flags);
                        spin_unlock(&vnode->lock);
index d196840127c6bdf7174defa94fc7184c25e2ad92..84750c8e9f95be1b07ba30ed7ab4778cf1ae9599 100644 (file)
@@ -301,7 +301,8 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 
        inode = dentry->d_inode;
 
-       _enter("{ ino=%lu v=%lu }", inode->i_ino, inode->i_version);
+       _enter("{ ino=%lu v=%llu }", inode->i_ino,
+               (unsigned long long)inode->i_version);
 
        generic_fillattr(inode, stat);
        return 0;
index 7249e014819e1a0432621d1d2576fd259f27632d..456c9ab7705b0fce1f7299ee455c26614330fb34 100644 (file)
@@ -3213,6 +3213,50 @@ static int buffer_cpu_notify(struct notifier_block *self,
        return NOTIFY_OK;
 }
 
+/**
+ * bh_uptodate_or_lock: Test whether the buffer is uptodate
+ * @bh: struct buffer_head
+ *
+ * Return true if the buffer is up-to-date and false,
+ * with the buffer locked, if not.
+ */
+int bh_uptodate_or_lock(struct buffer_head *bh)
+{
+       if (!buffer_uptodate(bh)) {
+               lock_buffer(bh);
+               if (!buffer_uptodate(bh))
+                       return 0;
+               unlock_buffer(bh);
+       }
+       return 1;
+}
+EXPORT_SYMBOL(bh_uptodate_or_lock);
+
+/**
+ * bh_submit_read: Submit a locked buffer for reading
+ * @bh: struct buffer_head
+ *
+ * Returns zero on success and -EIO on error.
+ */
+int bh_submit_read(struct buffer_head *bh)
+{
+       BUG_ON(!buffer_locked(bh));
+
+       if (buffer_uptodate(bh)) {
+               unlock_buffer(bh);
+               return 0;
+       }
+
+       get_bh(bh);
+       bh->b_end_io = end_buffer_read_sync;
+       submit_bh(READ, bh);
+       wait_on_buffer(bh);
+       if (buffer_uptodate(bh))
+               return 0;
+       return -EIO;
+}
+EXPORT_SYMBOL(bh_submit_read);
+
 void __init buffer_init(void)
 {
        int nrpages;
index da8cb3b3592c96ade1614ab8e553b5e16597b42a..ffdc022cae64adb62b5108d259f08fa3057b1423 100644 (file)
@@ -1376,7 +1376,7 @@ static int do_atm_ioctl(unsigned int fd, unsigned int cmd32, unsigned long arg)
         return -EINVAL;
 }
 
-static __attribute_used__ int 
+static __used int
 ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
        return -EINVAL;
index 154e25f13d772235e7c7d242a0b5e6fdbf977cd7..6abaf75163f0b17b1cbfb9b8b6e7ad563930bfdc 100644 (file)
@@ -680,11 +680,31 @@ static int ext2_check_descriptors (struct super_block * sb)
 static loff_t ext2_max_size(int bits)
 {
        loff_t res = EXT2_NDIR_BLOCKS;
-       /* This constant is calculated to be the largest file size for a
-        * dense, 4k-blocksize file such that the total number of
+       int meta_blocks;
+       loff_t upper_limit;
+
+       /* This is calculated to be the largest file size for a
+        * dense, file such that the total number of
         * sectors in the file, including data and all indirect blocks,
-        * does not exceed 2^32. */
-       const loff_t upper_limit = 0x1ff7fffd000LL;
+        * does not exceed 2^32 -1
+        * __u32 i_blocks representing the total number of
+        * 512 bytes blocks of the file
+        */
+       upper_limit = (1LL << 32) - 1;
+
+       /* total blocks in file system block size */
+       upper_limit >>= (bits - 9);
+
+
+       /* indirect blocks */
+       meta_blocks = 1;
+       /* double indirect blocks */
+       meta_blocks += 1 + (1LL << (bits-2));
+       /* tripple indirect blocks */
+       meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
+
+       upper_limit -= meta_blocks;
+       upper_limit <<= bits;
 
        res += 1LL << (bits-2);
        res += 1LL << (2*(bits-2));
@@ -692,6 +712,10 @@ static loff_t ext2_max_size(int bits)
        res <<= bits;
        if (res > upper_limit)
                res = upper_limit;
+
+       if (res > MAX_LFS_FILESIZE)
+               res = MAX_LFS_FILESIZE;
+
        return res;
 }
 
index cb14de1502c35783fd89d11f4b362cd28faaf91c..f3675cc630e97a7c5ab42251193050872957c5c1 100644 (file)
@@ -1436,11 +1436,31 @@ static void ext3_orphan_cleanup (struct super_block * sb,
 static loff_t ext3_max_size(int bits)
 {
        loff_t res = EXT3_NDIR_BLOCKS;
-       /* This constant is calculated to be the largest file size for a
-        * dense, 4k-blocksize file such that the total number of
+       int meta_blocks;
+       loff_t upper_limit;
+
+       /* This is calculated to be the largest file size for a
+        * dense, file such that the total number of
         * sectors in the file, including data and all indirect blocks,
-        * does not exceed 2^32. */
-       const loff_t upper_limit = 0x1ff7fffd000LL;
+        * does not exceed 2^32 -1
+        * __u32 i_blocks representing the total number of
+        * 512 bytes blocks of the file
+        */
+       upper_limit = (1LL << 32) - 1;
+
+       /* total blocks in file system block size */
+       upper_limit >>= (bits - 9);
+
+
+       /* indirect blocks */
+       meta_blocks = 1;
+       /* double indirect blocks */
+       meta_blocks += 1 + (1LL << (bits-2));
+       /* tripple indirect blocks */
+       meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
+
+       upper_limit -= meta_blocks;
+       upper_limit <<= bits;
 
        res += 1LL << (bits-2);
        res += 1LL << (2*(bits-2));
@@ -1448,6 +1468,10 @@ static loff_t ext3_max_size(int bits)
        res <<= bits;
        if (res > upper_limit)
                res = upper_limit;
+
+       if (res > MAX_LFS_FILESIZE)
+               res = MAX_LFS_FILESIZE;
+
        return res;
 }
 
index ae6e7e502ac9c0d5585370ba0c051410ab766ad8..ac6fa8ca0a2f1b7b3a99f03accce58f6f666e209 100644 (file)
@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o
 
 ext4dev-y      := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
                   ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-                  ext4_jbd2.o
+                  ext4_jbd2.o migrate.o mballoc.o
 
 ext4dev-$(CONFIG_EXT4DEV_FS_XATTR)     += xattr.o xattr_user.o xattr_trusted.o
 ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o
index 71ee95e534fdcb6c7128aa843accea7b9a082eec..ac75ea953d83fbfe3d3919783f85c963d905c264 100644 (file)
@@ -29,7 +29,7 @@
  * Calculate the block group number and offset, given a block number
  */
 void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
-               unsigned long *blockgrpp, ext4_grpblk_t *offsetp)
+               ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp)
 {
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
        ext4_grpblk_t offset;
@@ -46,7 +46,7 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
 /* Initializes an uninitialized block bitmap if given, and returns the
  * number of blocks free in the group. */
 unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
-                               int block_group, struct ext4_group_desc *gdp)
+                ext4_group_t block_group, struct ext4_group_desc *gdp)
 {
        unsigned long start;
        int bit, bit_max;
@@ -60,7 +60,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
                 * essentially implementing a per-group read-only flag. */
                if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
                        ext4_error(sb, __FUNCTION__,
-                                  "Checksum bad for group %u\n", block_group);
+                                 "Checksum bad for group %lu\n", block_group);
                        gdp->bg_free_blocks_count = 0;
                        gdp->bg_free_inodes_count = 0;
                        gdp->bg_itable_unused = 0;
@@ -153,7 +153,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
  *                     group descriptor
  */
 struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
-                                            unsigned int block_group,
+                                            ext4_group_t block_group,
                                             struct buffer_head ** bh)
 {
        unsigned long group_desc;
@@ -164,7 +164,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
        if (block_group >= sbi->s_groups_count) {
                ext4_error (sb, "ext4_get_group_desc",
                            "block_group >= groups_count - "
-                           "block_group = %d, groups_count = %lu",
+                           "block_group = %lu, groups_count = %lu",
                            block_group, sbi->s_groups_count);
 
                return NULL;
@@ -176,7 +176,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
        if (!sbi->s_group_desc[group_desc]) {
                ext4_error (sb, "ext4_get_group_desc",
                            "Group descriptor not loaded - "
-                           "block_group = %d, group_desc = %lu, desc = %lu",
+                           "block_group = %lu, group_desc = %lu, desc = %lu",
                             block_group, group_desc, offset);
                return NULL;
        }
@@ -189,18 +189,70 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
        return desc;
 }
 
+static int ext4_valid_block_bitmap(struct super_block *sb,
+                                       struct ext4_group_desc *desc,
+                                       unsigned int block_group,
+                                       struct buffer_head *bh)
+{
+       ext4_grpblk_t offset;
+       ext4_grpblk_t next_zero_bit;
+       ext4_fsblk_t bitmap_blk;
+       ext4_fsblk_t group_first_block;
+
+       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+               /* with FLEX_BG, the inode/block bitmaps and itable
+                * blocks may not be in the group at all
+                * so the bitmap validation will be skipped for those groups
+                * or it has to also read the block group where the bitmaps
+                * are located to verify they are set.
+                */
+               return 1;
+       }
+       group_first_block = ext4_group_first_block_no(sb, block_group);
+
+       /* check whether block bitmap block number is set */
+       bitmap_blk = ext4_block_bitmap(sb, desc);
+       offset = bitmap_blk - group_first_block;
+       if (!ext4_test_bit(offset, bh->b_data))
+               /* bad block bitmap */
+               goto err_out;
+
+       /* check whether the inode bitmap block number is set */
+       bitmap_blk = ext4_inode_bitmap(sb, desc);
+       offset = bitmap_blk - group_first_block;
+       if (!ext4_test_bit(offset, bh->b_data))
+               /* bad block bitmap */
+               goto err_out;
+
+       /* check whether the inode table block number is set */
+       bitmap_blk = ext4_inode_table(sb, desc);
+       offset = bitmap_blk - group_first_block;
+       next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
+                               offset + EXT4_SB(sb)->s_itb_per_group,
+                               offset);
+       if (next_zero_bit >= offset + EXT4_SB(sb)->s_itb_per_group)
+               /* good bitmap for inode tables */
+               return 1;
+
+err_out:
+       ext4_error(sb, __FUNCTION__,
+                       "Invalid block bitmap - "
+                       "block_group = %d, block = %llu",
+                       block_group, bitmap_blk);
+       return 0;
+}
 /**
  * read_block_bitmap()
  * @sb:                        super block
  * @block_group:       given block group
  *
- * Read the bitmap for a given block_group, reading into the specified
- * slot in the superblock's bitmap cache.
+ * Read the bitmap for a given block_group,and validate the
+ * bits for block/inode/inode tables are set in the bitmaps
  *
  * Return buffer_head on success or NULL in case of failure.
  */
 struct buffer_head *
-read_block_bitmap(struct super_block *sb, unsigned int block_group)
+read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 {
        struct ext4_group_desc * desc;
        struct buffer_head * bh = NULL;
@@ -210,25 +262,36 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
        if (!desc)
                return NULL;
        bitmap_blk = ext4_block_bitmap(sb, desc);
+       bh = sb_getblk(sb, bitmap_blk);
+       if (unlikely(!bh)) {
+               ext4_error(sb, __FUNCTION__,
+                           "Cannot read block bitmap - "
+                           "block_group = %d, block_bitmap = %llu",
+                           (int)block_group, (unsigned long long)bitmap_blk);
+               return NULL;
+       }
+       if (bh_uptodate_or_lock(bh))
+               return bh;
+
        if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-               bh = sb_getblk(sb, bitmap_blk);
-               if (!buffer_uptodate(bh)) {
-                       lock_buffer(bh);
-                       if (!buffer_uptodate(bh)) {
-                               ext4_init_block_bitmap(sb, bh, block_group,
-                                                      desc);
-                               set_buffer_uptodate(bh);
-                       }
-                       unlock_buffer(bh);
-               }
-       } else {
-               bh = sb_bread(sb, bitmap_blk);
+               ext4_init_block_bitmap(sb, bh, block_group, desc);
+               set_buffer_uptodate(bh);
+               unlock_buffer(bh);
+               return bh;
        }
-       if (!bh)
-               ext4_error (sb, __FUNCTION__,
+       if (bh_submit_read(bh) < 0) {
+               put_bh(bh);
+               ext4_error(sb, __FUNCTION__,
                            "Cannot read block bitmap - "
                            "block_group = %d, block_bitmap = %llu",
-                           block_group, bitmap_blk);
+                           (int)block_group, (unsigned long long)bitmap_blk);
+               return NULL;
+       }
+       if (!ext4_valid_block_bitmap(sb, desc, block_group, bh)) {
+               put_bh(bh);
+               return NULL;
+       }
+
        return bh;
 }
 /*
@@ -320,7 +383,7 @@ restart:
  */
 static int
 goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
-                       unsigned int group, struct super_block * sb)
+                       ext4_group_t group, struct super_block *sb)
 {
        ext4_fsblk_t group_first_block, group_last_block;
 
@@ -463,7 +526,7 @@ static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
  * when setting the reservation window size through ioctl before the file
  * is open for write (needs block allocation).
  *
- * Needs truncate_mutex protection prior to call this function.
+ * Needs down_write(i_data_sem) protection prior to call this function.
  */
 void ext4_init_block_alloc_info(struct inode *inode)
 {
@@ -514,6 +577,8 @@ void ext4_discard_reservation(struct inode *inode)
        struct ext4_reserve_window_node *rsv;
        spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock;
 
+       ext4_mb_discard_inode_preallocations(inode);
+
        if (!block_i)
                return;
 
@@ -540,7 +605,7 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
 {
        struct buffer_head *bitmap_bh = NULL;
        struct buffer_head *gd_bh;
-       unsigned long block_group;
+       ext4_group_t block_group;
        ext4_grpblk_t bit;
        unsigned long i;
        unsigned long overflow;
@@ -587,11 +652,13 @@ do_more:
            in_range(ext4_inode_bitmap(sb, desc), block, count) ||
            in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
            in_range(block + count - 1, ext4_inode_table(sb, desc),
-                    sbi->s_itb_per_group))
+                    sbi->s_itb_per_group)) {
                ext4_error (sb, "ext4_free_blocks",
                            "Freeing blocks in system zones - "
                            "Block = %llu, count = %lu",
                            block, count);
+               goto error_return;
+       }
 
        /*
         * We are about to start releasing blocks in the bitmap,
@@ -720,19 +787,29 @@ error_return:
  * @inode:             inode
  * @block:             start physical block to free
  * @count:             number of blocks to count
+ * @metadata:          Are these metadata blocks
  */
 void ext4_free_blocks(handle_t *handle, struct inode *inode,
-                       ext4_fsblk_t block, unsigned long count)
+                       ext4_fsblk_t block, unsigned long count,
+                       int metadata)
 {
        struct super_block * sb;
        unsigned long dquot_freed_blocks;
 
+       /* this isn't the right place to decide whether block is metadata
+        * inode.c/extents.c knows better, but for safety ... */
+       if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
+                       ext4_should_journal_data(inode))
+               metadata = 1;
+
        sb = inode->i_sb;
-       if (!sb) {
-               printk ("ext4_free_blocks: nonexistent device");
-               return;
-       }
-       ext4_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
+
+       if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info)
+               ext4_free_blocks_sb(handle, sb, block, count,
+                                               &dquot_freed_blocks);
+       else
+               ext4_mb_free_blocks(handle, inode, block, count,
+                                               metadata, &dquot_freed_blocks);
        if (dquot_freed_blocks)
                DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
        return;
@@ -920,9 +997,10 @@ claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh)
  * ext4_journal_release_buffer(), else we'll run out of credits.
  */
 static ext4_grpblk_t
-ext4_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
-                       struct buffer_head *bitmap_bh, ext4_grpblk_t grp_goal,
-                       unsigned long *count, struct ext4_reserve_window *my_rsv)
+ext4_try_to_allocate(struct super_block *sb, handle_t *handle,
+                       ext4_group_t group, struct buffer_head *bitmap_bh,
+                       ext4_grpblk_t grp_goal, unsigned long *count,
+                       struct ext4_reserve_window *my_rsv)
 {
        ext4_fsblk_t group_first_block;
        ext4_grpblk_t start, end;
@@ -1156,7 +1234,7 @@ static int find_next_reservable_window(
  */
 static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
                ext4_grpblk_t grp_goal, struct super_block *sb,
-               unsigned int group, struct buffer_head *bitmap_bh)
+               ext4_group_t group, struct buffer_head *bitmap_bh)
 {
        struct ext4_reserve_window_node *search_head;
        ext4_fsblk_t group_first_block, group_end_block, start_block;
@@ -1354,7 +1432,7 @@ static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
  */
 static ext4_grpblk_t
 ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
-                       unsigned int group, struct buffer_head *bitmap_bh,
+                       ext4_group_t group, struct buffer_head *bitmap_bh,
                        ext4_grpblk_t grp_goal,
                        struct ext4_reserve_window_node * my_rsv,
                        unsigned long *count, int *errp)
@@ -1510,7 +1588,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 }
 
 /**
- * ext4_new_blocks() -- core block(s) allocation function
+ * ext4_new_blocks_old() -- core block(s) allocation function
  * @handle:            handle to this transaction
  * @inode:             file inode
  * @goal:              given target block(filesystem wide)
@@ -1523,17 +1601,17 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
  * any specific goal block.
  *
  */
-ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
+ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
                        ext4_fsblk_t goal, unsigned long *count, int *errp)
 {
        struct buffer_head *bitmap_bh = NULL;
        struct buffer_head *gdp_bh;
-       unsigned long group_no;
-       int goal_group;
+       ext4_group_t group_no;
+       ext4_group_t goal_group;
        ext4_grpblk_t grp_target_blk;   /* blockgroup relative goal block */
        ext4_grpblk_t grp_alloc_blk;    /* blockgroup-relative allocated block*/
        ext4_fsblk_t ret_block;         /* filesyetem-wide allocated block */
-       int bgi;                        /* blockgroup iteration index */
+       ext4_group_t bgi;                       /* blockgroup iteration index */
        int fatal = 0, err;
        int performed_allocation = 0;
        ext4_grpblk_t free_blocks;      /* number of free blocks in a group */
@@ -1544,10 +1622,7 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
        struct ext4_reserve_window_node *my_rsv = NULL;
        struct ext4_block_alloc_info *block_i;
        unsigned short windowsz = 0;
-#ifdef EXT4FS_DEBUG
-       static int goal_hits, goal_attempts;
-#endif
-       unsigned long ngroups;
+       ext4_group_t ngroups;
        unsigned long num = *count;
 
        *errp = -ENOSPC;
@@ -1567,7 +1642,7 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
 
        sbi = EXT4_SB(sb);
        es = EXT4_SB(sb)->s_es;
-       ext4_debug("goal=%lu.\n", goal);
+       ext4_debug("goal=%llu.\n", goal);
        /*
         * Allocate a block from reservation only when
         * filesystem is mounted with reservation(default,-o reservation), and
@@ -1677,7 +1752,7 @@ retry_alloc:
 
 allocated:
 
-       ext4_debug("using block group %d(%d)\n",
+       ext4_debug("using block group %lu(%d)\n",
                        group_no, gdp->bg_free_blocks_count);
 
        BUFFER_TRACE(gdp_bh, "get_write_access");
@@ -1692,11 +1767,13 @@ allocated:
            in_range(ret_block, ext4_inode_table(sb, gdp),
                     EXT4_SB(sb)->s_itb_per_group) ||
            in_range(ret_block + num - 1, ext4_inode_table(sb, gdp),
-                    EXT4_SB(sb)->s_itb_per_group))
+                    EXT4_SB(sb)->s_itb_per_group)) {
                ext4_error(sb, "ext4_new_block",
                            "Allocating block in system zone - "
                            "blocks from %llu, length %lu",
                             ret_block, num);
+               goto out;
+       }
 
        performed_allocation = 1;
 
@@ -1743,9 +1820,6 @@ allocated:
         * list of some description.  We don't know in advance whether
         * the caller wants to use it as metadata or data.
         */
-       ext4_debug("allocating block %lu. Goal hits %d of %d.\n",
-                       ret_block, goal_hits, goal_attempts);
-
        spin_lock(sb_bgl_lock(sbi, group_no));
        if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
                gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
@@ -1787,13 +1861,46 @@ out:
 }
 
 ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode,
-                       ext4_fsblk_t goal, int *errp)
+               ext4_fsblk_t goal, int *errp)
 {
-       unsigned long count = 1;
+       struct ext4_allocation_request ar;
+       ext4_fsblk_t ret;
 
-       return ext4_new_blocks(handle, inode, goal, &count, errp);
+       if (!test_opt(inode->i_sb, MBALLOC)) {
+               unsigned long count = 1;
+               ret = ext4_new_blocks_old(handle, inode, goal, &count, errp);
+               return ret;
+       }
+
+       memset(&ar, 0, sizeof(ar));
+       ar.inode = inode;
+       ar.goal = goal;
+       ar.len = 1;
+       ret = ext4_mb_new_blocks(handle, &ar, errp);
+       return ret;
+}
+
+ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
+               ext4_fsblk_t goal, unsigned long *count, int *errp)
+{
+       struct ext4_allocation_request ar;
+       ext4_fsblk_t ret;
+
+       if (!test_opt(inode->i_sb, MBALLOC)) {
+               ret = ext4_new_blocks_old(handle, inode, goal, count, errp);
+               return ret;
+       }
+
+       memset(&ar, 0, sizeof(ar));
+       ar.inode = inode;
+       ar.goal = goal;
+       ar.len = *count;
+       ret = ext4_mb_new_blocks(handle, &ar, errp);
+       *count = ar.len;
+       return ret;
 }
 
+
 /**
  * ext4_count_free_blocks() -- count filesystem free blocks
  * @sb:                superblock
@@ -1804,8 +1911,8 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 {
        ext4_fsblk_t desc_count;
        struct ext4_group_desc *gdp;
-       int i;
-       unsigned long ngroups = EXT4_SB(sb)->s_groups_count;
+       ext4_group_t i;
+       ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
 #ifdef EXT4FS_DEBUG
        struct ext4_super_block *es;
        ext4_fsblk_t bitmap_count;
@@ -1829,14 +1936,14 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
                        continue;
 
                x = ext4_count_free(bitmap_bh, sb->s_blocksize);
-               printk("group %d: stored = %d, counted = %lu\n",
+               printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
                        i, le16_to_cpu(gdp->bg_free_blocks_count), x);
                bitmap_count += x;
        }
        brelse(bitmap_bh);
        printk("ext4_count_free_blocks: stored = %llu"
                ", computed = %llu, %llu\n",
-              EXT4_FREE_BLOCKS_COUNT(es),
+               ext4_free_blocks_count(es),
                desc_count, bitmap_count);
        return bitmap_count;
 #else
@@ -1853,7 +1960,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 #endif
 }
 
-static inline int test_root(int a, int b)
+static inline int test_root(ext4_group_t a, int b)
 {
        int num = b;
 
@@ -1862,7 +1969,7 @@ static inline int test_root(int a, int b)
        return num == a;
 }
 
-static int ext4_group_sparse(int group)
+static int ext4_group_sparse(ext4_group_t group)
 {
        if (group <= 1)
                return 1;
@@ -1880,7 +1987,7 @@ static int ext4_group_sparse(int group)
  *     Return the number of blocks used by the superblock (primary or backup)
  *     in this group.  Currently this will be only 0 or 1.
  */
-int ext4_bg_has_super(struct super_block *sb, int group)
+int ext4_bg_has_super(struct super_block *sb, ext4_group_t group)
 {
        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
                                EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
@@ -1889,18 +1996,20 @@ int ext4_bg_has_super(struct super_block *sb, int group)
        return 1;
 }
 
-static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, int group)
+static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
+                                       ext4_group_t group)
 {
        unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb);
-       unsigned long first = metagroup * EXT4_DESC_PER_BLOCK(sb);
-       unsigned long last = first + EXT4_DESC_PER_BLOCK(sb) - 1;
+       ext4_group_t first = metagroup * EXT4_DESC_PER_BLOCK(sb);
+       ext4_group_t last = first + EXT4_DESC_PER_BLOCK(sb) - 1;
 
        if (group == first || group == first + 1 || group == last)
                return 1;
        return 0;
 }
 
-static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, int group)
+static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
+                                       ext4_group_t group)
 {
        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
                                EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
@@ -1918,7 +2027,7 @@ static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, int group)
  *     (primary or backup) in this group.  In the future there may be a
  *     different number of descriptor blocks in each group.
  */
-unsigned long ext4_bg_num_gdb(struct super_block *sb, int group)
+unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
 {
        unsigned long first_meta_bg =
                        le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
index f612bef983158f4a82c3d2a4b1211a94e713019a..33888bb58144fc1660d4106dbcbd689b560d2aa0 100644 (file)
@@ -67,7 +67,7 @@ int ext4_check_dir_entry (const char * function, struct inode * dir,
                          unsigned long offset)
 {
        const char * error_msg = NULL;
-       const int rlen = le16_to_cpu(de->rec_len);
+       const int rlen = ext4_rec_len_from_disk(de->rec_len);
 
        if (rlen < EXT4_DIR_REC_LEN(1))
                error_msg = "rec_len is smaller than minimal";
@@ -124,7 +124,7 @@ static int ext4_readdir(struct file * filp,
        offset = filp->f_pos & (sb->s_blocksize - 1);
 
        while (!error && !stored && filp->f_pos < inode->i_size) {
-               unsigned long blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb);
+               ext4_lblk_t blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb);
                struct buffer_head map_bh;
                struct buffer_head *bh = NULL;
 
@@ -172,10 +172,10 @@ revalidate:
                                 * least that it is non-zero.  A
                                 * failure will be detected in the
                                 * dirent test below. */
-                               if (le16_to_cpu(de->rec_len) <
-                                               EXT4_DIR_REC_LEN(1))
+                               if (ext4_rec_len_from_disk(de->rec_len)
+                                               EXT4_DIR_REC_LEN(1))
                                        break;
-                               i += le16_to_cpu(de->rec_len);
+                               i += ext4_rec_len_from_disk(de->rec_len);
                        }
                        offset = i;
                        filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
@@ -197,7 +197,7 @@ revalidate:
                                ret = stored;
                                goto out;
                        }
-                       offset += le16_to_cpu(de->rec_len);
+                       offset += ext4_rec_len_from_disk(de->rec_len);
                        if (le32_to_cpu(de->inode)) {
                                /* We might block in the next section
                                 * if the data destination is
@@ -219,7 +219,7 @@ revalidate:
                                        goto revalidate;
                                stored ++;
                        }
-                       filp->f_pos += le16_to_cpu(de->rec_len);
+                       filp->f_pos += ext4_rec_len_from_disk(de->rec_len);
                }
                offset = 0;
                brelse (bh);
index 85287742f2ae487562be3a8ac39e40774a5de9d3..bc7081f1fbe80dd5add381c5746b1bbc74e4883a 100644 (file)
@@ -61,7 +61,7 @@ static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
  * idx_pblock:
  * combine low and high parts of a leaf physical block number into ext4_fsblk_t
  */
-static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
+ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
 {
        ext4_fsblk_t block;
 
@@ -75,7 +75,7 @@ static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
  * stores a large physical block number into an extent struct,
  * breaking it into parts
  */
-static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
+void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
 {
        ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
        ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
@@ -144,7 +144,7 @@ static int ext4_ext_dirty(handle_t *handle, struct inode *inode,
 
 static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
                              struct ext4_ext_path *path,
-                             ext4_fsblk_t block)
+                             ext4_lblk_t block)
 {
        struct ext4_inode_info *ei = EXT4_I(inode);
        ext4_fsblk_t bg_start;
@@ -367,13 +367,14 @@ static void ext4_ext_drop_refs(struct ext4_ext_path *path)
  * the header must be checked before calling this
  */
 static void
-ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int block)
+ext4_ext_binsearch_idx(struct inode *inode,
+                       struct ext4_ext_path *path, ext4_lblk_t block)
 {
        struct ext4_extent_header *eh = path->p_hdr;
        struct ext4_extent_idx *r, *l, *m;
 
 
-       ext_debug("binsearch for %d(idx):  ", block);
+       ext_debug("binsearch for %u(idx):  ", block);
 
        l = EXT_FIRST_INDEX(eh) + 1;
        r = EXT_LAST_INDEX(eh);
@@ -425,7 +426,8 @@ ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int bloc
  * the header must be checked before calling this
  */
 static void
-ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
+ext4_ext_binsearch(struct inode *inode,
+               struct ext4_ext_path *path, ext4_lblk_t block)
 {
        struct ext4_extent_header *eh = path->p_hdr;
        struct ext4_extent *r, *l, *m;
@@ -438,7 +440,7 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
                return;
        }
 
-       ext_debug("binsearch for %d:  ", block);
+       ext_debug("binsearch for %u:  ", block);
 
        l = EXT_FIRST_EXTENT(eh) + 1;
        r = EXT_LAST_EXTENT(eh);
@@ -494,7 +496,8 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
 }
 
 struct ext4_ext_path *
-ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path)
+ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
+                                       struct ext4_ext_path *path)
 {
        struct ext4_extent_header *eh;
        struct buffer_head *bh;
@@ -763,7 +766,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
        while (k--) {
                oldblock = newblock;
                newblock = ablocks[--a];
-               bh = sb_getblk(inode->i_sb, (ext4_fsblk_t)newblock);
+               bh = sb_getblk(inode->i_sb, newblock);
                if (!bh) {
                        err = -EIO;
                        goto cleanup;
@@ -783,9 +786,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
                fidx->ei_block = border;
                ext4_idx_store_pblock(fidx, oldblock);
 
-               ext_debug("int.index at %d (block %llu): %lu -> %llu\n", i,
-                               newblock, (unsigned long) le32_to_cpu(border),
-                               oldblock);
+               ext_debug("int.index at %d (block %llu): %u -> %llu\n",
+                               i, newblock, le32_to_cpu(border), oldblock);
                /* copy indexes */
                m = 0;
                path[i].p_idx++;
@@ -851,7 +853,7 @@ cleanup:
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
-                       ext4_free_blocks(handle, inode, ablocks[i], 1);
+                       ext4_free_blocks(handle, inode, ablocks[i], 1, 1);
                }
        }
        kfree(ablocks);
@@ -979,8 +981,8 @@ repeat:
                /* refill path */
                ext4_ext_drop_refs(path);
                path = ext4_ext_find_extent(inode,
-                                           le32_to_cpu(newext->ee_block),
-                                           path);
+                                   (ext4_lblk_t)le32_to_cpu(newext->ee_block),
+                                   path);
                if (IS_ERR(path))
                        err = PTR_ERR(path);
        } else {
@@ -992,8 +994,8 @@ repeat:
                /* refill path */
                ext4_ext_drop_refs(path);
                path = ext4_ext_find_extent(inode,
-                                           le32_to_cpu(newext->ee_block),
-                                           path);
+                                  (ext4_lblk_t)le32_to_cpu(newext->ee_block),
+                                   path);
                if (IS_ERR(path)) {
                        err = PTR_ERR(path);
                        goto out;
@@ -1014,6 +1016,150 @@ out:
        return err;
 }
 
+/*
+ * search the closest allocated block to the left for *logical
+ * and returns it at @logical + it's physical address at @phys
+ * if *logical is the smallest allocated block, the function
+ * returns 0 at @phys
+ * return value contains 0 (success) or error code
+ */
+int
+ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
+                       ext4_lblk_t *logical, ext4_fsblk_t *phys)
+{
+       struct ext4_extent_idx *ix;
+       struct ext4_extent *ex;
+       int depth, ee_len;
+
+       BUG_ON(path == NULL);
+       depth = path->p_depth;
+       *phys = 0;
+
+       if (depth == 0 && path->p_ext == NULL)
+               return 0;
+
+       /* usually extent in the path covers blocks smaller
+        * then *logical, but it can be that extent is the
+        * first one in the file */
+
+       ex = path[depth].p_ext;
+       ee_len = ext4_ext_get_actual_len(ex);
+       if (*logical < le32_to_cpu(ex->ee_block)) {
+               BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex);
+               while (--depth >= 0) {
+                       ix = path[depth].p_idx;
+                       BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr));
+               }
+               return 0;
+       }
+
+       BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len));
+
+       *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
+       *phys = ext_pblock(ex) + ee_len - 1;
+       return 0;
+}
+
+/*
+ * search the closest allocated block to the right for *logical
+ * and returns it at @logical + it's physical address at @phys
+ * if *logical is the smallest allocated block, the function
+ * returns 0 at @phys
+ * return value contains 0 (success) or error code
+ */
+int
+ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
+                       ext4_lblk_t *logical, ext4_fsblk_t *phys)
+{
+       struct buffer_head *bh = NULL;
+       struct ext4_extent_header *eh;
+       struct ext4_extent_idx *ix;
+       struct ext4_extent *ex;
+       ext4_fsblk_t block;
+       int depth, ee_len;
+
+       BUG_ON(path == NULL);
+       depth = path->p_depth;
+       *phys = 0;
+
+       if (depth == 0 && path->p_ext == NULL)
+               return 0;
+
+       /* usually extent in the path covers blocks smaller
+        * then *logical, but it can be that extent is the
+        * first one in the file */
+
+       ex = path[depth].p_ext;
+       ee_len = ext4_ext_get_actual_len(ex);
+       if (*logical < le32_to_cpu(ex->ee_block)) {
+               BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex);
+               while (--depth >= 0) {
+                       ix = path[depth].p_idx;
+                       BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr));
+               }
+               *logical = le32_to_cpu(ex->ee_block);
+               *phys = ext_pblock(ex);
+               return 0;
+       }
+
+       BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len));
+
+       if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
+               /* next allocated block in this leaf */
+               ex++;
+               *logical = le32_to_cpu(ex->ee_block);
+               *phys = ext_pblock(ex);
+               return 0;
+       }
+
+       /* go up and search for index to the right */
+       while (--depth >= 0) {
+               ix = path[depth].p_idx;
+               if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
+                       break;
+       }
+
+       if (depth < 0) {
+               /* we've gone up to the root and
+                * found no index to the right */
+               return 0;
+       }
+
+       /* we've found index to the right, let's
+        * follow it and find the closest allocated
+        * block to the right */
+       ix++;
+       block = idx_pblock(ix);
+       while (++depth < path->p_depth) {
+               bh = sb_bread(inode->i_sb, block);
+               if (bh == NULL)
+                       return -EIO;
+               eh = ext_block_hdr(bh);
+               if (ext4_ext_check_header(inode, eh, depth)) {
+                       put_bh(bh);
+                       return -EIO;
+               }
+               ix = EXT_FIRST_INDEX(eh);
+               block = idx_pblock(ix);
+               put_bh(bh);
+       }
+
+       bh = sb_bread(inode->i_sb, block);
+       if (bh == NULL)
+               return -EIO;
+       eh = ext_block_hdr(bh);
+       if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) {
+               put_bh(bh);
+               return -EIO;
+       }
+       ex = EXT_FIRST_EXTENT(eh);
+       *logical = le32_to_cpu(ex->ee_block);
+       *phys = ext_pblock(ex);
+       put_bh(bh);
+       return 0;
+
+}
+
 /*
  * ext4_ext_next_allocated_block:
  * returns allocated block in subsequent extent or EXT_MAX_BLOCK.
@@ -1021,7 +1167,7 @@ out:
  * allocated block. Thus, index entries have to be consistent
  * with leaves.
  */
-static unsigned long
+static ext4_lblk_t
 ext4_ext_next_allocated_block(struct ext4_ext_path *path)
 {
        int depth;
@@ -1054,7 +1200,7 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
  * ext4_ext_next_leaf_block:
  * returns first allocated block from next leaf or EXT_MAX_BLOCK
  */
-static unsigned ext4_ext_next_leaf_block(struct inode *inode,
+static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
                                        struct ext4_ext_path *path)
 {
        int depth;
@@ -1072,7 +1218,8 @@ static unsigned ext4_ext_next_leaf_block(struct inode *inode,
        while (depth >= 0) {
                if (path[depth].p_idx !=
                                EXT_LAST_INDEX(path[depth].p_hdr))
-                 return le32_to_cpu(path[depth].p_idx[1].ei_block);
+                       return (ext4_lblk_t)
+                               le32_to_cpu(path[depth].p_idx[1].ei_block);
                depth--;
        }
 
@@ -1085,7 +1232,7 @@ static unsigned ext4_ext_next_leaf_block(struct inode *inode,
  * then we have to correct all indexes above.
  * TODO: do we need to correct tree in all cases?
  */
-int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
+static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
                                struct ext4_ext_path *path)
 {
        struct ext4_extent_header *eh;
@@ -1171,7 +1318,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
        if (ext1_ee_len + ext2_ee_len > max_len)
                return 0;
 #ifdef AGGRESSIVE_TEST
-       if (le16_to_cpu(ex1->ee_len) >= 4)
+       if (ext1_ee_len >= 4)
                return 0;
 #endif
 
@@ -1239,7 +1386,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode,
                                    struct ext4_extent *newext,
                                    struct ext4_ext_path *path)
 {
-       unsigned long b1, b2;
+       ext4_lblk_t b1, b2;
        unsigned int depth, len1;
        unsigned int ret = 0;
 
@@ -1260,7 +1407,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode,
                        goto out;
        }
 
-       /* check for wrap through zero */
+       /* check for wrap through zero on extent logical start block*/
        if (b1 + len1 < b1) {
                len1 = EXT_MAX_BLOCK - b1;
                newext->ee_len = cpu_to_le16(len1);
@@ -1290,7 +1437,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
        struct ext4_extent *ex, *fex;
        struct ext4_extent *nearex; /* nearest extent */
        struct ext4_ext_path *npath = NULL;
-       int depth, len, err, next;
+       int depth, len, err;
+       ext4_lblk_t next;
        unsigned uninitialized = 0;
 
        BUG_ON(ext4_ext_get_actual_len(newext) == 0);
@@ -1435,114 +1583,8 @@ cleanup:
        return err;
 }
 
-int ext4_ext_walk_space(struct inode *inode, unsigned long block,
-                       unsigned long num, ext_prepare_callback func,
-                       void *cbdata)
-{
-       struct ext4_ext_path *path = NULL;
-       struct ext4_ext_cache cbex;
-       struct ext4_extent *ex;
-       unsigned long next, start = 0, end = 0;
-       unsigned long last = block + num;
-       int depth, exists, err = 0;
-
-       BUG_ON(func == NULL);
-       BUG_ON(inode == NULL);
-
-       while (block < last && block != EXT_MAX_BLOCK) {
-               num = last - block;
-               /* find extent for this block */
-               path = ext4_ext_find_extent(inode, block, path);
-               if (IS_ERR(path)) {
-                       err = PTR_ERR(path);
-                       path = NULL;
-                       break;
-               }
-
-               depth = ext_depth(inode);
-               BUG_ON(path[depth].p_hdr == NULL);
-               ex = path[depth].p_ext;
-               next = ext4_ext_next_allocated_block(path);
-
-               exists = 0;
-               if (!ex) {
-                       /* there is no extent yet, so try to allocate
-                        * all requested space */
-                       start = block;
-                       end = block + num;
-               } else if (le32_to_cpu(ex->ee_block) > block) {
-                       /* need to allocate space before found extent */
-                       start = block;
-                       end = le32_to_cpu(ex->ee_block);
-                       if (block + num < end)
-                               end = block + num;
-               } else if (block >= le32_to_cpu(ex->ee_block)
-                                       + ext4_ext_get_actual_len(ex)) {
-                       /* need to allocate space after found extent */
-                       start = block;
-                       end = block + num;
-                       if (end >= next)
-                               end = next;
-               } else if (block >= le32_to_cpu(ex->ee_block)) {
-                       /*
-                        * some part of requested space is covered
-                        * by found extent
-                        */
-                       start = block;
-                       end = le32_to_cpu(ex->ee_block)
-                               + ext4_ext_get_actual_len(ex);
-                       if (block + num < end)
-                               end = block + num;
-                       exists = 1;
-               } else {
-                       BUG();
-               }
-               BUG_ON(end <= start);
-
-               if (!exists) {
-                       cbex.ec_block = start;
-                       cbex.ec_len = end - start;
-                       cbex.ec_start = 0;
-                       cbex.ec_type = EXT4_EXT_CACHE_GAP;
-               } else {
-                       cbex.ec_block = le32_to_cpu(ex->ee_block);
-                       cbex.ec_len = ext4_ext_get_actual_len(ex);
-                       cbex.ec_start = ext_pblock(ex);
-                       cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
-               }
-
-               BUG_ON(cbex.ec_len == 0);
-               err = func(inode, path, &cbex, cbdata);
-               ext4_ext_drop_refs(path);
-
-               if (err < 0)
-                       break;
-               if (err == EXT_REPEAT)
-                       continue;
-               else if (err == EXT_BREAK) {
-                       err = 0;
-                       break;
-               }
-
-               if (ext_depth(inode) != depth) {
-                       /* depth was changed. we have to realloc path */
-                       kfree(path);
-                       path = NULL;
-               }
-
-               block = cbex.ec_block + cbex.ec_len;
-       }
-
-       if (path) {
-               ext4_ext_drop_refs(path);
-               kfree(path);
-       }
-
-       return err;
-}
-
 static void
-ext4_ext_put_in_cache(struct inode *inode, __u32 block,
+ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
                        __u32 len, ext4_fsblk_t start, int type)
 {
        struct ext4_ext_cache *cex;
@@ -1561,10 +1603,11 @@ ext4_ext_put_in_cache(struct inode *inode, __u32 block,
  */
 static void
 ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
-                               unsigned long block)
+                               ext4_lblk_t block)
 {
        int depth = ext_depth(inode);
-       unsigned long lblock, len;
+       unsigned long len;
+       ext4_lblk_t lblock;
        struct ext4_extent *ex;
 
        ex = path[depth].p_ext;
@@ -1576,32 +1619,34 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
        } else if (block < le32_to_cpu(ex->ee_block)) {
                lblock = block;
                len = le32_to_cpu(ex->ee_block) - block;
-               ext_debug("cache gap(before): %lu [%lu:%lu]",
-                               (unsigned long) block,
-                               (unsigned long) le32_to_cpu(ex->ee_block),
-                               (unsigned long) ext4_ext_get_actual_len(ex));
+               ext_debug("cache gap(before): %u [%u:%u]",
+                               block,
+                               le32_to_cpu(ex->ee_block),
+                                ext4_ext_get_actual_len(ex));
        } else if (block >= le32_to_cpu(ex->ee_block)
                        + ext4_ext_get_actual_len(ex)) {
+               ext4_lblk_t next;
                lblock = le32_to_cpu(ex->ee_block)
                        + ext4_ext_get_actual_len(ex);
-               len = ext4_ext_next_allocated_block(path);
-               ext_debug("cache gap(after): [%lu:%lu] %lu",
-                               (unsigned long) le32_to_cpu(ex->ee_block),
-                               (unsigned long) ext4_ext_get_actual_len(ex),
-                               (unsigned long) block);
-               BUG_ON(len == lblock);
-               len = len - lblock;
+
+               next = ext4_ext_next_allocated_block(path);
+               ext_debug("cache gap(after): [%u:%u] %u",
+                               le32_to_cpu(ex->ee_block),
+                               ext4_ext_get_actual_len(ex),
+                               block);
+               BUG_ON(next == lblock);
+               len = next - lblock;
        } else {
                lblock = len = 0;
                BUG();
        }
 
-       ext_debug(" -> %lu:%lu\n", (unsigned long) lblock, len);
+       ext_debug(" -> %u:%lu\n", lblock, len);
        ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP);
 }
 
 static int
-ext4_ext_in_cache(struct inode *inode, unsigned long block,
+ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
                        struct ext4_extent *ex)
 {
        struct ext4_ext_cache *cex;
@@ -1618,11 +1663,9 @@ ext4_ext_in_cache(struct inode *inode, unsigned long block,
                ex->ee_block = cpu_to_le32(cex->ec_block);
                ext4_ext_store_pblock(ex, cex->ec_start);
                ex->ee_len = cpu_to_le16(cex->ec_len);
-               ext_debug("%lu cached by %lu:%lu:%llu\n",
-                               (unsigned long) block,
-                               (unsigned long) cex->ec_block,
-                               (unsigned long) cex->ec_len,
-                               cex->ec_start);
+               ext_debug("%u cached by %u:%u:%llu\n",
+                               block,
+                               cex->ec_block, cex->ec_len, cex->ec_start);
                return cex->ec_type;
        }
 
@@ -1636,7 +1679,7 @@ ext4_ext_in_cache(struct inode *inode, unsigned long block,
  * It's used in truncate case only, thus all requests are for
  * last index in the block only.
  */
-int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
+static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
                        struct ext4_ext_path *path)
 {
        struct buffer_head *bh;
@@ -1657,7 +1700,7 @@ int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
        ext_debug("index is empty, remove it, free block %llu\n", leaf);
        bh = sb_find_get_block(inode->i_sb, leaf);
        ext4_forget(handle, 1, inode, bh, leaf);
-       ext4_free_blocks(handle, inode, leaf, 1);
+       ext4_free_blocks(handle, inode, leaf, 1, 1);
        return err;
 }
 
@@ -1666,7 +1709,7 @@ int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
  * This routine returns max. credits that the extent tree can consume.
  * It should be OK for low-performance paths like ->writepage()
  * To allow many writing processes to fit into a single transaction,
- * the caller should calculate credits under truncate_mutex and
+ * the caller should calculate credits under i_data_sem and
  * pass the actual path.
  */
 int ext4_ext_calc_credits_for_insert(struct inode *inode,
@@ -1714,12 +1757,14 @@ int ext4_ext_calc_credits_for_insert(struct inode *inode,
 
 static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
                                struct ext4_extent *ex,
-                               unsigned long from, unsigned long to)
+                               ext4_lblk_t from, ext4_lblk_t to)
 {
        struct buffer_head *bh;
        unsigned short ee_len =  ext4_ext_get_actual_len(ex);
-       int i;
+       int i, metadata = 0;
 
+       if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+               metadata = 1;
 #ifdef EXTENTS_STATS
        {
                struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -1738,42 +1783,45 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
        if (from >= le32_to_cpu(ex->ee_block)
            && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
                /* tail removal */
-               unsigned long num;
+               ext4_lblk_t num;
                ext4_fsblk_t start;
+
                num = le32_to_cpu(ex->ee_block) + ee_len - from;
                start = ext_pblock(ex) + ee_len - num;
-               ext_debug("free last %lu blocks starting %llu\n", num, start);
+               ext_debug("free last %u blocks starting %llu\n", num, start);
                for (i = 0; i < num; i++) {
                        bh = sb_find_get_block(inode->i_sb, start + i);
                        ext4_forget(handle, 0, inode, bh, start + i);
                }
-               ext4_free_blocks(handle, inode, start, num);
+               ext4_free_blocks(handle, inode, start, num, metadata);
        } else if (from == le32_to_cpu(ex->ee_block)
                   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
-               printk("strange request: removal %lu-%lu from %u:%u\n",
+               printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
                        from, to, le32_to_cpu(ex->ee_block), ee_len);
        } else {
-               printk("strange request: removal(2) %lu-%lu from %u:%u\n",
-                       from, to, le32_to_cpu(ex->ee_block), ee_len);
+               printk(KERN_INFO "strange request: removal(2) "
+                               "%u-%u from %u:%u\n",
+                               from, to, le32_to_cpu(ex->ee_block), ee_len);
        }
        return 0;
 }
 
 static int
 ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
-               struct ext4_ext_path *path, unsigned long start)
+               struct ext4_ext_path *path, ext4_lblk_t start)
 {
        int err = 0, correct_index = 0;
        int depth = ext_depth(inode), credits;
        struct ext4_extent_header *eh;
-       unsigned a, b, block, num;
-       unsigned long ex_ee_block;
+       ext4_lblk_t a, b, block;
+       unsigned num;
+       ext4_lblk_t ex_ee_block;
        unsigned short ex_ee_len;
        unsigned uninitialized = 0;
        struct ext4_extent *ex;
 
        /* the header must be checked already in ext4_ext_remove_space() */
-       ext_debug("truncate since %lu in leaf\n", start);
+       ext_debug("truncate since %u in leaf\n", start);
        if (!path[depth].p_hdr)
                path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
        eh = path[depth].p_hdr;
@@ -1904,7 +1952,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path)
        return 1;
 }
 
-int ext4_ext_remove_space(struct inode *inode, unsigned long start)
+static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
 {
        struct super_block *sb = inode->i_sb;
        int depth = ext_depth(inode);
@@ -1912,7 +1960,7 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start)
        handle_t *handle;
        int i = 0, err = 0;
 
-       ext_debug("truncate since %lu\n", start);
+       ext_debug("truncate since %u\n", start);
 
        /* probably first extent we're gonna free will be last in block */
        handle = ext4_journal_start(inode, depth + 1);
@@ -2094,17 +2142,19 @@ void ext4_ext_release(struct super_block *sb)
  *   b> Splits in two extents: Write is happening at either end of the extent
  *   c> Splits in three extents: Somone is writing in middle of the extent
  */
-int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode,
-                                       struct ext4_ext_path *path,
-                                       ext4_fsblk_t iblock,
-                                       unsigned long max_blocks)
+static int ext4_ext_convert_to_initialized(handle_t *handle,
+                                               struct inode *inode,
+                                               struct ext4_ext_path *path,
+                                               ext4_lblk_t iblock,
+                                               unsigned long max_blocks)
 {
        struct ext4_extent *ex, newex;
        struct ext4_extent *ex1 = NULL;
        struct ext4_extent *ex2 = NULL;
        struct ext4_extent *ex3 = NULL;
        struct ext4_extent_header *eh;
-       unsigned int allocated, ee_block, ee_len, depth;
+       ext4_lblk_t ee_block;
+       unsigned int allocated, ee_len, depth;
        ext4_fsblk_t newblock;
        int err = 0;
        int ret = 0;
@@ -2225,8 +2275,13 @@ out:
        return err ? err : allocated;
 }
 
+/*
+ * Need to be called with
+ * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
+ * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
+ */
 int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
-                       ext4_fsblk_t iblock,
+                       ext4_lblk_t iblock,
                        unsigned long max_blocks, struct buffer_head *bh_result,
                        int create, int extend_disksize)
 {
@@ -2236,11 +2291,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        ext4_fsblk_t goal, newblock;
        int err = 0, depth, ret;
        unsigned long allocated = 0;
+       struct ext4_allocation_request ar;
 
        __clear_bit(BH_New, &bh_result->b_state);
-       ext_debug("blocks %d/%lu requested for inode %u\n", (int) iblock,
-                       max_blocks, (unsigned) inode->i_ino);
-       mutex_lock(&EXT4_I(inode)->truncate_mutex);
+       ext_debug("blocks %u/%lu requested for inode %u\n",
+                       iblock, max_blocks, inode->i_ino);
 
        /* check in cache */
        goal = ext4_ext_in_cache(inode, iblock, &newex);
@@ -2260,7 +2315,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                                   - le32_to_cpu(newex.ee_block)
                                   + ext_pblock(&newex);
                        /* number of remaining blocks in the extent */
-                       allocated = le16_to_cpu(newex.ee_len) -
+                       allocated = ext4_ext_get_actual_len(&newex) -
                                        (iblock - le32_to_cpu(newex.ee_block));
                        goto out;
                } else {
@@ -2288,7 +2343,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 
        ex = path[depth].p_ext;
        if (ex) {
-               unsigned long ee_block = le32_to_cpu(ex->ee_block);
+               ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
                ext4_fsblk_t ee_start = ext_pblock(ex);
                unsigned short ee_len;
 
@@ -2302,7 +2357,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                        newblock = iblock - ee_block + ee_start;
                        /* number of remaining blocks in the extent */
                        allocated = ee_len - (iblock - ee_block);
-                       ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock,
+                       ext_debug("%u fit into %lu:%d -> %llu\n", iblock,
                                        ee_block, ee_len, newblock);
 
                        /* Do not put uninitialized extent in the cache */
@@ -2320,9 +2375,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                        ret = ext4_ext_convert_to_initialized(handle, inode,
                                                                path, iblock,
                                                                max_blocks);
-                       if (ret <= 0)
+                       if (ret <= 0) {
+                               err = ret;
                                goto out2;
-                       else
+                       else
                                allocated = ret;
                        goto outnew;
                }
@@ -2347,8 +2403,15 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info))
                ext4_init_block_alloc_info(inode);
 
-       /* allocate new block */
-       goal = ext4_ext_find_goal(inode, path, iblock);
+       /* find neighbour allocated blocks */
+       ar.lleft = iblock;
+       err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
+       if (err)
+               goto out2;
+       ar.lright = iblock;
+       err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright);
+       if (err)
+               goto out2;
 
        /*
         * See if request is beyond maximum number of blocks we can have in
@@ -2368,10 +2431,21 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        newex.ee_len = cpu_to_le16(max_blocks);
        err = ext4_ext_check_overlap(inode, &newex, path);
        if (err)
-               allocated = le16_to_cpu(newex.ee_len);
+               allocated = ext4_ext_get_actual_len(&newex);
        else
                allocated = max_blocks;
-       newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);
+
+       /* allocate new block */
+       ar.inode = inode;
+       ar.goal = ext4_ext_find_goal(inode, path, iblock);
+       ar.logical = iblock;
+       ar.len = allocated;
+       if (S_ISREG(inode->i_mode))
+               ar.flags = EXT4_MB_HINT_DATA;
+       else
+               /* disable in-core preallocation for non-regular files */
+               ar.flags = 0;
+       newblock = ext4_mb_new_blocks(handle, &ar, &err);
        if (!newblock)
                goto out2;
        ext_debug("allocate new block: goal %llu, found %llu/%lu\n",
@@ -2379,14 +2453,17 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 
        /* try to insert new extent into found leaf and return */
        ext4_ext_store_pblock(&newex, newblock);
-       newex.ee_len = cpu_to_le16(allocated);
+       newex.ee_len = cpu_to_le16(ar.len);
        if (create == EXT4_CREATE_UNINITIALIZED_EXT)  /* Mark uninitialized */
                ext4_ext_mark_uninitialized(&newex);
        err = ext4_ext_insert_extent(handle, inode, path, &newex);
        if (err) {
                /* free data blocks we just allocated */
+               /* not a good idea to call discard here directly,
+                * but otherwise we'd need to call it every free() */
+               ext4_mb_discard_inode_preallocations(inode);
                ext4_free_blocks(handle, inode, ext_pblock(&newex),
-                                       le16_to_cpu(newex.ee_len));
+                                       ext4_ext_get_actual_len(&newex), 0);
                goto out2;
        }
 
@@ -2395,6 +2472,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 
        /* previous routine could use block we allocated */
        newblock = ext_pblock(&newex);
+       allocated = ext4_ext_get_actual_len(&newex);
 outnew:
        __set_bit(BH_New, &bh_result->b_state);
 
@@ -2414,8 +2492,6 @@ out2:
                ext4_ext_drop_refs(path);
                kfree(path);
        }
-       mutex_unlock(&EXT4_I(inode)->truncate_mutex);
-
        return err ? err : allocated;
 }
 
@@ -2423,7 +2499,7 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
 {
        struct address_space *mapping = inode->i_mapping;
        struct super_block *sb = inode->i_sb;
-       unsigned long last_block;
+       ext4_lblk_t last_block;
        handle_t *handle;
        int err = 0;
 
@@ -2445,9 +2521,11 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
        if (page)
                ext4_block_truncate_page(handle, page, mapping, inode->i_size);
 
-       mutex_lock(&EXT4_I(inode)->truncate_mutex);
+       down_write(&EXT4_I(inode)->i_data_sem);
        ext4_ext_invalidate_cache(inode);
 
+       ext4_mb_discard_inode_preallocations(inode);
+
        /*
         * TODO: optimization is possible here.
         * Probably we need not scan at all,
@@ -2481,7 +2559,7 @@ out_stop:
        if (inode->i_nlink)
                ext4_orphan_del(handle, inode);
 
-       mutex_unlock(&EXT4_I(inode)->truncate_mutex);
+       up_write(&EXT4_I(inode)->i_data_sem);
        ext4_journal_stop(handle);
 }
 
@@ -2516,7 +2594,8 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
 long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
 {
        handle_t *handle;
-       ext4_fsblk_t block, max_blocks;
+       ext4_lblk_t block;
+       unsigned long max_blocks;
        ext4_fsblk_t nblocks = 0;
        int ret = 0;
        int ret2 = 0;
@@ -2544,6 +2623,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
         * modify 1 super block, 1 block bitmap and 1 group descriptor.
         */
        credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3;
+       down_write((&EXT4_I(inode)->i_data_sem));
 retry:
        while (ret >= 0 && ret < max_blocks) {
                block = block + ret;
@@ -2557,12 +2637,12 @@ retry:
                ret = ext4_ext_get_blocks(handle, inode, block,
                                          max_blocks, &map_bh,
                                          EXT4_CREATE_UNINITIALIZED_EXT, 0);
-               WARN_ON(!ret);
-               if (!ret) {
+               WARN_ON(ret <= 0);
+               if (ret <= 0) {
                        ext4_error(inode->i_sb, "ext4_fallocate",
-                                  "ext4_ext_get_blocks returned 0! inode#%lu"
-                                  ", block=%llu, max_blocks=%llu",
-                                  inode->i_ino, block, max_blocks);
+                                   "ext4_ext_get_blocks returned error: "
+                                   "inode#%lu, block=%u, max_blocks=%lu",
+                                   inode->i_ino, block, max_blocks);
                        ret = -EIO;
                        ext4_mark_inode_dirty(handle, inode);
                        ret2 = ext4_journal_stop(handle);
@@ -2600,6 +2680,7 @@ retry:
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
                goto retry;
 
+       up_write((&EXT4_I(inode)->i_data_sem));
        /*
         * Time to update the file size.
         * Update only when preallocation was requested beyond the file size.
index 1a81cd66d63b2b2371e4c35025f004657ca66adf..ac35ec58db55c9e9fe4b4d3effde74274b614b25 100644 (file)
@@ -37,9 +37,9 @@ static int ext4_release_file (struct inode * inode, struct file * filp)
        if ((filp->f_mode & FMODE_WRITE) &&
                        (atomic_read(&inode->i_writecount) == 1))
        {
-               mutex_lock(&EXT4_I(inode)->truncate_mutex);
+               down_write(&EXT4_I(inode)->i_data_sem);
                ext4_discard_reservation(inode);
-               mutex_unlock(&EXT4_I(inode)->truncate_mutex);
+               up_write(&EXT4_I(inode)->i_data_sem);
        }
        if (is_dx(inode) && filp->private_data)
                ext4_htree_free_dir_info(filp->private_data);
@@ -56,8 +56,25 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
        ssize_t ret;
        int err;
 
-       ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
+       /*
+        * If we have encountered a bitmap-format file, the size limit
+        * is smaller than s_maxbytes, which is for extent-mapped files.
+        */
+
+       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) {
+               struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+               size_t length = iov_length(iov, nr_segs);
 
+               if (pos > sbi->s_bitmap_maxbytes)
+                       return -EFBIG;
+
+               if (pos + length > sbi->s_bitmap_maxbytes) {
+                       nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
+                                             sbi->s_bitmap_maxbytes - pos);
+               }
+       }
+
+       ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
        /*
         * Skip flushing if there was an error, or if nothing was written.
         */
index 1577910bb58b4135d2bbfbb025c67037cd25785a..7eb0604e7eea8f76973175fc3ee6b247a79171cb 100644 (file)
@@ -14,14 +14,16 @@ extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
 extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
                                       struct ext4_group_desc *gdp);
 struct buffer_head *read_block_bitmap(struct super_block *sb,
-                                     unsigned int block_group);
+                                     ext4_group_t block_group);
 extern unsigned ext4_init_block_bitmap(struct super_block *sb,
-                                      struct buffer_head *bh, int group,
+                                      struct buffer_head *bh,
+                                      ext4_group_t group,
                                       struct ext4_group_desc *desc);
 #define ext4_free_blocks_after_init(sb, group, desc)                   \
                ext4_init_block_bitmap(sb, NULL, group, desc)
 extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
-                                      struct buffer_head *bh, int group,
+                                      struct buffer_head *bh,
+                                      ext4_group_t group,
                                       struct ext4_group_desc *desc);
 extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
 #endif /* _LINUX_EXT4_GROUP_H */
index c61f37fd3f05e4d72061791d79600024c858df97..575b5215c8084f3613484555afb0f5727a404826 100644 (file)
@@ -64,8 +64,8 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
 }
 
 /* Initializes an uninitialized inode bitmap */
-unsigned ext4_init_inode_bitmap(struct super_block *sb,
-                               struct buffer_head *bh, int block_group,
+unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
+                               ext4_group_t block_group,
                                struct ext4_group_desc *gdp)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -75,7 +75,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb,
        /* If checksum is bad mark all blocks and inodes use to prevent
         * allocation, essentially implementing a per-group read-only flag. */
        if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
-               ext4_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
+               ext4_error(sb, __FUNCTION__, "Checksum bad for group %lu\n",
                           block_group);
                gdp->bg_free_blocks_count = 0;
                gdp->bg_free_inodes_count = 0;
@@ -98,7 +98,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb,
  * Return buffer_head of bitmap on success or NULL.
  */
 static struct buffer_head *
-read_inode_bitmap(struct super_block * sb, unsigned long block_group)
+read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 {
        struct ext4_group_desc *desc;
        struct buffer_head *bh = NULL;
@@ -152,7 +152,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
        unsigned long ino;
        struct buffer_head *bitmap_bh = NULL;
        struct buffer_head *bh2;
-       unsigned long block_group;
+       ext4_group_t block_group;
        unsigned long bit;
        struct ext4_group_desc * gdp;
        struct ext4_super_block * es;
@@ -260,12 +260,14 @@ error_return:
  * For other inodes, search forward from the parent directory\'s block
  * group to find a free inode.
  */
-static int find_group_dir(struct super_block *sb, struct inode *parent)
+static int find_group_dir(struct super_block *sb, struct inode *parent,
+                               ext4_group_t *best_group)
 {
-       int ngroups = EXT4_SB(sb)->s_groups_count;
+       ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
        unsigned int freei, avefreei;
        struct ext4_group_desc *desc, *best_desc = NULL;
-       int group, best_group = -1;
+       ext4_group_t group;
+       int ret = -1;
 
        freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter);
        avefreei = freei / ngroups;
@@ -279,11 +281,12 @@ static int find_group_dir(struct super_block *sb, struct inode *parent)
                if (!best_desc ||
                    (le16_to_cpu(desc->bg_free_blocks_count) >
                     le16_to_cpu(best_desc->bg_free_blocks_count))) {
-                       best_group = group;
+                       *best_group = group;
                        best_desc = desc;
+                       ret = 0;
                }
        }
-       return best_group;
+       return ret;
 }
 
 /*
@@ -314,12 +317,13 @@ static int find_group_dir(struct super_block *sb, struct inode *parent)
 #define INODE_COST 64
 #define BLOCK_COST 256
 
-static int find_group_orlov(struct super_block *sb, struct inode *parent)
+static int find_group_orlov(struct super_block *sb, struct inode *parent,
+                               ext4_group_t *group)
 {
-       int parent_group = EXT4_I(parent)->i_block_group;
+       ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_super_block *es = sbi->s_es;
-       int ngroups = sbi->s_groups_count;
+       ext4_group_t ngroups = sbi->s_groups_count;
        int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
        unsigned int freei, avefreei;
        ext4_fsblk_t freeb, avefreeb;
@@ -327,7 +331,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
        unsigned int ndirs;
        int max_debt, max_dirs, min_inodes;
        ext4_grpblk_t min_blocks;
-       int group = -1, i;
+       ext4_group_t i;
        struct ext4_group_desc *desc;
 
        freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
@@ -340,13 +344,14 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
        if ((parent == sb->s_root->d_inode) ||
            (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) {
                int best_ndir = inodes_per_group;
-               int best_group = -1;
+               ext4_group_t grp;
+               int ret = -1;
 
-               get_random_bytes(&group, sizeof(group));
-               parent_group = (unsigned)group % ngroups;
+               get_random_bytes(&grp, sizeof(grp));
+               parent_group = (unsigned)grp % ngroups;
                for (i = 0; i < ngroups; i++) {
-                       group = (parent_group + i) % ngroups;
-                       desc = ext4_get_group_desc (sb, group, NULL);
+                       grp = (parent_group + i) % ngroups;
+                       desc = ext4_get_group_desc(sb, grp, NULL);
                        if (!desc || !desc->bg_free_inodes_count)
                                continue;
                        if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
@@ -355,11 +360,12 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
                                continue;
                        if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
                                continue;
-                       best_group = group;
+                       *group = grp;
+                       ret = 0;
                        best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
                }
-               if (best_group >= 0)
-                       return best_group;
+               if (ret == 0)
+                       return ret;
                goto fallback;
        }
 
@@ -380,8 +386,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
                max_debt = 1;
 
        for (i = 0; i < ngroups; i++) {
-               group = (parent_group + i) % ngroups;
-               desc = ext4_get_group_desc (sb, group, NULL);
+               *group = (parent_group + i) % ngroups;
+               desc = ext4_get_group_desc(sb, *group, NULL);
                if (!desc || !desc->bg_free_inodes_count)
                        continue;
                if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
@@ -390,17 +396,16 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
                        continue;
                if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
                        continue;
-               return group;
+               return 0;
        }
 
 fallback:
        for (i = 0; i < ngroups; i++) {
-               group = (parent_group + i) % ngroups;
-               desc = ext4_get_group_desc (sb, group, NULL);
-               if (!desc || !desc->bg_free_inodes_count)
-                       continue;
-               if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
-                       return group;
+               *group = (parent_group + i) % ngroups;
+               desc = ext4_get_group_desc(sb, *group, NULL);
+               if (desc && desc->bg_free_inodes_count &&
+                       le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
+                       return 0;
        }
 
        if (avefreei) {
@@ -415,21 +420,22 @@ fallback:
        return -1;
 }
 
-static int find_group_other(struct super_block *sb, struct inode *parent)
+static int find_group_other(struct super_block *sb, struct inode *parent,
+                               ext4_group_t *group)
 {
-       int parent_group = EXT4_I(parent)->i_block_group;
-       int ngroups = EXT4_SB(sb)->s_groups_count;
+       ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
+       ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
        struct ext4_group_desc *desc;
-       int group, i;
+       ext4_group_t i;
 
        /*
         * Try to place the inode in its parent directory
         */
-       group = parent_group;
-       desc = ext4_get_group_desc (sb, group, NULL);
+       *group = parent_group;
+       desc = ext4_get_group_desc(sb, *group, NULL);
        if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
                        le16_to_cpu(desc->bg_free_blocks_count))
-               return group;
+               return 0;
 
        /*
         * We're going to place this inode in a different blockgroup from its
@@ -440,33 +446,33 @@ static int find_group_other(struct super_block *sb, struct inode *parent)
         *
         * So add our directory's i_ino into the starting point for the hash.
         */
-       group = (group + parent->i_ino) % ngroups;
+       *group = (*group + parent->i_ino) % ngroups;
 
        /*
         * Use a quadratic hash to find a group with a free inode and some free
         * blocks.
         */
        for (i = 1; i < ngroups; i <<= 1) {
-               group += i;
-               if (group >= ngroups)
-                       group -= ngroups;
-               desc = ext4_get_group_desc (sb, group, NULL);
+               *group += i;
+               if (*group >= ngroups)
+                       *group -= ngroups;
+               desc = ext4_get_group_desc(sb, *group, NULL);
                if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
                                le16_to_cpu(desc->bg_free_blocks_count))
-                       return group;
+                       return 0;
        }
 
        /*
         * That failed: try linear search for a free inode, even if that group
         * has no free blocks.
         */
-       group = parent_group;
+       *group = parent_group;
        for (i = 0; i < ngroups; i++) {
-               if (++group >= ngroups)
-                       group = 0;
-               desc = ext4_get_group_desc (sb, group, NULL);
+               if (++*group >= ngroups)
+                       *group = 0;
+               desc = ext4_get_group_desc(sb, *group, NULL);
                if (desc && le16_to_cpu(desc->bg_free_inodes_count))
-                       return group;
+                       return 0;
        }
 
        return -1;
@@ -487,16 +493,17 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
        struct super_block *sb;
        struct buffer_head *bitmap_bh = NULL;
        struct buffer_head *bh2;
-       int group;
+       ext4_group_t group = 0;
        unsigned long ino = 0;
        struct inode * inode;
        struct ext4_group_desc * gdp = NULL;
        struct ext4_super_block * es;
        struct ext4_inode_info *ei;
        struct ext4_sb_info *sbi;
-       int err = 0;
+       int ret2, err = 0;
        struct inode *ret;
-       int i, free = 0;
+       ext4_group_t i;
+       int free = 0;
 
        /* Cannot create files in a deleted directory */
        if (!dir || !dir->i_nlink)
@@ -512,14 +519,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
        es = sbi->s_es;
        if (S_ISDIR(mode)) {
                if (test_opt (sb, OLDALLOC))
-                       group = find_group_dir(sb, dir);
+                       ret2 = find_group_dir(sb, dir, &group);
                else
-                       group = find_group_orlov(sb, dir);
+                       ret2 = find_group_orlov(sb, dir, &group);
        } else
-               group = find_group_other(sb, dir);
+               ret2 = find_group_other(sb, dir, &group);
 
        err = -ENOSPC;
-       if (group == -1)
+       if (ret2 == -1)
                goto out;
 
        for (i = 0; i < sbi->s_groups_count; i++) {
@@ -583,7 +590,7 @@ got:
            ino > EXT4_INODES_PER_GROUP(sb)) {
                ext4_error(sb, __FUNCTION__,
                           "reserved inode or inode > inodes count - "
-                          "block_group = %d, inode=%lu", group,
+                          "block_group = %lu, inode=%lu", group,
                           ino + group * EXT4_INODES_PER_GROUP(sb));
                err = -EIO;
                goto fail;
@@ -702,7 +709,6 @@ got:
        if (!S_ISDIR(mode))
                ei->i_flags &= ~EXT4_DIRSYNC_FL;
        ei->i_file_acl = 0;
-       ei->i_dir_acl = 0;
        ei->i_dtime = 0;
        ei->i_block_alloc_info = NULL;
        ei->i_block_group = group;
@@ -741,13 +747,10 @@ got:
        if (test_opt(sb, EXTENTS)) {
                EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
                ext4_ext_tree_init(handle, inode);
-               if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
-                       err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
-                       if (err) goto fail;
-                       EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS);
-                       BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "call ext4_journal_dirty_metadata");
-                       err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
-               }
+               err = ext4_update_incompat_feature(handle, sb,
+                                               EXT4_FEATURE_INCOMPAT_EXTENTS);
+               if (err)
+                       goto fail;
        }
 
        ext4_debug("allocating inode %lu\n", inode->i_ino);
@@ -777,7 +780,7 @@ fail_drop:
 struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
 {
        unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count);
-       unsigned long block_group;
+       ext4_group_t block_group;
        int bit;
        struct buffer_head *bitmap_bh = NULL;
        struct inode *inode = NULL;
@@ -833,7 +836,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
 {
        unsigned long desc_count;
        struct ext4_group_desc *gdp;
-       int i;
+       ext4_group_t i;
 #ifdef EXT4FS_DEBUG
        struct ext4_super_block *es;
        unsigned long bitmap_count, x;
@@ -854,7 +857,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
                        continue;
 
                x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8);
-               printk("group %d: stored = %d, counted = %lu\n",
+               printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
                        i, le16_to_cpu(gdp->bg_free_inodes_count), x);
                bitmap_count += x;
        }
@@ -879,7 +882,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
 unsigned long ext4_count_dirs (struct super_block * sb)
 {
        unsigned long count = 0;
-       int i;
+       ext4_group_t i;
 
        for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
                struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL);
index 5489703d95738ffb075ed7cb275aab94eb4d74e6..bb717cbb749c822265bccdd8159de7af456ad745 100644 (file)
@@ -105,7 +105,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
  */
 static unsigned long blocks_for_truncate(struct inode *inode)
 {
-       unsigned long needed;
+       ext4_lblk_t needed;
 
        needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9);
 
@@ -243,13 +243,6 @@ static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
        p->bh = bh;
 }
 
-static int verify_chain(Indirect *from, Indirect *to)
-{
-       while (from <= to && from->key == *from->p)
-               from++;
-       return (from > to);
-}
-
 /**
  *     ext4_block_to_path - parse the block number into array of offsets
  *     @inode: inode in question (we are only interested in its superblock)
@@ -282,7 +275,8 @@ static int verify_chain(Indirect *from, Indirect *to)
  */
 
 static int ext4_block_to_path(struct inode *inode,
-                       long i_block, int offsets[4], int *boundary)
+                       ext4_lblk_t i_block,
+                       ext4_lblk_t offsets[4], int *boundary)
 {
        int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb);
        int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb);
@@ -313,7 +307,10 @@ static int ext4_block_to_path(struct inode *inode,
                offsets[n++] = i_block & (ptrs - 1);
                final = ptrs;
        } else {
-               ext4_warning(inode->i_sb, "ext4_block_to_path", "block > big");
+               ext4_warning(inode->i_sb, "ext4_block_to_path",
+                               "block %lu > max",
+                               i_block + direct_blocks +
+                               indirect_blocks + double_blocks);
        }
        if (boundary)
                *boundary = final - 1 - (i_block & (ptrs - 1));
@@ -344,12 +341,14 @@ static int ext4_block_to_path(struct inode *inode,
  *             (pointer to last triple returned, *@err == 0)
  *     or when it gets an IO error reading an indirect block
  *             (ditto, *@err == -EIO)
- *     or when it notices that chain had been changed while it was reading
- *             (ditto, *@err == -EAGAIN)
  *     or when it reads all @depth-1 indirect blocks successfully and finds
  *     the whole chain, all way to the data (returns %NULL, *err == 0).
+ *
+ *      Need to be called with
+ *      down_read(&EXT4_I(inode)->i_data_sem)
  */
-static Indirect *ext4_get_branch(struct inode *inode, int depth, int *offsets,
+static Indirect *ext4_get_branch(struct inode *inode, int depth,
+                                ext4_lblk_t  *offsets,
                                 Indirect chain[4], int *err)
 {
        struct super_block *sb = inode->i_sb;
@@ -365,9 +364,6 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, int *offsets,
                bh = sb_bread(sb, le32_to_cpu(p->key));
                if (!bh)
                        goto failure;
-               /* Reader: pointers */
-               if (!verify_chain(chain, p))
-                       goto changed;
                add_chain(++p, bh, (__le32*)bh->b_data + *++offsets);
                /* Reader: end */
                if (!p->key)
@@ -375,10 +371,6 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, int *offsets,
        }
        return NULL;
 
-changed:
-       brelse(bh);
-       *err = -EAGAIN;
-       goto no_block;
 failure:
        *err = -EIO;
 no_block:
@@ -445,7 +437,7 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
  *     stores it in *@goal and returns zero.
  */
 
-static ext4_fsblk_t ext4_find_goal(struct inode *inode, long block,
+static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
                Indirect chain[4], Indirect *partial)
 {
        struct ext4_block_alloc_info *block_i;
@@ -559,7 +551,7 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
        return ret;
 failed_out:
        for (i = 0; i <index; i++)
-               ext4_free_blocks(handle, inode, new_blocks[i], 1);
+               ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
        return ret;
 }
 
@@ -590,7 +582,7 @@ failed_out:
  */
 static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
                        int indirect_blks, int *blks, ext4_fsblk_t goal,
-                       int *offsets, Indirect *branch)
+                       ext4_lblk_t *offsets, Indirect *branch)
 {
        int blocksize = inode->i_sb->s_blocksize;
        int i, n = 0;
@@ -658,9 +650,9 @@ failed:
                ext4_journal_forget(handle, branch[i].bh);
        }
        for (i = 0; i <indirect_blks; i++)
-               ext4_free_blocks(handle, inode, new_blocks[i], 1);
+               ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
 
-       ext4_free_blocks(handle, inode, new_blocks[i], num);
+       ext4_free_blocks(handle, inode, new_blocks[i], num, 0);
 
        return err;
 }
@@ -680,7 +672,7 @@ failed:
  * chain to new block and return 0.
  */
 static int ext4_splice_branch(handle_t *handle, struct inode *inode,
-                       long block, Indirect *where, int num, int blks)
+                       ext4_lblk_t block, Indirect *where, int num, int blks)
 {
        int i;
        int err = 0;
@@ -757,9 +749,10 @@ err_out:
        for (i = 1; i <= num; i++) {
                BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget");
                ext4_journal_forget(handle, where[i].bh);
-               ext4_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1);
+               ext4_free_blocks(handle, inode,
+                                       le32_to_cpu(where[i-1].key), 1, 0);
        }
-       ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks);
+       ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 0);
 
        return err;
 }
@@ -782,14 +775,19 @@ err_out:
  * return > 0, # of blocks mapped or allocated.
  * return = 0, if plain lookup failed.
  * return < 0, error case.
+ *
+ *
+ * Need to be called with
+ * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
+ * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
  */
 int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
-               sector_t iblock, unsigned long maxblocks,
+               ext4_lblk_t iblock, unsigned long maxblocks,
                struct buffer_head *bh_result,
                int create, int extend_disksize)
 {
        int err = -EIO;
-       int offsets[4];
+       ext4_lblk_t offsets[4];
        Indirect chain[4];
        Indirect *partial;
        ext4_fsblk_t goal;
@@ -803,7 +801,8 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
 
        J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
        J_ASSERT(handle != NULL || create == 0);
-       depth = ext4_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
+       depth = ext4_block_to_path(inode, iblock, offsets,
+                                       &blocks_to_boundary);
 
        if (depth == 0)
                goto out;
@@ -819,18 +818,6 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
                while (count < maxblocks && count <= blocks_to_boundary) {
                        ext4_fsblk_t blk;
 
-                       if (!verify_chain(chain, partial)) {
-                               /*
-                                * Indirect block might be removed by
-                                * truncate while we were reading it.
-                                * Handling of that case: forget what we've
-                                * got now. Flag the err as EAGAIN, so it
-                                * will reread.
-                                */
-                               err = -EAGAIN;
-                               count = 0;
-                               break;
-                       }
                        blk = le32_to_cpu(*(chain[depth-1].p + count));
 
                        if (blk == first_block + count)
@@ -838,44 +825,13 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
                        else
                                break;
                }
-               if (err != -EAGAIN)
-                       goto got_it;
+               goto got_it;
        }
 
        /* Next simple case - plain lookup or failed read of indirect block */
        if (!create || err == -EIO)
                goto cleanup;
 
-       mutex_lock(&ei->truncate_mutex);
-
-       /*
-        * If the indirect block is missing while we are reading
-        * the chain(ext4_get_branch() returns -EAGAIN err), or
-        * if the chain has been changed after we grab the semaphore,
-        * (either because another process truncated this branch, or
-        * another get_block allocated this branch) re-grab the chain to see if
-        * the request block has been allocated or not.
-        *
-        * Since we already block the truncate/other get_block
-        * at this point, we will have the current copy of the chain when we
-        * splice the branch into the tree.
-        */
-       if (err == -EAGAIN || !verify_chain(chain, partial)) {
-               while (partial > chain) {
-                       brelse(partial->bh);
-                       partial--;
-               }
-               partial = ext4_get_branch(inode, depth, offsets, chain, &err);
-               if (!partial) {
-                       count++;
-                       mutex_unlock(&ei->truncate_mutex);
-                       if (err)
-                               goto cleanup;
-                       clear_buffer_new(bh_result);
-                       goto got_it;
-               }
-       }
-
        /*
         * Okay, we need to do block allocation.  Lazily initialize the block
         * allocation info here if necessary
@@ -911,13 +867,12 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
                err = ext4_splice_branch(handle, inode, iblock,
                                        partial, indirect_blks, count);
        /*
-        * i_disksize growing is protected by truncate_mutex.  Don't forget to
+        * i_disksize growing is protected by i_data_sem.  Don't forget to
         * protect it if you're about to implement concurrent
         * ext4_get_block() -bzzz
        */
        if (!err && extend_disksize && inode->i_size > ei->i_disksize)
                ei->i_disksize = inode->i_size;
-       mutex_unlock(&ei->truncate_mutex);
        if (err)
                goto cleanup;
 
@@ -942,6 +897,47 @@ out:
 
 #define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32)
 
+int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
+                       unsigned long max_blocks, struct buffer_head *bh,
+                       int create, int extend_disksize)
+{
+       int retval;
+       /*
+        * Try to see if we can get  the block without requesting
+        * for new file system block.
+        */
+       down_read((&EXT4_I(inode)->i_data_sem));
+       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
+               retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
+                               bh, 0, 0);
+       } else {
+               retval = ext4_get_blocks_handle(handle,
+                               inode, block, max_blocks, bh, 0, 0);
+       }
+       up_read((&EXT4_I(inode)->i_data_sem));
+       if (!create || (retval > 0))
+               return retval;
+
+       /*
+        * We need to allocate new blocks which will result
+        * in i_data update
+        */
+       down_write((&EXT4_I(inode)->i_data_sem));
+       /*
+        * We need to check for EXT4 here because migrate
+        * could have changed the inode type in between
+        */
+       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
+               retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
+                               bh, create, extend_disksize);
+       } else {
+               retval = ext4_get_blocks_handle(handle, inode, block,
+                               max_blocks, bh, create, extend_disksize);
+       }
+       up_write((&EXT4_I(inode)->i_data_sem));
+       return retval;
+}
+
 static int ext4_get_block(struct inode *inode, sector_t iblock,
                        struct buffer_head *bh_result, int create)
 {
@@ -996,7 +992,7 @@ get_block:
  * `handle' can be NULL if create is zero
  */
 struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
-                               long block, int create, int *errp)
+                               ext4_lblk_t block, int create, int *errp)
 {
        struct buffer_head dummy;
        int fatal = 0, err;
@@ -1063,7 +1059,7 @@ err:
 }
 
 struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
-                              int block, int create, int *err)
+                              ext4_lblk_t block, int create, int *err)
 {
        struct buffer_head * bh;
 
@@ -1446,7 +1442,7 @@ static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
  *     ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ...
  *
  * Same applies to ext4_get_block().  We will deadlock on various things like
- * lock_journal and i_truncate_mutex.
+ * lock_journal and i_data_sem
  *
  * Setting PF_MEMALLOC here doesn't work - too many internal memory
  * allocations fail.
@@ -1828,7 +1824,8 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page,
 {
        ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
        unsigned offset = from & (PAGE_CACHE_SIZE-1);
-       unsigned blocksize, iblock, length, pos;
+       unsigned blocksize, length, pos;
+       ext4_lblk_t iblock;
        struct inode *inode = mapping->host;
        struct buffer_head *bh;
        int err = 0;
@@ -1964,7 +1961,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
  *                     (no partially truncated stuff there).  */
 
 static Indirect *ext4_find_shared(struct inode *inode, int depth,
-                       int offsets[4], Indirect chain[4], __le32 *top)
+                       ext4_lblk_t offsets[4], Indirect chain[4], __le32 *top)
 {
        Indirect *partial, *p;
        int k, err;
@@ -2048,15 +2045,15 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
        for (p = first; p < last; p++) {
                u32 nr = le32_to_cpu(*p);
                if (nr) {
-                       struct buffer_head *bh;
+                       struct buffer_head *tbh;
 
                        *p = 0;
-                       bh = sb_find_get_block(inode->i_sb, nr);
-                       ext4_forget(handle, 0, inode, bh, nr);
+                       tbh = sb_find_get_block(inode->i_sb, nr);
+                       ext4_forget(handle, 0, inode, tbh, nr);
                }
        }
 
-       ext4_free_blocks(handle, inode, block_to_free, count);
+       ext4_free_blocks(handle, inode, block_to_free, count, 0);
 }
 
 /**
@@ -2229,7 +2226,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
                                ext4_journal_test_restart(handle, inode);
                        }
 
-                       ext4_free_blocks(handle, inode, nr, 1);
+                       ext4_free_blocks(handle, inode, nr, 1, 1);
 
                        if (parent_bh) {
                                /*
@@ -2289,12 +2286,12 @@ void ext4_truncate(struct inode *inode)
        __le32 *i_data = ei->i_data;
        int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
        struct address_space *mapping = inode->i_mapping;
-       int offsets[4];
+       ext4_lblk_t offsets[4];
        Indirect chain[4];
        Indirect *partial;
        __le32 nr = 0;
        int n;
-       long last_block;
+       ext4_lblk_t last_block;
        unsigned blocksize = inode->i_sb->s_blocksize;
        struct page *page;
 
@@ -2320,8 +2317,10 @@ void ext4_truncate(struct inode *inode)
                        return;
        }
 
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
-               return ext4_ext_truncate(inode, page);
+       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
+               ext4_ext_truncate(inode, page);
+               return;
+       }
 
        handle = start_transaction(inode);
        if (IS_ERR(handle)) {
@@ -2369,7 +2368,7 @@ void ext4_truncate(struct inode *inode)
         * From here we block out all ext4_get_block() callers who want to
         * modify the block allocation tree.
         */
-       mutex_lock(&ei->truncate_mutex);
+       down_write(&ei->i_data_sem);
 
        if (n == 1) {           /* direct blocks */
                ext4_free_data(handle, inode, NULL, i_data+offsets[0],
@@ -2433,7 +2432,7 @@ do_indirects:
 
        ext4_discard_reservation(inode);
 
-       mutex_unlock(&ei->truncate_mutex);
+       up_write(&ei->i_data_sem);
        inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
 
@@ -2460,7 +2459,8 @@ out_stop:
 static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
                unsigned long ino, struct ext4_iloc *iloc)
 {
-       unsigned long desc, group_desc, block_group;
+       unsigned long desc, group_desc;
+       ext4_group_t block_group;
        unsigned long offset;
        ext4_fsblk_t block;
        struct buffer_head *bh;
@@ -2547,7 +2547,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
                        struct ext4_group_desc *desc;
                        int inodes_per_buffer;
                        int inode_offset, i;
-                       int block_group;
+                       ext4_group_t block_group;
                        int start;
 
                        block_group = (inode->i_ino - 1) /
@@ -2660,6 +2660,28 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei)
        if (flags & S_DIRSYNC)
                ei->i_flags |= EXT4_DIRSYNC_FL;
 }
+static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
+                                       struct ext4_inode_info *ei)
+{
+       blkcnt_t i_blocks ;
+       struct inode *inode = &(ei->vfs_inode);
+       struct super_block *sb = inode->i_sb;
+
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                               EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
+               /* we are using combined 48 bit field */
+               i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 |
+                                       le32_to_cpu(raw_inode->i_blocks_lo);
+               if (ei->i_flags & EXT4_HUGE_FILE_FL) {
+                       /* i_blocks represent file system block size */
+                       return i_blocks  << (inode->i_blkbits - 9);
+               } else {
+                       return i_blocks;
+               }
+       } else {
+               return le32_to_cpu(raw_inode->i_blocks_lo);
+       }
+}
 
 void ext4_read_inode(struct inode * inode)
 {
@@ -2687,7 +2709,6 @@ void ext4_read_inode(struct inode * inode)
                inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
        }
        inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
-       inode->i_size = le32_to_cpu(raw_inode->i_size);
 
        ei->i_state = 0;
        ei->i_dir_start_lookup = 0;
@@ -2709,19 +2730,15 @@ void ext4_read_inode(struct inode * inode)
                 * recovery code: that's fine, we're about to complete
                 * the process of deleting those. */
        }
-       inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
        ei->i_flags = le32_to_cpu(raw_inode->i_flags);
-       ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
+       inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
+       ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
        if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
-           cpu_to_le32(EXT4_OS_HURD))
+           cpu_to_le32(EXT4_OS_HURD)) {
                ei->i_file_acl |=
                        ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
-       if (!S_ISREG(inode->i_mode)) {
-               ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
-       } else {
-               inode->i_size |=
-                       ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
        }
+       inode->i_size = ext4_isize(raw_inode);
        ei->i_disksize = inode->i_size;
        inode->i_generation = le32_to_cpu(raw_inode->i_generation);
        ei->i_block_group = iloc.block_group;
@@ -2765,6 +2782,13 @@ void ext4_read_inode(struct inode * inode)
        EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
        EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
 
+       inode->i_version = le32_to_cpu(raw_inode->i_disk_version);
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
+               if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
+                       inode->i_version |=
+                       (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
+       }
+
        if (S_ISREG(inode->i_mode)) {
                inode->i_op = &ext4_file_inode_operations;
                inode->i_fop = &ext4_file_operations;
@@ -2797,6 +2821,55 @@ bad_inode:
        return;
 }
 
+static int ext4_inode_blocks_set(handle_t *handle,
+                               struct ext4_inode *raw_inode,
+                               struct ext4_inode_info *ei)
+{
+       struct inode *inode = &(ei->vfs_inode);
+       u64 i_blocks = inode->i_blocks;
+       struct super_block *sb = inode->i_sb;
+       int err = 0;
+
+       if (i_blocks <= ~0U) {
+               /*
+                * i_blocks can be represnted in a 32 bit variable
+                * as multiple of 512 bytes
+                */
+               raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
+               raw_inode->i_blocks_high = 0;
+               ei->i_flags &= ~EXT4_HUGE_FILE_FL;
+       } else if (i_blocks <= 0xffffffffffffULL) {
+               /*
+                * i_blocks can be represented in a 48 bit variable
+                * as multiple of 512 bytes
+                */
+               err = ext4_update_rocompat_feature(handle, sb,
+                                           EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
+               if (err)
+                       goto  err_out;
+               /* i_block is stored in the split  48 bit fields */
+               raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
+               raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
+               ei->i_flags &= ~EXT4_HUGE_FILE_FL;
+       } else {
+               /*
+                * i_blocks should be represented in a 48 bit variable
+                * as multiple of  file system block size
+                */
+               err = ext4_update_rocompat_feature(handle, sb,
+                                           EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
+               if (err)
+                       goto  err_out;
+               ei->i_flags |= EXT4_HUGE_FILE_FL;
+               /* i_block is stored in file system block size */
+               i_blocks = i_blocks >> (inode->i_blkbits - 9);
+               raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
+               raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
+       }
+err_out:
+       return err;
+}
+
 /*
  * Post the struct inode info into an on-disk inode location in the
  * buffer-cache.  This gobbles the caller's reference to the
@@ -2845,47 +2918,42 @@ static int ext4_do_update_inode(handle_t *handle,
                raw_inode->i_gid_high = 0;
        }
        raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
-       raw_inode->i_size = cpu_to_le32(ei->i_disksize);
 
        EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
        EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
        EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
        EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
 
-       raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
+       if (ext4_inode_blocks_set(handle, raw_inode, ei))
+               goto out_brelse;
        raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
        raw_inode->i_flags = cpu_to_le32(ei->i_flags);
        if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
            cpu_to_le32(EXT4_OS_HURD))
                raw_inode->i_file_acl_high =
                        cpu_to_le16(ei->i_file_acl >> 32);
-       raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl);
-       if (!S_ISREG(inode->i_mode)) {
-               raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
-       } else {
-               raw_inode->i_size_high =
-                       cpu_to_le32(ei->i_disksize >> 32);
-               if (ei->i_disksize > 0x7fffffffULL) {
-                       struct super_block *sb = inode->i_sb;
-                       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-                                       EXT4_FEATURE_RO_COMPAT_LARGE_FILE) ||
-                           EXT4_SB(sb)->s_es->s_rev_level ==
-                                       cpu_to_le32(EXT4_GOOD_OLD_REV)) {
-                              /* If this is the first large file
-                               * created, add a flag to the superblock.
-                               */
-                               err = ext4_journal_get_write_access(handle,
-                                               EXT4_SB(sb)->s_sbh);
-                               if (err)
-                                       goto out_brelse;
-                               ext4_update_dynamic_rev(sb);
-                               EXT4_SET_RO_COMPAT_FEATURE(sb,
+       raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
+       ext4_isize_set(raw_inode, ei->i_disksize);
+       if (ei->i_disksize > 0x7fffffffULL) {
+               struct super_block *sb = inode->i_sb;
+               if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                               EXT4_FEATURE_RO_COMPAT_LARGE_FILE) ||
+                               EXT4_SB(sb)->s_es->s_rev_level ==
+                               cpu_to_le32(EXT4_GOOD_OLD_REV)) {
+                       /* If this is the first large file
+                        * created, add a flag to the superblock.
+                        */
+                       err = ext4_journal_get_write_access(handle,
+                                       EXT4_SB(sb)->s_sbh);
+                       if (err)
+                               goto out_brelse;
+                       ext4_update_dynamic_rev(sb);
+                       EXT4_SET_RO_COMPAT_FEATURE(sb,
                                        EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
-                               sb->s_dirt = 1;
-                               handle->h_sync = 1;
-                               err = ext4_journal_dirty_metadata(handle,
-                                               EXT4_SB(sb)->s_sbh);
-                       }
+                       sb->s_dirt = 1;
+                       handle->h_sync = 1;
+                       err = ext4_journal_dirty_metadata(handle,
+                                       EXT4_SB(sb)->s_sbh);
                }
        }
        raw_inode->i_generation = cpu_to_le32(inode->i_generation);
@@ -2903,8 +2971,14 @@ static int ext4_do_update_inode(handle_t *handle,
        } else for (block = 0; block < EXT4_N_BLOCKS; block++)
                raw_inode->i_block[block] = ei->i_data[block];
 
-       if (ei->i_extra_isize)
+       raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
+       if (ei->i_extra_isize) {
+               if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
+                       raw_inode->i_version_hi =
+                       cpu_to_le32(inode->i_version >> 32);
                raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
+       }
+
 
        BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
        rc = ext4_journal_dirty_metadata(handle, bh);
@@ -3024,6 +3098,17 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                ext4_journal_stop(handle);
        }
 
+       if (attr->ia_valid & ATTR_SIZE) {
+               if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) {
+                       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+                       if (attr->ia_size > sbi->s_bitmap_maxbytes) {
+                               error = -EFBIG;
+                               goto err_out;
+                       }
+               }
+       }
+
        if (S_ISREG(inode->i_mode) &&
            attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
                handle_t *handle;
@@ -3120,6 +3205,9 @@ int ext4_mark_iloc_dirty(handle_t *handle,
 {
        int err = 0;
 
+       if (test_opt(inode->i_sb, I_VERSION))
+               inode_inc_iversion(inode);
+
        /* the do_update_inode consumes one bh->b_count */
        get_bh(iloc->bh);
 
@@ -3158,8 +3246,10 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
  * Expand an inode by new_extra_isize bytes.
  * Returns 0 on success or negative error number on failure.
  */
-int ext4_expand_extra_isize(struct inode *inode, unsigned int new_extra_isize,
-                       struct ext4_iloc iloc, handle_t *handle)
+static int ext4_expand_extra_isize(struct inode *inode,
+                                  unsigned int new_extra_isize,
+                                  struct ext4_iloc iloc,
+                                  handle_t *handle)
 {
        struct ext4_inode *raw_inode;
        struct ext4_xattr_ibody_header *header;
index e7f894bdb4202359974088ba5c1f14585a43fc03..2ed7c37f897e79f0b9c6d6af3a5c2aad79fa7f59 100644 (file)
@@ -199,7 +199,7 @@ flags_err:
                 * need to allocate reservation structure for this inode
                 * before set the window size
                 */
-               mutex_lock(&ei->truncate_mutex);
+               down_write(&ei->i_data_sem);
                if (!ei->i_block_alloc_info)
                        ext4_init_block_alloc_info(inode);
 
@@ -207,7 +207,7 @@ flags_err:
                        struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node;
                        rsv->rsv_goal_size = rsv_window_size;
                }
-               mutex_unlock(&ei->truncate_mutex);
+               up_write(&ei->i_data_sem);
                return 0;
        }
        case EXT4_IOC_GROUP_EXTEND: {
@@ -254,6 +254,9 @@ flags_err:
                return err;
        }
 
+       case EXT4_IOC_MIGRATE:
+               return ext4_ext_migrate(inode, filp, cmd, arg);
+
        default:
                return -ENOTTY;
        }
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
new file mode 100644 (file)
index 0000000..76e5fed
--- /dev/null
@@ -0,0 +1,4552 @@
+/*
+ * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
+ * Written by Alex Tomas <alex@clusterfs.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public Licens
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-
+ */
+
+
+/*
+ * mballoc.c contains the multiblocks allocation routines
+ */
+
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/ext4_jbd2.h>
+#include <linux/ext4_fs.h>
+#include <linux/quotaops.h>
+#include <linux/buffer_head.h>
+#include <linux/module.h>
+#include <linux/swap.h>
+#include <linux/proc_fs.h>
+#include <linux/pagemap.h>
+#include <linux/seq_file.h>
+#include <linux/version.h>
+#include "group.h"
+
+/*
+ * MUSTDO:
+ *   - test ext4_ext_search_left() and ext4_ext_search_right()
+ *   - search for metadata in few groups
+ *
+ * TODO v4:
+ *   - normalization should take into account whether file is still open
+ *   - discard preallocations if no free space left (policy?)
+ *   - don't normalize tails
+ *   - quota
+ *   - reservation for superuser
+ *
+ * TODO v3:
+ *   - bitmap read-ahead (proposed by Oleg Drokin aka green)
+ *   - track min/max extents in each group for better group selection
+ *   - mb_mark_used() may allocate chunk right after splitting buddy
+ *   - tree of groups sorted by number of free blocks
+ *   - error handling
+ */
+
+/*
+ * The allocation request involve request for multiple number of blocks
+ * near to the goal(block) value specified.
+ *
+ * During initialization phase of the allocator we decide to use the group
+ * preallocation or inode preallocation depending on the size file. The
+ * size of the file could be the resulting file size we would have after
+ * allocation or the current file size which ever is larger. If the size is
+ * less that sbi->s_mb_stream_request we select the group
+ * preallocation. The default value of s_mb_stream_request is 16
+ * blocks. This can also be tuned via
+ * /proc/fs/ext4/<partition>/stream_req. The value is represented in terms
+ * of number of blocks.
+ *
+ * The main motivation for having small file use group preallocation is to
+ * ensure that we have small file closer in the disk.
+ *
+ * First stage the allocator looks at the inode prealloc list
+ * ext4_inode_info->i_prealloc_list contain list of prealloc spaces for
+ * this particular inode. The inode prealloc space is represented as:
+ *
+ * pa_lstart -> the logical start block for this prealloc space
+ * pa_pstart -> the physical start block for this prealloc space
+ * pa_len    -> lenght for this prealloc space
+ * pa_free   ->  free space available in this prealloc space
+ *
+ * The inode preallocation space is used looking at the _logical_ start
+ * block. If only the logical file block falls within the range of prealloc
+ * space we will consume the particular prealloc space. This make sure that
+ * that the we have contiguous physical blocks representing the file blocks
+ *
+ * The important thing to be noted in case of inode prealloc space is that
+ * we don't modify the values associated to inode prealloc space except
+ * pa_free.
+ *
+ * If we are not able to find blocks in the inode prealloc space and if we
+ * have the group allocation flag set then we look at the locality group
+ * prealloc space. These are per CPU prealloc list repreasented as
+ *
+ * ext4_sb_info.s_locality_groups[smp_processor_id()]
+ *
+ * The reason for having a per cpu locality group is to reduce the contention
+ * between CPUs. It is possible to get scheduled at this point.
+ *
+ * The locality group prealloc space is used looking at whether we have
+ * enough free space (pa_free) withing the prealloc space.
+ *
+ * If we can't allocate blocks via inode prealloc or/and locality group
+ * prealloc then we look at the buddy cache. The buddy cache is represented
+ * by ext4_sb_info.s_buddy_cache (struct inode) whose file offset gets
+ * mapped to the buddy and bitmap information regarding different
+ * groups. The buddy information is attached to buddy cache inode so that
+ * we can access them through the page cache. The information regarding
+ * each group is loaded via ext4_mb_load_buddy.  The information involve
+ * block bitmap and buddy information. The information are stored in the
+ * inode as:
+ *
+ *  {                        page                        }
+ *  [ group 0 buddy][ group 0 bitmap] [group 1][ group 1]...
+ *
+ *
+ * one block each for bitmap and buddy information.  So for each group we
+ * take up 2 blocks. A page can contain blocks_per_page (PAGE_CACHE_SIZE /
+ * blocksize) blocks.  So it can have information regarding groups_per_page
+ * which is blocks_per_page/2
+ *
+ * The buddy cache inode is not stored on disk. The inode is thrown
+ * away when the filesystem is unmounted.
+ *
+ * We look for count number of blocks in the buddy cache. If we were able
+ * to locate that many free blocks we return with additional information
+ * regarding rest of the contiguous physical block available
+ *
+ * Before allocating blocks via buddy cache we normalize the request
+ * blocks. This ensure we ask for more blocks that we needed. The extra
+ * blocks that we get after allocation is added to the respective prealloc
+ * list. In case of inode preallocation we follow a list of heuristics
+ * based on file size. This can be found in ext4_mb_normalize_request. If
+ * we are doing a group prealloc we try to normalize the request to
+ * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is set to
+ * 512 blocks. This can be tuned via
+ * /proc/fs/ext4/<partition/group_prealloc. The value is represented in
+ * terms of number of blocks. If we have mounted the file system with -O
+ * stripe=<value> option the group prealloc request is normalized to the
+ * stripe value (sbi->s_stripe)
+ *
+ * The regular allocator(using the buddy cache) support few tunables.
+ *
+ * /proc/fs/ext4/<partition>/min_to_scan
+ * /proc/fs/ext4/<partition>/max_to_scan
+ * /proc/fs/ext4/<partition>/order2_req
+ *
+ * The regular allocator use buddy scan only if the request len is power of
+ * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
+ * value of s_mb_order2_reqs can be tuned via
+ * /proc/fs/ext4/<partition>/order2_req.  If the request len is equal to
+ * stripe size (sbi->s_stripe), we try to search for contigous block in
+ * stripe size. This should result in better allocation on RAID setup. If
+ * not we search in the specific group using bitmap for best extents. The
+ * tunable min_to_scan and max_to_scan controll the behaviour here.
+ * min_to_scan indicate how long the mballoc __must__ look for a best
+ * extent and max_to_scanindicate how long the mballoc __can__ look for a
+ * best extent in the found extents. Searching for the blocks starts with
+ * the group specified as the goal value in allocation context via
+ * ac_g_ex. Each group is first checked based on the criteria whether it
+ * can used for allocation. ext4_mb_good_group explains how the groups are
+ * checked.
+ *
+ * Both the prealloc space are getting populated as above. So for the first
+ * request we will hit the buddy cache which will result in this prealloc
+ * space getting filled. The prealloc space is then later used for the
+ * subsequent request.
+ */
+
+/*
+ * mballoc operates on the following data:
+ *  - on-disk bitmap
+ *  - in-core buddy (actually includes buddy and bitmap)
+ *  - preallocation descriptors (PAs)
+ *
+ * there are two types of preallocations:
+ *  - inode
+ *    assiged to specific inode and can be used for this inode only.
+ *    it describes part of inode's space preallocated to specific
+ *    physical blocks. any block from that preallocated can be used
+ *    independent. the descriptor just tracks number of blocks left
+ *    unused. so, before taking some block from descriptor, one must
+ *    make sure corresponded logical block isn't allocated yet. this
+ *    also means that freeing any block within descriptor's range
+ *    must discard all preallocated blocks.
+ *  - locality group
+ *    assigned to specific locality group which does not translate to
+ *    permanent set of inodes: inode can join and leave group. space
+ *    from this type of preallocation can be used for any inode. thus
+ *    it's consumed from the beginning to the end.
+ *
+ * relation between them can be expressed as:
+ *    in-core buddy = on-disk bitmap + preallocation descriptors
+ *
+ * this mean blocks mballoc considers used are:
+ *  - allocated blocks (persistent)
+ *  - preallocated blocks (non-persistent)
+ *
+ * consistency in mballoc world means that at any time a block is either
+ * free or used in ALL structures. notice: "any time" should not be read
+ * literally -- time is discrete and delimited by locks.
+ *
+ *  to keep it simple, we don't use block numbers, instead we count number of
+ *  blocks: how many blocks marked used/free in on-disk bitmap, buddy and PA.
+ *
+ * all operations can be expressed as:
+ *  - init buddy:                      buddy = on-disk + PAs
+ *  - new PA:                          buddy += N; PA = N
+ *  - use inode PA:                    on-disk += N; PA -= N
+ *  - discard inode PA                 buddy -= on-disk - PA; PA = 0
+ *  - use locality group PA            on-disk += N; PA -= N
+ *  - discard locality group PA                buddy -= PA; PA = 0
+ *  note: 'buddy -= on-disk - PA' is used to show that on-disk bitmap
+ *        is used in real operation because we can't know actual used
+ *        bits from PA, only from on-disk bitmap
+ *
+ * if we follow this strict logic, then all operations above should be atomic.
+ * given some of them can block, we'd have to use something like semaphores
+ * killing performance on high-end SMP hardware. let's try to relax it using
+ * the following knowledge:
+ *  1) if buddy is referenced, it's already initialized
+ *  2) while block is used in buddy and the buddy is referenced,
+ *     nobody can re-allocate that block
+ *  3) we work on bitmaps and '+' actually means 'set bits'. if on-disk has
+ *     bit set and PA claims same block, it's OK. IOW, one can set bit in
+ *     on-disk bitmap if buddy has same bit set or/and PA covers corresponded
+ *     block
+ *
+ * so, now we're building a concurrency table:
+ *  - init buddy vs.
+ *    - new PA
+ *      blocks for PA are allocated in the buddy, buddy must be referenced
+ *      until PA is linked to allocation group to avoid concurrent buddy init
+ *    - use inode PA
+ *      we need to make sure that either on-disk bitmap or PA has uptodate data
+ *      given (3) we care that PA-=N operation doesn't interfere with init
+ *    - discard inode PA
+ *      the simplest way would be to have buddy initialized by the discard
+ *    - use locality group PA
+ *      again PA-=N must be serialized with init
+ *    - discard locality group PA
+ *      the simplest way would be to have buddy initialized by the discard
+ *  - new PA vs.
+ *    - use inode PA
+ *      i_data_sem serializes them
+ *    - discard inode PA
+ *      discard process must wait until PA isn't used by another process
+ *    - use locality group PA
+ *      some mutex should serialize them
+ *    - discard locality group PA
+ *      discard process must wait until PA isn't used by another process
+ *  - use inode PA
+ *    - use inode PA
+ *      i_data_sem or another mutex should serializes them
+ *    - discard inode PA
+ *      discard process must wait until PA isn't used by another process
+ *    - use locality group PA
+ *      nothing wrong here -- they're different PAs covering different blocks
+ *    - discard locality group PA
+ *      discard process must wait until PA isn't used by another process
+ *
+ * now we're ready to make few consequences:
+ *  - PA is referenced and while it is no discard is possible
+ *  - PA is referenced until block isn't marked in on-disk bitmap
+ *  - PA changes only after on-disk bitmap
+ *  - discard must not compete with init. either init is done before
+ *    any discard or they're serialized somehow
+ *  - buddy init as sum of on-disk bitmap and PAs is done atomically
+ *
+ * a special case when we've used PA to emptiness. no need to modify buddy
+ * in this case, but we should care about concurrent init
+ *
+ */
+
+ /*
+ * Logic in few words:
+ *
+ *  - allocation:
+ *    load group
+ *    find blocks
+ *    mark bits in on-disk bitmap
+ *    release group
+ *
+ *  - use preallocation:
+ *    find proper PA (per-inode or group)
+ *    load group
+ *    mark bits in on-disk bitmap
+ *    release group
+ *    release PA
+ *
+ *  - free:
+ *    load group
+ *    mark bits in on-disk bitmap
+ *    release group
+ *
+ *  - discard preallocations in group:
+ *    mark PAs deleted
+ *    move them onto local list
+ *    load on-disk bitmap
+ *    load group
+ *    remove PA from object (inode or locality group)
+ *    mark free blocks in-core
+ *
+ *  - discard inode's preallocations:
+ */
+
+/*
+ * Locking rules
+ *
+ * Locks:
+ *  - bitlock on a group       (group)
+ *  - object (inode/locality)  (object)
+ *  - per-pa lock              (pa)
+ *
+ * Paths:
+ *  - new pa
+ *    object
+ *    group
+ *
+ *  - find and use pa:
+ *    pa
+ *
+ *  - release consumed pa:
+ *    pa
+ *    group
+ *    object
+ *
+ *  - generate in-core bitmap:
+ *    group
+ *        pa
+ *
+ *  - discard all for given object (inode, locality group):
+ *    object
+ *        pa
+ *    group
+ *
+ *  - discard all for given group:
+ *    group
+ *        pa
+ *    group
+ *        object
+ *
+ */
+
+/*
+ * with AGGRESSIVE_CHECK allocator runs consistency checks over
+ * structures. these checks slow things down a lot
+ */
+#define AGGRESSIVE_CHECK__
+
+/*
+ * with DOUBLE_CHECK defined mballoc creates persistent in-core
+ * bitmaps, maintains and uses them to check for double allocations
+ */
+#define DOUBLE_CHECK__
+
+/*
+ */
+#define MB_DEBUG__
+#ifdef MB_DEBUG
+#define mb_debug(fmt, a...)    printk(fmt, ##a)
+#else
+#define mb_debug(fmt, a...)
+#endif
+
+/*
+ * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
+ * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
+ */
+#define EXT4_MB_HISTORY
+#define EXT4_MB_HISTORY_ALLOC          1       /* allocation */
+#define EXT4_MB_HISTORY_PREALLOC       2       /* preallocated blocks used */
+#define EXT4_MB_HISTORY_DISCARD                4       /* preallocation discarded */
+#define EXT4_MB_HISTORY_FREE           8       /* free */
+
+#define EXT4_MB_HISTORY_DEFAULT                (EXT4_MB_HISTORY_ALLOC | \
+                                        EXT4_MB_HISTORY_PREALLOC)
+
+/*
+ * How long mballoc can look for a best extent (in found extents)
+ */
+#define MB_DEFAULT_MAX_TO_SCAN         200
+
+/*
+ * How long mballoc must look for a best extent
+ */
+#define MB_DEFAULT_MIN_TO_SCAN         10
+
+/*
+ * How many groups mballoc will scan looking for the best chunk
+ */
+#define MB_DEFAULT_MAX_GROUPS_TO_SCAN  5
+
+/*
+ * with 'ext4_mb_stats' allocator will collect stats that will be
+ * shown at umount. The collecting costs though!
+ */
+#define MB_DEFAULT_STATS               1
+
+/*
+ * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
+ * by the stream allocator, which purpose is to pack requests
+ * as close each to other as possible to produce smooth I/O traffic
+ * We use locality group prealloc space for stream request.
+ * We can tune the same via /proc/fs/ext4/<parition>/stream_req
+ */
+#define MB_DEFAULT_STREAM_THRESHOLD    16      /* 64K */
+
+/*
+ * for which requests use 2^N search using buddies
+ */
+#define MB_DEFAULT_ORDER2_REQS         2
+
+/*
+ * default group prealloc size 512 blocks
+ */
+#define MB_DEFAULT_GROUP_PREALLOC      512
+
+static struct kmem_cache *ext4_pspace_cachep;
+
+#ifdef EXT4_BB_MAX_BLOCKS
+#undef EXT4_BB_MAX_BLOCKS
+#endif
+#define EXT4_BB_MAX_BLOCKS     30
+
+struct ext4_free_metadata {
+       ext4_group_t group;
+       unsigned short num;
+       ext4_grpblk_t  blocks[EXT4_BB_MAX_BLOCKS];
+       struct list_head list;
+};
+
+struct ext4_group_info {
+       unsigned long   bb_state;
+       unsigned long   bb_tid;
+       struct ext4_free_metadata *bb_md_cur;
+       unsigned short  bb_first_free;
+       unsigned short  bb_free;
+       unsigned short  bb_fragments;
+       struct          list_head bb_prealloc_list;
+#ifdef DOUBLE_CHECK
+       void            *bb_bitmap;
+#endif
+       unsigned short  bb_counters[];
+};
+
+#define EXT4_GROUP_INFO_NEED_INIT_BIT  0
+#define EXT4_GROUP_INFO_LOCKED_BIT     1
+
+#define EXT4_MB_GRP_NEED_INIT(grp)     \
+       (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
+
+
+struct ext4_prealloc_space {
+       struct list_head        pa_inode_list;
+       struct list_head        pa_group_list;
+       union {
+               struct list_head pa_tmp_list;
+               struct rcu_head pa_rcu;
+       } u;
+       spinlock_t              pa_lock;
+       atomic_t                pa_count;
+       unsigned                pa_deleted;
+       ext4_fsblk_t            pa_pstart;      /* phys. block */
+       ext4_lblk_t             pa_lstart;      /* log. block */
+       unsigned short          pa_len;         /* len of preallocated chunk */
+       unsigned short          pa_free;        /* how many blocks are free */
+       unsigned short          pa_linear;      /* consumed in one direction
+                                                * strictly, for grp prealloc */
+       spinlock_t              *pa_obj_lock;
+       struct inode            *pa_inode;      /* hack, for history only */
+};
+
+
+struct ext4_free_extent {
+       ext4_lblk_t fe_logical;
+       ext4_grpblk_t fe_start;
+       ext4_group_t fe_group;
+       int fe_len;
+};
+
+/*
+ * Locality group:
+ *   we try to group all related changes together
+ *   so that writeback can flush/allocate them together as well
+ */
+struct ext4_locality_group {
+       /* for allocator */
+       struct mutex            lg_mutex;       /* to serialize allocates */
+       struct list_head        lg_prealloc_list;/* list of preallocations */
+       spinlock_t              lg_prealloc_lock;
+};
+
+struct ext4_allocation_context {
+       struct inode *ac_inode;
+       struct super_block *ac_sb;
+
+       /* original request */
+       struct ext4_free_extent ac_o_ex;
+
+       /* goal request (after normalization) */
+       struct ext4_free_extent ac_g_ex;
+
+       /* the best found extent */
+       struct ext4_free_extent ac_b_ex;
+
+       /* copy of the bext found extent taken before preallocation efforts */
+       struct ext4_free_extent ac_f_ex;
+
+       /* number of iterations done. we have to track to limit searching */
+       unsigned long ac_ex_scanned;
+       __u16 ac_groups_scanned;
+       __u16 ac_found;
+       __u16 ac_tail;
+       __u16 ac_buddy;
+       __u16 ac_flags;         /* allocation hints */
+       __u8 ac_status;
+       __u8 ac_criteria;
+       __u8 ac_repeats;
+       __u8 ac_2order;         /* if request is to allocate 2^N blocks and
+                                * N > 0, the field stores N, otherwise 0 */
+       __u8 ac_op;             /* operation, for history only */
+       struct page *ac_bitmap_page;
+       struct page *ac_buddy_page;
+       struct ext4_prealloc_space *ac_pa;
+       struct ext4_locality_group *ac_lg;
+};
+
+#define AC_STATUS_CONTINUE     1
+#define AC_STATUS_FOUND                2
+#define AC_STATUS_BREAK                3
+
+struct ext4_mb_history {
+       struct ext4_free_extent orig;   /* orig allocation */
+       struct ext4_free_extent goal;   /* goal allocation */
+       struct ext4_free_extent result; /* result allocation */
+       unsigned pid;
+       unsigned ino;
+       __u16 found;    /* how many extents have been found */
+       __u16 groups;   /* how many groups have been scanned */
+       __u16 tail;     /* what tail broke some buddy */
+       __u16 buddy;    /* buddy the tail ^^^ broke */
+       __u16 flags;
+       __u8 cr:3;      /* which phase the result extent was found at */
+       __u8 op:4;
+       __u8 merged:1;
+};
+
+struct ext4_buddy {
+       struct page *bd_buddy_page;
+       void *bd_buddy;
+       struct page *bd_bitmap_page;
+       void *bd_bitmap;
+       struct ext4_group_info *bd_info;
+       struct super_block *bd_sb;
+       __u16 bd_blkbits;
+       ext4_group_t bd_group;
+};
+#define EXT4_MB_BITMAP(e4b)    ((e4b)->bd_bitmap)
+#define EXT4_MB_BUDDY(e4b)     ((e4b)->bd_buddy)
+
+#ifndef EXT4_MB_HISTORY
+static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
+{
+       return;
+}
+#else
+static void ext4_mb_store_history(struct ext4_allocation_context *ac);
+#endif
+
+#define in_range(b, first, len)        ((b) >= (first) && (b) <= (first) + (len) - 1)
+
+static struct proc_dir_entry *proc_root_ext4;
+struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
+ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
+                       ext4_fsblk_t goal, unsigned long *count, int *errp);
+
+static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+                                       ext4_group_t group);
+static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
+static void ext4_mb_free_committed_blocks(struct super_block *);
+static void ext4_mb_return_to_preallocation(struct inode *inode,
+                                       struct ext4_buddy *e4b, sector_t block,
+                                       int count);
+static void ext4_mb_put_pa(struct ext4_allocation_context *,
+                       struct super_block *, struct ext4_prealloc_space *pa);
+static int ext4_mb_init_per_dev_proc(struct super_block *sb);
+static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
+
+
+static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
+{
+       struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
+
+       bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
+}
+
+static inline void ext4_unlock_group(struct super_block *sb,
+                                       ext4_group_t group)
+{
+       struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
+
+       bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
+}
+
+static inline int ext4_is_group_locked(struct super_block *sb,
+                                       ext4_group_t group)
+{
+       struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
+
+       return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
+                                               &(grinfo->bb_state));
+}
+
+static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
+                                       struct ext4_free_extent *fex)
+{
+       ext4_fsblk_t block;
+
+       block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
+                       + fex->fe_start
+                       + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+       return block;
+}
+
+#if BITS_PER_LONG == 64
+#define mb_correct_addr_and_bit(bit, addr)             \
+{                                                      \
+       bit += ((unsigned long) addr & 7UL) << 3;       \
+       addr = (void *) ((unsigned long) addr & ~7UL);  \
+}
+#elif BITS_PER_LONG == 32
+#define mb_correct_addr_and_bit(bit, addr)             \
+{                                                      \
+       bit += ((unsigned long) addr & 3UL) << 3;       \
+       addr = (void *) ((unsigned long) addr & ~3UL);  \
+}
+#else
+#error "how many bits you are?!"
+#endif
+
+static inline int mb_test_bit(int bit, void *addr)
+{
+       /*
+        * ext4_test_bit on architecture like powerpc
+        * needs unsigned long aligned address
+        */
+       mb_correct_addr_and_bit(bit, addr);
+       return ext4_test_bit(bit, addr);
+}
+
+static inline void mb_set_bit(int bit, void *addr)
+{
+       mb_correct_addr_and_bit(bit, addr);
+       ext4_set_bit(bit, addr);
+}
+
+static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr)
+{
+       mb_correct_addr_and_bit(bit, addr);
+       ext4_set_bit_atomic(lock, bit, addr);
+}
+
+static inline void mb_clear_bit(int bit, void *addr)
+{
+       mb_correct_addr_and_bit(bit, addr);
+       ext4_clear_bit(bit, addr);
+}
+
+static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
+{
+       mb_correct_addr_and_bit(bit, addr);
+       ext4_clear_bit_atomic(lock, bit, addr);
+}
+
+static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
+{
+       char *bb;
+
+       /* FIXME!! is this needed */
+       BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
+       BUG_ON(max == NULL);
+
+       if (order > e4b->bd_blkbits + 1) {
+               *max = 0;
+               return NULL;
+       }
+
+       /* at order 0 we see each particular block */
+       *max = 1 << (e4b->bd_blkbits + 3);
+       if (order == 0)
+               return EXT4_MB_BITMAP(e4b);
+
+       bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
+       *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
+
+       return bb;
+}
+
+#ifdef DOUBLE_CHECK
+static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
+                          int first, int count)
+{
+       int i;
+       struct super_block *sb = e4b->bd_sb;
+
+       if (unlikely(e4b->bd_info->bb_bitmap == NULL))
+               return;
+       BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group));
+       for (i = 0; i < count; i++) {
+               if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
+                       ext4_fsblk_t blocknr;
+                       blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb);
+                       blocknr += first + i;
+                       blocknr +=
+                           le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+
+                       ext4_error(sb, __FUNCTION__, "double-free of inode"
+                                  " %lu's block %llu(bit %u in group %lu)\n",
+                                  inode ? inode->i_ino : 0, blocknr,
+                                  first + i, e4b->bd_group);
+               }
+               mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
+       }
+}
+
+static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
+{
+       int i;
+
+       if (unlikely(e4b->bd_info->bb_bitmap == NULL))
+               return;
+       BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+       for (i = 0; i < count; i++) {
+               BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
+               mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
+       }
+}
+
+static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
+{
+       if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
+               unsigned char *b1, *b2;
+               int i;
+               b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
+               b2 = (unsigned char *) bitmap;
+               for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
+                       if (b1[i] != b2[i]) {
+                               printk("corruption in group %lu at byte %u(%u):"
+                                      " %x in copy != %x on disk/prealloc\n",
+                                       e4b->bd_group, i, i * 8, b1[i], b2[i]);
+                               BUG();
+                       }
+               }
+       }
+}
+
+#else
+static inline void mb_free_blocks_double(struct inode *inode,
+                               struct ext4_buddy *e4b, int first, int count)
+{
+       return;
+}
+static inline void mb_mark_used_double(struct ext4_buddy *e4b,
+                                               int first, int count)
+{
+       return;
+}
+static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
+{
+       return;
+}
+#endif
+
+#ifdef AGGRESSIVE_CHECK
+
+#define MB_CHECK_ASSERT(assert)                                                \
+do {                                                                   \
+       if (!(assert)) {                                                \
+               printk(KERN_EMERG                                       \
+                       "Assertion failure in %s() at %s:%d: \"%s\"\n", \
+                       function, file, line, # assert);                \
+               BUG();                                                  \
+       }                                                               \
+} while (0)
+
+static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
+                               const char *function, int line)
+{
+       struct super_block *sb = e4b->bd_sb;
+       int order = e4b->bd_blkbits + 1;
+       int max;
+       int max2;
+       int i;
+       int j;
+       int k;
+       int count;
+       struct ext4_group_info *grp;
+       int fragments = 0;
+       int fstart;
+       struct list_head *cur;
+       void *buddy;
+       void *buddy2;
+
+       if (!test_opt(sb, MBALLOC))
+               return 0;
+
+       {
+               static int mb_check_counter;
+               if (mb_check_counter++ % 100 != 0)
+                       return 0;
+       }
+
+       while (order > 1) {
+               buddy = mb_find_buddy(e4b, order, &max);
+               MB_CHECK_ASSERT(buddy);
+               buddy2 = mb_find_buddy(e4b, order - 1, &max2);
+               MB_CHECK_ASSERT(buddy2);
+               MB_CHECK_ASSERT(buddy != buddy2);
+               MB_CHECK_ASSERT(max * 2 == max2);
+
+               count = 0;
+               for (i = 0; i < max; i++) {
+
+                       if (mb_test_bit(i, buddy)) {
+                               /* only single bit in buddy2 may be 1 */
+                               if (!mb_test_bit(i << 1, buddy2)) {
+                                       MB_CHECK_ASSERT(
+                                               mb_test_bit((i<<1)+1, buddy2));
+                               } else if (!mb_test_bit((i << 1) + 1, buddy2)) {
+                                       MB_CHECK_ASSERT(
+                                               mb_test_bit(i << 1, buddy2));
+                               }
+                               continue;
+                       }
+
+                       /* both bits in buddy2 must be 0 */
+                       MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
+                       MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
+
+                       for (j = 0; j < (1 << order); j++) {
+                               k = (i * (1 << order)) + j;
+                               MB_CHECK_ASSERT(
+                                       !mb_test_bit(k, EXT4_MB_BITMAP(e4b)));
+                       }
+                       count++;
+               }
+               MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
+               order--;
+       }
+
+       fstart = -1;
+       buddy = mb_find_buddy(e4b, 0, &max);
+       for (i = 0; i < max; i++) {
+               if (!mb_test_bit(i, buddy)) {
+                       MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
+                       if (fstart == -1) {
+                               fragments++;
+                               fstart = i;
+                       }
+                       continue;
+               }
+               fstart = -1;
+               /* check used bits only */
+               for (j = 0; j < e4b->bd_blkbits + 1; j++) {
+                       buddy2 = mb_find_buddy(e4b, j, &max2);
+                       k = i >> j;
+                       MB_CHECK_ASSERT(k < max2);
+                       MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
+               }
+       }
+       MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
+       MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
+
+       grp = ext4_get_group_info(sb, e4b->bd_group);
+       buddy = mb_find_buddy(e4b, 0, &max);
+       list_for_each(cur, &grp->bb_prealloc_list) {
+               ext4_group_t groupnr;
+               struct ext4_prealloc_space *pa;
+               pa = list_entry(cur, struct ext4_prealloc_space, group_list);
+               ext4_get_group_no_and_offset(sb, pa->pstart, &groupnr, &k);
+               MB_CHECK_ASSERT(groupnr == e4b->bd_group);
+               for (i = 0; i < pa->len; i++)
+                       MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
+       }
+       return 0;
+}
+#undef MB_CHECK_ASSERT
+#define mb_check_buddy(e4b) __mb_check_buddy(e4b,      \
+                                       __FILE__, __FUNCTION__, __LINE__)
+#else
+#define mb_check_buddy(e4b)
+#endif
+
+/* FIXME!! need more doc */
+static void ext4_mb_mark_free_simple(struct super_block *sb,
+                               void *buddy, unsigned first, int len,
+                                       struct ext4_group_info *grp)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       unsigned short min;
+       unsigned short max;
+       unsigned short chunk;
+       unsigned short border;
+
+       BUG_ON(len >= EXT4_BLOCKS_PER_GROUP(sb));
+
+       border = 2 << sb->s_blocksize_bits;
+
+       while (len > 0) {
+               /* find how many blocks can be covered since this position */
+               max = ffs(first | border) - 1;
+
+               /* find how many blocks of power 2 we need to mark */
+               min = fls(len) - 1;
+
+               if (max < min)
+                       min = max;
+               chunk = 1 << min;
+
+               /* mark multiblock chunks only */
+               grp->bb_counters[min]++;
+               if (min > 0)
+                       mb_clear_bit(first >> min,
+                                    buddy + sbi->s_mb_offsets[min]);
+
+               len -= chunk;
+               first += chunk;
+       }
+}
+
+static void ext4_mb_generate_buddy(struct super_block *sb,
+                               void *buddy, void *bitmap, ext4_group_t group)
+{
+       struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+       unsigned short max = EXT4_BLOCKS_PER_GROUP(sb);
+       unsigned short i = 0;
+       unsigned short first;
+       unsigned short len;
+       unsigned free = 0;
+       unsigned fragments = 0;
+       unsigned long long period = get_cycles();
+
+       /* initialize buddy from bitmap which is aggregation
+        * of on-disk bitmap and preallocations */
+       i = ext4_find_next_zero_bit(bitmap, max, 0);
+       grp->bb_first_free = i;
+       while (i < max) {
+               fragments++;
+               first = i;
+               i = ext4_find_next_bit(bitmap, max, i);
+               len = i - first;
+               free += len;
+               if (len > 1)
+                       ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
+               else
+                       grp->bb_counters[0]++;
+               if (i < max)
+                       i = ext4_find_next_zero_bit(bitmap, max, i);
+       }
+       grp->bb_fragments = fragments;
+
+       if (free != grp->bb_free) {
+               printk(KERN_DEBUG
+                       "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
+                       group, free, grp->bb_free);
+               grp->bb_free = free;
+       }
+
+       clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
+
+       period = get_cycles() - period;
+       spin_lock(&EXT4_SB(sb)->s_bal_lock);
+       EXT4_SB(sb)->s_mb_buddies_generated++;
+       EXT4_SB(sb)->s_mb_generation_time += period;
+       spin_unlock(&EXT4_SB(sb)->s_bal_lock);
+}
+
+/* The buddy information is attached the buddy cache inode
+ * for convenience. The information regarding each group
+ * is loaded via ext4_mb_load_buddy. The information involve
+ * block bitmap and buddy information. The information are
+ * stored in the inode as
+ *
+ * {                        page                        }
+ * [ group 0 buddy][ group 0 bitmap] [group 1][ group 1]...
+ *
+ *
+ * one block each for bitmap and buddy information.
+ * So for each group we take up 2 blocks. A page can
+ * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize)  blocks.
+ * So it can have information regarding groups_per_page which
+ * is blocks_per_page/2
+ */
+
+static int ext4_mb_init_cache(struct page *page, char *incore)
+{
+       int blocksize;
+       int blocks_per_page;
+       int groups_per_page;
+       int err = 0;
+       int i;
+       ext4_group_t first_group;
+       int first_block;
+       struct super_block *sb;
+       struct buffer_head *bhs;
+       struct buffer_head **bh;
+       struct inode *inode;
+       char *data;
+       char *bitmap;
+
+       mb_debug("init page %lu\n", page->index);
+
+       inode = page->mapping->host;
+       sb = inode->i_sb;
+       blocksize = 1 << inode->i_blkbits;
+       blocks_per_page = PAGE_CACHE_SIZE / blocksize;
+
+       groups_per_page = blocks_per_page >> 1;
+       if (groups_per_page == 0)
+               groups_per_page = 1;
+
+       /* allocate buffer_heads to read bitmaps */
+       if (groups_per_page > 1) {
+               err = -ENOMEM;
+               i = sizeof(struct buffer_head *) * groups_per_page;
+               bh = kzalloc(i, GFP_NOFS);
+               if (bh == NULL)
+                       goto out;
+       } else
+               bh = &bhs;
+
+       first_group = page->index * blocks_per_page / 2;
+
+       /* read all groups the page covers into the cache */
+       for (i = 0; i < groups_per_page; i++) {
+               struct ext4_group_desc *desc;
+
+               if (first_group + i >= EXT4_SB(sb)->s_groups_count)
+                       break;
+
+               err = -EIO;
+               desc = ext4_get_group_desc(sb, first_group + i, NULL);
+               if (desc == NULL)
+                       goto out;
+
+               err = -ENOMEM;
+               bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc));
+               if (bh[i] == NULL)
+                       goto out;
+
+               if (bh_uptodate_or_lock(bh[i]))
+                       continue;
+
+               if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+                       ext4_init_block_bitmap(sb, bh[i],
+                                               first_group + i, desc);
+                       set_buffer_uptodate(bh[i]);
+                       unlock_buffer(bh[i]);
+                       continue;
+               }
+               get_bh(bh[i]);
+               bh[i]->b_end_io = end_buffer_read_sync;
+               submit_bh(READ, bh[i]);
+               mb_debug("read bitmap for group %lu\n", first_group + i);
+       }
+
+       /* wait for I/O completion */
+       for (i = 0; i < groups_per_page && bh[i]; i++)
+               wait_on_buffer(bh[i]);
+
+       err = -EIO;
+       for (i = 0; i < groups_per_page && bh[i]; i++)
+               if (!buffer_uptodate(bh[i]))
+                       goto out;
+
+       first_block = page->index * blocks_per_page;
+       for (i = 0; i < blocks_per_page; i++) {
+               int group;
+               struct ext4_group_info *grinfo;
+
+               group = (first_block + i) >> 1;
+               if (group >= EXT4_SB(sb)->s_groups_count)
+                       break;
+
+               /*
+                * data carry information regarding this
+                * particular group in the format specified
+                * above
+                *
+                */
+               data = page_address(page) + (i * blocksize);
+               bitmap = bh[group - first_group]->b_data;
+
+               /*
+                * We place the buddy block and bitmap block
+                * close together
+                */
+               if ((first_block + i) & 1) {
+                       /* this is block of buddy */
+                       BUG_ON(incore == NULL);
+                       mb_debug("put buddy for group %u in page %lu/%x\n",
+                               group, page->index, i * blocksize);
+                       memset(data, 0xff, blocksize);
+                       grinfo = ext4_get_group_info(sb, group);
+                       grinfo->bb_fragments = 0;
+                       memset(grinfo->bb_counters, 0,
+                              sizeof(unsigned short)*(sb->s_blocksize_bits+2));
+                       /*
+                        * incore got set to the group block bitmap below
+                        */
+                       ext4_mb_generate_buddy(sb, data, incore, group);
+                       incore = NULL;
+               } else {
+                       /* this is block of bitmap */
+                       BUG_ON(incore != NULL);
+                       mb_debug("put bitmap for group %u in page %lu/%x\n",
+                               group, page->index, i * blocksize);
+
+                       /* see comments in ext4_mb_put_pa() */
+                       ext4_lock_group(sb, group);
+                       memcpy(data, bitmap, blocksize);
+
+                       /* mark all preallocated blks used in in-core bitmap */
+                       ext4_mb_generate_from_pa(sb, data, group);
+                       ext4_unlock_group(sb, group);
+
+                       /* set incore so that the buddy information can be
+                        * generated using this
+                        */
+                       incore = data;
+               }
+       }
+       SetPageUptodate(page);
+
+out:
+       if (bh) {
+               for (i = 0; i < groups_per_page && bh[i]; i++)
+                       brelse(bh[i]);
+               if (bh != &bhs)
+                       kfree(bh);
+       }
+       return err;
+}
+
+static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+               struct ext4_buddy *e4b)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct inode *inode = sbi->s_buddy_cache;
+       int blocks_per_page;
+       int block;
+       int pnum;
+       int poff;
+       struct page *page;
+
+       mb_debug("load group %lu\n", group);
+
+       blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+
+       e4b->bd_blkbits = sb->s_blocksize_bits;
+       e4b->bd_info = ext4_get_group_info(sb, group);
+       e4b->bd_sb = sb;
+       e4b->bd_group = group;
+       e4b->bd_buddy_page = NULL;
+       e4b->bd_bitmap_page = NULL;
+
+       /*
+        * the buddy cache inode stores the block bitmap
+        * and buddy information in consecutive blocks.
+        * So for each group we need two blocks.
+        */
+       block = group * 2;
+       pnum = block / blocks_per_page;
+       poff = block % blocks_per_page;
+
+       /* we could use find_or_create_page(), but it locks page
+        * what we'd like to avoid in fast path ... */
+       page = find_get_page(inode->i_mapping, pnum);
+       if (page == NULL || !PageUptodate(page)) {
+               if (page)
+                       page_cache_release(page);
+               page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+               if (page) {
+                       BUG_ON(page->mapping != inode->i_mapping);
+                       if (!PageUptodate(page)) {
+                               ext4_mb_init_cache(page, NULL);
+                               mb_cmp_bitmaps(e4b, page_address(page) +
+                                              (poff * sb->s_blocksize));
+                       }
+                       unlock_page(page);
+               }
+       }
+       if (page == NULL || !PageUptodate(page))
+               goto err;
+       e4b->bd_bitmap_page = page;
+       e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
+       mark_page_accessed(page);
+
+       block++;
+       pnum = block / blocks_per_page;
+       poff = block % blocks_per_page;
+
+       page = find_get_page(inode->i_mapping, pnum);
+       if (page == NULL || !PageUptodate(page)) {
+               if (page)
+                       page_cache_release(page);
+               page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+               if (page) {
+                       BUG_ON(page->mapping != inode->i_mapping);
+                       if (!PageUptodate(page))
+                               ext4_mb_init_cache(page, e4b->bd_bitmap);
+
+                       unlock_page(page);
+               }
+       }
+       if (page == NULL || !PageUptodate(page))
+               goto err;
+       e4b->bd_buddy_page = page;
+       e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
+       mark_page_accessed(page);
+
+       BUG_ON(e4b->bd_bitmap_page == NULL);
+       BUG_ON(e4b->bd_buddy_page == NULL);
+
+       return 0;
+
+err:
+       if (e4b->bd_bitmap_page)
+               page_cache_release(e4b->bd_bitmap_page);
+       if (e4b->bd_buddy_page)
+               page_cache_release(e4b->bd_buddy_page);
+       e4b->bd_buddy = NULL;
+       e4b->bd_bitmap = NULL;
+       return -EIO;
+}
+
+static void ext4_mb_release_desc(struct ext4_buddy *e4b)
+{
+       if (e4b->bd_bitmap_page)
+               page_cache_release(e4b->bd_bitmap_page);
+       if (e4b->bd_buddy_page)
+               page_cache_release(e4b->bd_buddy_page);
+}
+
+
+static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
+{
+       int order = 1;
+       void *bb;
+
+       BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
+       BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
+
+       bb = EXT4_MB_BUDDY(e4b);
+       while (order <= e4b->bd_blkbits + 1) {
+               block = block >> 1;
+               if (!mb_test_bit(block, bb)) {
+                       /* this block is part of buddy of order 'order' */
+                       return order;
+               }
+               bb += 1 << (e4b->bd_blkbits - order);
+               order++;
+       }
+       return 0;
+}
+
+static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
+{
+       __u32 *addr;
+
+       len = cur + len;
+       while (cur < len) {
+               if ((cur & 31) == 0 && (len - cur) >= 32) {
+                       /* fast path: clear whole word at once */
+                       addr = bm + (cur >> 3);
+                       *addr = 0;
+                       cur += 32;
+                       continue;
+               }
+               mb_clear_bit_atomic(lock, cur, bm);
+               cur++;
+       }
+}
+
+static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
+{
+       __u32 *addr;
+
+       len = cur + len;
+       while (cur < len) {
+               if ((cur & 31) == 0 && (len - cur) >= 32) {
+                       /* fast path: set whole word at once */
+                       addr = bm + (cur >> 3);
+                       *addr = 0xffffffff;
+                       cur += 32;
+                       continue;
+               }
+               mb_set_bit_atomic(lock, cur, bm);
+               cur++;
+       }
+}
+
+static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
+                         int first, int count)
+{
+       int block = 0;
+       int max = 0;
+       int order;
+       void *buddy;
+       void *buddy2;
+       struct super_block *sb = e4b->bd_sb;
+
+       BUG_ON(first + count > (sb->s_blocksize << 3));
+       BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group));
+       mb_check_buddy(e4b);
+       mb_free_blocks_double(inode, e4b, first, count);
+
+       e4b->bd_info->bb_free += count;
+       if (first < e4b->bd_info->bb_first_free)
+               e4b->bd_info->bb_first_free = first;
+
+       /* let's maintain fragments counter */
+       if (first != 0)
+               block = !mb_test_bit(first - 1, EXT4_MB_BITMAP(e4b));
+       if (first + count < EXT4_SB(sb)->s_mb_maxs[0])
+               max = !mb_test_bit(first + count, EXT4_MB_BITMAP(e4b));
+       if (block && max)
+               e4b->bd_info->bb_fragments--;
+       else if (!block && !max)
+               e4b->bd_info->bb_fragments++;
+
+       /* let's maintain buddy itself */
+       while (count-- > 0) {
+               block = first++;
+               order = 0;
+
+               if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) {
+                       ext4_fsblk_t blocknr;
+                       blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb);
+                       blocknr += block;
+                       blocknr +=
+                           le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+
+                       ext4_error(sb, __FUNCTION__, "double-free of inode"
+                                  " %lu's block %llu(bit %u in group %lu)\n",
+                                  inode ? inode->i_ino : 0, blocknr, block,
+                                  e4b->bd_group);
+               }
+               mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
+               e4b->bd_info->bb_counters[order]++;
+
+               /* start of the buddy */
+               buddy = mb_find_buddy(e4b, order, &max);
+
+               do {
+                       block &= ~1UL;
+                       if (mb_test_bit(block, buddy) ||
+                                       mb_test_bit(block + 1, buddy))
+                               break;
+
+                       /* both the buddies are free, try to coalesce them */
+                       buddy2 = mb_find_buddy(e4b, order + 1, &max);
+
+                       if (!buddy2)
+                               break;
+
+                       if (order > 0) {
+                               /* for special purposes, we don't set
+                                * free bits in bitmap */
+                               mb_set_bit(block, buddy);
+                               mb_set_bit(block + 1, buddy);
+                       }
+                       e4b->bd_info->bb_counters[order]--;
+                       e4b->bd_info->bb_counters[order]--;
+
+                       block = block >> 1;
+                       order++;
+                       e4b->bd_info->bb_counters[order]++;
+
+                       mb_clear_bit(block, buddy2);
+                       buddy = buddy2;
+               } while (1);
+       }
+       mb_check_buddy(e4b);
+
+       return 0;
+}
+
+static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
+                               int needed, struct ext4_free_extent *ex)
+{
+       int next = block;
+       int max;
+       int ord;
+       void *buddy;
+
+       BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+       BUG_ON(ex == NULL);
+
+       buddy = mb_find_buddy(e4b, order, &max);
+       BUG_ON(buddy == NULL);
+       BUG_ON(block >= max);
+       if (mb_test_bit(block, buddy)) {
+               ex->fe_len = 0;
+               ex->fe_start = 0;
+               ex->fe_group = 0;
+               return 0;
+       }
+
+       /* FIXME dorp order completely ? */
+       if (likely(order == 0)) {
+               /* find actual order */
+               order = mb_find_order_for_block(e4b, block);
+               block = block >> order;
+       }
+
+       ex->fe_len = 1 << order;
+       ex->fe_start = block << order;
+       ex->fe_group = e4b->bd_group;
+
+       /* calc difference from given start */
+       next = next - ex->fe_start;
+       ex->fe_len -= next;
+       ex->fe_start += next;
+
+       while (needed > ex->fe_len &&
+              (buddy = mb_find_buddy(e4b, order, &max))) {
+
+               if (block + 1 >= max)
+                       break;
+
+               next = (block + 1) * (1 << order);
+               if (mb_test_bit(next, EXT4_MB_BITMAP(e4b)))
+                       break;
+
+               ord = mb_find_order_for_block(e4b, next);
+
+               order = ord;
+               block = next >> order;
+               ex->fe_len += 1 << orde