]> nv-tegra.nvidia Code Review - linux-2.6.git/commitdiff
Merge git://git.kernel.org/pub/scm/linux/kernel/git/lethal/sh-2.6
authorLinus Torvalds <torvalds@linux-foundation.org>
Mon, 20 Oct 2008 16:13:34 +0000 (09:13 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 20 Oct 2008 16:13:34 +0000 (09:13 -0700)
* git://git.kernel.org/pub/scm/linux/kernel/git/lethal/sh-2.6: (112 commits)
  sh: Move SH-4 CPU headers down one more level.
  sh: Only build in gpio.o when CONFIG_GENERIC_GPIO is selected.
  sh: Migrate common board headers to mach-common/.
  sh: Move the CPU definition headers from asm/ to cpu/.
  serial: sh-sci: Add support SCIF of SH7723
  video: add sh_mobile_lcdc platform flags
  video: remove unused sh_mobile_lcdc platform data
  sh: remove consistent alloc cruft
  sh: add dynamic crash base address support
  sh: reduce Migo-R smc91x overruns
  sh: Fix up some merge damage.
  Fix debugfs_create_file's error checking method for arch/sh/mm/
  Fix debugfs_create_dir's error checking method for arch/sh/kernel/
  sh: ap325rxa: Add support RTC RX-8564LC in AP325RXA board
  sh: Use sh7720 GPIO on magicpanelr2 board
  sh: Add sh7720 pinmux code
  sh: Use sh7203 GPIO on rsk7203 board
  sh: Add sh7203 pinmux code
  sh: Use sh7723 GPIO on AP325RXA board
  sh: Add sh7723 pinmux code
  ...

352 files changed:
.mailmap
Documentation/cgroups/cgroups.txt [moved from Documentation/cgroups.txt with 100% similarity]
Documentation/cgroups/freezer-subsystem.txt [new file with mode: 0644]
Documentation/controllers/memory.txt
Documentation/cpusets.txt
Documentation/filesystems/ext3.txt
Documentation/filesystems/proc.txt
Documentation/kernel-parameters.txt
Documentation/mtd/nand_ecc.txt [new file with mode: 0644]
Documentation/sysrq.txt
Documentation/vm/unevictable-lru.txt [new file with mode: 0644]
arch/alpha/Kconfig
arch/alpha/include/asm/thread_info.h
arch/alpha/kernel/core_marvel.c
arch/alpha/kernel/time.c
arch/arm/Kconfig
arch/arm/mach-pxa/include/mach/pxa3xx_nand.h
arch/arm/plat-mxc/include/mach/mxc_nand.h [new file with mode: 0644]
arch/arm/plat-omap/include/mach/onenand.h
arch/avr32/Kconfig
arch/avr32/include/asm/thread_info.h
arch/blackfin/Kconfig
arch/cris/Kconfig
arch/cris/arch-v10/drivers/ds1302.c
arch/cris/arch-v10/drivers/pcf8563.c
arch/cris/arch-v32/drivers/pcf8563.c
arch/cris/kernel/time.c
arch/frv/Kconfig
arch/h8300/Kconfig
arch/h8300/include/asm/thread_info.h
arch/ia64/Kconfig
arch/ia64/hp/common/sba_iommu.c
arch/ia64/kernel/crash_dump.c
arch/ia64/kernel/efi.c
arch/ia64/kernel/setup.c
arch/ia64/mm/init.c
arch/m32r/Kconfig
arch/m68k/Kconfig
arch/m68k/bvme6000/rtc.c
arch/m68knommu/Kconfig
arch/m68knommu/include/asm/thread_info.h
arch/mips/Kconfig
arch/mips/dec/time.c
arch/mips/include/asm/mc146818-time.h
arch/mips/pmc-sierra/yosemite/setup.c
arch/mips/sibyte/swarm/rtc_m41t81.c
arch/mips/sibyte/swarm/rtc_xicor1241.c
arch/mn10300/Kconfig
arch/mn10300/kernel/rtc.c
arch/parisc/Kconfig
arch/powerpc/Kconfig
arch/powerpc/include/asm/ps3av.h
arch/powerpc/kernel/crash_dump.c
arch/powerpc/mm/mem.c
arch/s390/Kconfig
arch/s390/include/asm/thread_info.h
arch/s390/mm/init.c
arch/sh/Kconfig
arch/sh/kernel/crash_dump.c
arch/sparc/Kconfig
arch/sparc/include/asm/thread_info_32.h
arch/sparc/include/asm/thread_info_64.h
arch/sparc64/Kconfig
arch/um/Kconfig
arch/um/sys-i386/signal.c
arch/x86/Kconfig
arch/x86/kernel/crash_dump_32.c
arch/x86/kernel/crash_dump_64.c
arch/x86/kernel/rtc.c
arch/x86/kernel/setup.c
arch/x86/mm/pageattr.c
arch/x86/xen/enlighten.c
arch/x86/xen/mmu.c
arch/xtensa/Kconfig
drivers/acpi/battery.c
drivers/acpi/sbs.c
drivers/acpi/sleep/proc.c
drivers/acpi/system.c
drivers/base/memory.c
drivers/base/node.c
drivers/block/aoe/aoeblk.c
drivers/block/nbd.c
drivers/char/ds1286.c
drivers/char/ds1302.c
drivers/char/ip27-rtc.c
drivers/char/pc8736x_gpio.c
drivers/char/rtc.c
drivers/char/sx.c
drivers/char/sysrq.c
drivers/char/tpm/tpm.c
drivers/edac/cell_edac.c
drivers/firmware/iscsi_ibft.c
drivers/gpio/gpiolib.c
drivers/hwmon/applesmc.c
drivers/hwmon/pc87360.c
drivers/i2c/chips/at24.c
drivers/i2c/chips/ds1682.c
drivers/i2c/chips/menelaus.c
drivers/infiniband/core/cm.c
drivers/media/dvb/ttpci/av7110.c
drivers/media/video/cx18/cx18-driver.h
drivers/media/video/ivtv/ivtv-driver.h
drivers/memstick/core/mspro_block.c
drivers/misc/hp-wmi.c
drivers/mtd/Kconfig
drivers/mtd/chips/Kconfig
drivers/mtd/chips/cfi_cmdset_0001.c
drivers/mtd/chips/cfi_cmdset_0002.c
drivers/mtd/chips/cfi_probe.c
drivers/mtd/chips/cfi_util.c
drivers/mtd/chips/gen_probe.c
drivers/mtd/cmdlinepart.c
drivers/mtd/devices/Kconfig
drivers/mtd/devices/m25p80.c
drivers/mtd/devices/mtd_dataflash.c
drivers/mtd/inftlcore.c
drivers/mtd/maps/Kconfig
drivers/mtd/maps/Makefile
drivers/mtd/maps/ebony.c [deleted file]
drivers/mtd/maps/ocotea.c [deleted file]
drivers/mtd/maps/omap-toto-flash.c [deleted file]
drivers/mtd/maps/pci.c
drivers/mtd/maps/physmap_of.c
drivers/mtd/maps/walnut.c [deleted file]
drivers/mtd/mtdchar.c
drivers/mtd/mtdconcat.c
drivers/mtd/mtdoops.c
drivers/mtd/mtdpart.c
drivers/mtd/nand/Kconfig
drivers/mtd/nand/Makefile
drivers/mtd/nand/atmel_nand.c
drivers/mtd/nand/cs553x_nand.c
drivers/mtd/nand/fsl_elbc_nand.c
drivers/mtd/nand/fsl_upm.c
drivers/mtd/nand/gpio.c [new file with mode: 0644]
drivers/mtd/nand/mxc_nand.c [new file with mode: 0644]
drivers/mtd/nand/nand_base.c
drivers/mtd/nand/nand_ecc.c
drivers/mtd/nand/nandsim.c
drivers/mtd/nand/pxa3xx_nand.c
drivers/mtd/nand/sh_flctl.c [new file with mode: 0644]
drivers/mtd/nand/toto.c [deleted file]
drivers/mtd/ofpart.c
drivers/mtd/onenand/Kconfig
drivers/mtd/onenand/Makefile
drivers/mtd/onenand/omap2.c [new file with mode: 0644]
drivers/mtd/onenand/onenand_base.c
drivers/mtd/ssfdc.c
drivers/mtd/ubi/cdev.c
drivers/mtd/ubi/scan.c
drivers/mtd/ubi/vtbl.c
drivers/pci/intel-iommu.c
drivers/pci/pci.c
drivers/pci/probe.c
drivers/pci/rom.c
drivers/pci/setup-bus.c
drivers/pci/setup-res.c
drivers/power/power_supply_sysfs.c
drivers/ps3/ps3av.c
drivers/ps3/ps3av_cmd.c
drivers/rtc/rtc-at91rm9200.c
drivers/rtc/rtc-bq4802.c
drivers/rtc/rtc-cmos.c
drivers/rtc/rtc-ds1216.c
drivers/rtc/rtc-ds1302.c
drivers/rtc/rtc-ds1305.c
drivers/rtc/rtc-ds1307.c
drivers/rtc/rtc-ds1511.c
drivers/rtc/rtc-ds1553.c
drivers/rtc/rtc-ds1742.c
drivers/rtc/rtc-fm3130.c
drivers/rtc/rtc-isl1208.c
drivers/rtc/rtc-m41t80.c
drivers/rtc/rtc-m41t94.c
drivers/rtc/rtc-m48t59.c
drivers/rtc/rtc-m48t86.c
drivers/rtc/rtc-max6900.c
drivers/rtc/rtc-max6902.c
drivers/rtc/rtc-omap.c
drivers/rtc/rtc-pcf8563.c
drivers/rtc/rtc-pcf8583.c
drivers/rtc/rtc-r9701.c
drivers/rtc/rtc-rs5c313.c
drivers/rtc/rtc-rs5c348.c
drivers/rtc/rtc-rs5c372.c
drivers/rtc/rtc-s35390a.c
drivers/rtc/rtc-s3c.c
drivers/rtc/rtc-sh.c
drivers/rtc/rtc-stk17ta8.c
drivers/rtc/rtc-v3020.c
drivers/rtc/rtc-x1205.c
drivers/scsi/arcmsr/arcmsr_attr.c
drivers/scsi/sr_vendor.c
drivers/serial/8250_gsc.c
drivers/serial/serial_txx9.c
drivers/serial/sn_console.c
drivers/staging/go7007/Kconfig
drivers/staging/sxg/Kconfig
drivers/telephony/phonedev.c
drivers/video/fbmem.c
drivers/w1/slaves/w1_ds2760.c
fs/Kconfig
fs/Kconfig.binfmt
fs/binfmt_elf.c
fs/buffer.c
fs/cifs/Kconfig [new file with mode: 0644]
fs/cifs/file.c
fs/exec.c
fs/ext3/balloc.c
fs/ext3/dir.c
fs/ext3/inode.c
fs/ext3/resize.c
fs/ext3/super.c
fs/hfsplus/extents.c
fs/hfsplus/inode.c
fs/jbd/commit.c
fs/jbd/transaction.c
fs/jffs2/Kconfig [new file with mode: 0644]
fs/jffs2/compr.c
fs/jffs2/dir.c
fs/jffs2/erase.c
fs/jffs2/fs.c
fs/jffs2/nodemgmt.c
fs/jffs2/wbuf.c
fs/nfs/dir.c
fs/ntfs/file.c
fs/proc/proc_misc.c
fs/proc/vmcore.c
fs/ramfs/file-nommu.c
fs/ramfs/inode.c
fs/seq_file.c
include/asm-cris/thread_info.h
include/asm-generic/rtc.h
include/asm-m68k/thread_info.h
include/asm-parisc/thread_info.h
include/asm-um/thread_info.h
include/asm-xtensa/thread_info.h
include/linux/Kbuild
include/linux/backing-dev.h
include/linux/bcd.h
include/linux/bitmap.h
include/linux/buffer_head.h
include/linux/byteorder/Kbuild
include/linux/byteorder/big_endian.h
include/linux/byteorder/little_endian.h
include/linux/cgroup.h
include/linux/cgroup_subsys.h
include/linux/crash_dump.h
include/linux/ext3_fs.h
include/linux/fb.h
include/linux/freezer.h
include/linux/jbd.h
include/linux/memcontrol.h
include/linux/migrate.h
include/linux/mm.h
include/linux/mm_inline.h
include/linux/mm_types.h
include/linux/mmzone.h
include/linux/mtd/cfi.h
include/linux/mtd/flashchip.h
include/linux/mtd/mtd.h
include/linux/mtd/nand-gpio.h [new file with mode: 0644]
include/linux/mtd/nand.h
include/linux/mtd/onenand_regs.h
include/linux/mtd/partitions.h
include/linux/mtd/sh_flctl.h [new file with mode: 0644]
include/linux/page-flags.h
include/linux/page_cgroup.h [new file with mode: 0644]
include/linux/pagemap.h
include/linux/pagevec.h
include/linux/pci.h
include/linux/ptrace.h
include/linux/rmap.h
include/linux/sched.h
include/linux/seq_file.h
include/linux/swab.h
include/linux/swap.h
include/linux/sysfs.h
include/linux/vmalloc.h
include/linux/vmstat.h
include/net/netns/x_tables.h
init/Kconfig
init/main.c
ipc/mqueue.c
ipc/shm.c
kernel/Kconfig.freezer [new file with mode: 0644]
kernel/Makefile
kernel/cgroup.c
kernel/cgroup_debug.c
kernel/cgroup_freezer.c [new file with mode: 0644]
kernel/configs.c
kernel/cpuset.c
kernel/freezer.c [new file with mode: 0644]
kernel/kexec.c
kernel/kthread.c
kernel/power/process.c
kernel/ptrace.c
kernel/rcupreempt.c
kernel/sysctl.c
lib/bitmap.c
lib/vsprintf.c
mm/Kconfig
mm/Makefile
mm/filemap.c
mm/fremap.c
mm/hugetlb.c
mm/internal.h
mm/memcontrol.c
mm/memory.c
mm/memory_hotplug.c
mm/mempolicy.c
mm/migrate.c
mm/mlock.c
mm/mmap.c
mm/mremap.c
mm/nommu.c
mm/page-writeback.c
mm/page_alloc.c
mm/page_cgroup.c [new file with mode: 0644]
mm/readahead.c
mm/rmap.c
mm/shmem.c
mm/swap.c
mm/swap_state.c
mm/swapfile.c
mm/truncate.c
mm/vmalloc.c
mm/vmscan.c
mm/vmstat.c
net/bridge/br_netfilter.c
net/core/dev.c
net/dccp/ipv6.c
net/dccp/minisocks.c
net/dccp/output.c
net/ipv4/arp.c
net/ipv4/netfilter/nf_nat_snmp_basic.c
net/ipv6/syncookies.c
net/ipv6/tcp_ipv6.c
net/netfilter/Kconfig
net/netfilter/ipvs/Kconfig
net/netfilter/nf_conntrack_netlink.c
net/netfilter/xt_NFQUEUE.c
net/netfilter/xt_iprange.c
net/netfilter/xt_recent.c
net/sched/sch_generic.c
security/device_cgroup.c
sound/core/pcm_misc.c
sound/drivers/dummy.c
sound/pci/ca0106/ca0106_main.c
sound/ppc/snd_ps3.c
sound/ppc/snd_ps3.h
sound/soc/omap/omap-mcbsp.c

index dfab12f809ed9c678638844ced3717742986ab26..eba9bf953ef5bc2e21bc670d33c845a8483e20a6 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -66,6 +66,7 @@ Kenneth W Chen <kenneth.w.chen@intel.com>
 Koushik <raghavendra.koushik@neterion.com>
 Leonid I Ananiev <leonid.i.ananiev@intel.com>
 Linas Vepstas <linas@austin.ibm.com>
+Mark Brown <broonie@sirena.org.uk>
 Matthieu CASTET <castet.matthieu@free.fr>
 Michael Buesch <mb@bu3sch.de>
 Michael Buesch <mbuesch@freenet.de>
diff --git a/Documentation/cgroups/freezer-subsystem.txt b/Documentation/cgroups/freezer-subsystem.txt
new file mode 100644 (file)
index 0000000..c50ab58
--- /dev/null
@@ -0,0 +1,99 @@
+       The cgroup freezer is useful to batch job management systems which start
+and stop sets of tasks in order to schedule the resources of a machine
+according to the desires of a system administrator. This sort of program
+is often used on HPC clusters to schedule access to the cluster as a
+whole. The cgroup freezer uses cgroups to describe the set of tasks to
+be started/stopped by the batch job management system. It also provides
+a means to start and stop the tasks composing the job.
+
+       The cgroup freezer will also be useful for checkpointing running groups
+of tasks. The freezer allows the checkpoint code to obtain a consistent
+image of the tasks by attempting to force the tasks in a cgroup into a
+quiescent state. Once the tasks are quiescent another task can
+walk /proc or invoke a kernel interface to gather information about the
+quiesced tasks. Checkpointed tasks can be restarted later should a
+recoverable error occur. This also allows the checkpointed tasks to be
+migrated between nodes in a cluster by copying the gathered information
+to another node and restarting the tasks there.
+
+       Sequences of SIGSTOP and SIGCONT are not always sufficient for stopping
+and resuming tasks in userspace. Both of these signals are observable
+from within the tasks we wish to freeze. While SIGSTOP cannot be caught,
+blocked, or ignored it can be seen by waiting or ptracing parent tasks.
+SIGCONT is especially unsuitable since it can be caught by the task. Any
+programs designed to watch for SIGSTOP and SIGCONT could be broken by
+attempting to use SIGSTOP and SIGCONT to stop and resume tasks. We can
+demonstrate this problem using nested bash shells:
+
+       $ echo $$
+       16644
+       $ bash
+       $ echo $$
+       16690
+
+       From a second, unrelated bash shell:
+       $ kill -SIGSTOP 16690
+       $ kill -SIGCONT 16690
+
+       <at this point 16690 exits and causes 16644 to exit too>
+
+       This happens because bash can observe both signals and choose how it
+responds to them.
+
+       Another example of a program which catches and responds to these
+signals is gdb. In fact any program designed to use ptrace is likely to
+have a problem with this method of stopping and resuming tasks.
+
+        In contrast, the cgroup freezer uses the kernel freezer code to
+prevent the freeze/unfreeze cycle from becoming visible to the tasks
+being frozen. This allows the bash example above and gdb to run as
+expected.
+
+       The freezer subsystem in the container filesystem defines a file named
+freezer.state. Writing "FROZEN" to the state file will freeze all tasks in the
+cgroup. Subsequently writing "THAWED" will unfreeze the tasks in the cgroup.
+Reading will return the current state.
+
+* Examples of usage :
+
+   # mkdir /containers/freezer
+   # mount -t cgroup -ofreezer freezer  /containers
+   # mkdir /containers/0
+   # echo $some_pid > /containers/0/tasks
+
+to get status of the freezer subsystem :
+
+   # cat /containers/0/freezer.state
+   THAWED
+
+to freeze all tasks in the container :
+
+   # echo FROZEN > /containers/0/freezer.state
+   # cat /containers/0/freezer.state
+   FREEZING
+   # cat /containers/0/freezer.state
+   FROZEN
+
+to unfreeze all tasks in the container :
+
+   # echo THAWED > /containers/0/freezer.state
+   # cat /containers/0/freezer.state
+   THAWED
+
+This is the basic mechanism which should do the right thing for user space tasks
+in a simple scenario.
+
+It's important to note that freezing can be incomplete. In that case we return
+EBUSY. This means that some tasks in the cgroup are busy doing something that
+prevents us from completely freezing the cgroup at this time. After EBUSY,
+the cgroup will remain partially frozen -- reflected by freezer.state reporting
+"FREEZING" when read. The state will remain "FREEZING" until one of these
+things happens:
+
+       1) Userspace cancels the freezing operation by writing "THAWED" to
+               the freezer.state file
+       2) Userspace retries the freezing operation by writing "FROZEN" to
+               the freezer.state file (writing "FREEZING" is not legal
+               and returns EIO)
+       3) The tasks that blocked the cgroup from entering the "FROZEN"
+               state disappear from the cgroup's set of tasks.
index 9b53d5827361fd647f3388212e648f502defc698..1c07547d3f81f28a19edb9de8752f8865b44478d 100644 (file)
@@ -112,14 +112,22 @@ the per cgroup LRU.
 
 2.2.1 Accounting details
 
-All mapped pages (RSS) and unmapped user pages (Page Cache) are accounted.
-RSS pages are accounted at the time of page_add_*_rmap() unless they've already
-been accounted for earlier. A file page will be accounted for as Page Cache;
-it's mapped into the page tables of a process, duplicate accounting is carefully
-avoided. Page Cache pages are accounted at the time of add_to_page_cache().
-The corresponding routines that remove a page from the page tables or removes
-a page from Page Cache is used to decrement the accounting counters of the
-cgroup.
+All mapped anon pages (RSS) and cache pages (Page Cache) are accounted.
+(some pages which can never be reclaimed and will not be on the global LRU
+ are not accounted. we just account pages under usual vm management.)
+
+RSS pages are accounted at page_fault unless they've already been accounted
+for earlier. A file page will be accounted for as Page Cache when it's
+inserted into inode (radix-tree). While it's mapped into the page tables of
+processes, duplicate accounting is carefully avoided.
+
+A RSS page is unaccounted when it's fully unmapped. A PageCache page is
+unaccounted when it's removed from radix-tree.
+
+At page migration, accounting information is kept.
+
+Note: we just account pages-on-lru because our purpose is to control amount
+of used pages. not-on-lru pages tend to be out-of-control from vm view.
 
 2.3 Shared Page Accounting
 
index 47e568a9370afa28f703acf9ce55f88a8ca52519..5c86c258c7913dd9c97da4a3e4f7a0dd666745bc 100644 (file)
@@ -48,7 +48,7 @@ hooks, beyond what is already present, required to manage dynamic
 job placement on large systems.
 
 Cpusets use the generic cgroup subsystem described in
-Documentation/cgroup.txt.
+Documentation/cgroups/cgroups.txt.
 
 Requests by a task, using the sched_setaffinity(2) system call to
 include CPUs in its CPU affinity mask, and using the mbind(2) and
index 295f26cd895a0d5cd460267a84ebbb687978fa69..9dd2a3bb2acc87b85d473a2080284910f04f8585 100644 (file)
@@ -96,6 +96,11 @@ errors=remount-ro(*) Remount the filesystem read-only on an error.
 errors=continue                Keep going on a filesystem error.
 errors=panic           Panic and halt the machine if an error occurs.
 
+data_err=ignore(*)     Just print an error message if an error occurs
+                       in a file data buffer in ordered mode.
+data_err=abort         Abort the journal if an error occurs in a file
+                       data buffer in ordered mode.
+
 grpid                  Give objects the same group ID as their creator.
 bsdgroups
 
index c032bf39e8b9b581b6ada65f7a99b97b2dae7019..bcceb99b81dd8038130fd9b1032b3e4ba80dc113 100644 (file)
@@ -1384,15 +1384,18 @@ causes the kernel to prefer to reclaim dentries and inodes.
 dirty_background_ratio
 ----------------------
 
-Contains, as a percentage of total system memory, the number of pages at which
-the pdflush background writeback daemon will start writing out dirty data.
+Contains, as a percentage of the dirtyable system memory (free pages + mapped
+pages + file cache, not including locked pages and HugePages), the number of
+pages at which the pdflush background writeback daemon will start writing out
+dirty data.
 
 dirty_ratio
 -----------------
 
-Contains, as a percentage of total system memory, the number of pages at which
-a process which is generating disk writes will itself start writing out dirty
-data.
+Contains, as a percentage of the dirtyable system memory (free pages + mapped
+pages + file cache, not including locked pages and HugePages), the number of
+pages at which a process which is generating disk writes will itself start
+writing out dirty data.
 
 dirty_writeback_centisecs
 -------------------------
@@ -2412,24 +2415,29 @@ will be dumped when the <pid> process is dumped. coredump_filter is a bitmask
 of memory types. If a bit of the bitmask is set, memory segments of the
 corresponding memory type are dumped, otherwise they are not dumped.
 
-The following 4 memory types are supported:
+The following 7 memory types are supported:
   - (bit 0) anonymous private memory
   - (bit 1) anonymous shared memory
   - (bit 2) file-backed private memory
   - (bit 3) file-backed shared memory
   - (bit 4) ELF header pages in file-backed private memory areas (it is
             effective only if the bit 2 is cleared)
+  - (bit 5) hugetlb private memory
+  - (bit 6) hugetlb shared memory
 
   Note that MMIO pages such as frame buffer are never dumped and vDSO pages
   are always dumped regardless of the bitmask status.
 
-Default value of coredump_filter is 0x3; this means all anonymous memory
-segments are dumped.
+  Note that bits 0-4 don't affect hugetlb memory; hugetlb memory is only
+  affected by bits 5-6.
+
+Default value of coredump_filter is 0x23; this means all anonymous memory
+segments and hugetlb private memory are dumped.
 
 If you don't want to dump all shared memory segments attached to pid 1234,
-write 1 to the process's proc file.
+write 0x21 to the process's proc file.
 
-  $ echo 0x1 > /proc/1234/coredump_filter
+  $ echo 0x21 > /proc/1234/coredump_filter
 
 When a new process is created, the process inherits the bitmask status from its
 parent. It is useful to set up coredump_filter before the program runs.
index bcecfaa1e7701cea682367553551e4fb54791dca..0f1544f67400b4288887649bf73b9fb09449b92c 100644 (file)
@@ -690,7 +690,7 @@ and is between 256 and 4096 characters. It is defined in the file
                        See Documentation/block/as-iosched.txt and
                        Documentation/block/deadline-iosched.txt for details.
 
-       elfcorehdr=     [X86-32, X86_64]
+       elfcorehdr=     [IA64,PPC,SH,X86-32,X86_64]
                        Specifies physical address of start of kernel core
                        image elf header. Generally kexec loader will
                        pass this option to capture kernel.
diff --git a/Documentation/mtd/nand_ecc.txt b/Documentation/mtd/nand_ecc.txt
new file mode 100644 (file)
index 0000000..bdf93b7
--- /dev/null
@@ -0,0 +1,714 @@
+Introduction
+============
+
+Having looked at the linux mtd/nand driver and more specific at nand_ecc.c
+I felt there was room for optimisation. I bashed the code for a few hours
+performing tricks like table lookup removing superfluous code etc.
+After that the speed was increased by 35-40%.
+Still I was not too happy as I felt there was additional room for improvement.
+
+Bad! I was hooked.
+I decided to annotate my steps in this file. Perhaps it is useful to someone
+or someone learns something from it.
+
+
+The problem
+===========
+
+NAND flash (at least SLC one) typically has sectors of 256 bytes.
+However NAND flash is not extremely reliable so some error detection
+(and sometimes correction) is needed.
+
+This is done by means of a Hamming code. I'll try to explain it in
+laymans terms (and apologies to all the pro's in the field in case I do
+not use the right terminology, my coding theory class was almost 30
+years ago, and I must admit it was not one of my favourites).
+
+As I said before the ecc calculation is performed on sectors of 256
+bytes. This is done by calculating several parity bits over the rows and
+columns. The parity used is even parity which means that the parity bit = 1
+if the data over which the parity is calculated is 1 and the parity bit = 0
+if the data over which the parity is calculated is 0. So the total
+number of bits over the data over which the parity is calculated + the
+parity bit is even. (see wikipedia if you can't follow this).
+Parity is often calculated by means of an exclusive or operation,
+sometimes also referred to as xor. In C the operator for xor is ^
+
+Back to ecc.
+Let's give a small figure:
+
+byte   0:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp2 rp4 ... rp14
+byte   1:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp2 rp4 ... rp14
+byte   2:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp3 rp4 ... rp14
+byte   3:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp3 rp4 ... rp14
+byte   4:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp2 rp5 ... rp14
+....
+byte 254:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp0 rp3 rp5 ... rp15
+byte 255:  bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0   rp1 rp3 rp5 ... rp15
+           cp1  cp0  cp1  cp0  cp1  cp0  cp1  cp0
+           cp3  cp3  cp2  cp2  cp3  cp3  cp2  cp2
+           cp5  cp5  cp5  cp5  cp4  cp4  cp4  cp4
+
+This figure represents a sector of 256 bytes.
+cp is my abbreviation for column parity, rp for row parity.
+
+Let's start to explain column parity.
+cp0 is the parity that belongs to all bit0, bit2, bit4, bit6.
+so the sum of all bit0, bit2, bit4 and bit6 values + cp0 itself is even.
+Similarly cp1 is the sum of all bit1, bit3, bit5 and bit7.
+cp2 is the parity over bit0, bit1, bit4 and bit5
+cp3 is the parity over bit2, bit3, bit6 and bit7.
+cp4 is the parity over bit0, bit1, bit2 and bit3.
+cp5 is the parity over bit4, bit5, bit6 and bit7.
+Note that each of cp0 .. cp5 is exactly one bit.
+
+Row parity actually works almost the same.
+rp0 is the parity of all even bytes (0, 2, 4, 6, ... 252, 254)
+rp1 is the parity of all odd bytes (1, 3, 5, 7, ..., 253, 255)
+rp2 is the parity of all bytes 0, 1, 4, 5, 8, 9, ...
+(so handle two bytes, then skip 2 bytes).
+rp3 covers the half rp2 does not cover (bytes 2, 3, 6, 7, 10, 11, ...)
+for rp4 the rule is cover 4 bytes, skip 4 bytes, cover 4 bytes, skip 4 etc.
+so rp4 calculates parity over bytes 0, 1, 2, 3, 8, 9, 10, 11, 16, ...)
+and rp5 covers the other half, so bytes 4, 5, 6, 7, 12, 13, 14, 15, 20, ..
+The story now becomes quite boring. I guess you get the idea.
+rp6 covers 8 bytes then skips 8 etc
+rp7 skips 8 bytes then covers 8 etc
+rp8 covers 16 bytes then skips 16 etc
+rp9 skips 16 bytes then covers 16 etc
+rp10 covers 32 bytes then skips 32 etc
+rp11 skips 32 bytes then covers 32 etc
+rp12 covers 64 bytes then skips 64 etc
+rp13 skips 64 bytes then covers 64 etc
+rp14 covers 128 bytes then skips 128
+rp15 skips 128 bytes then covers 128
+
+In the end the parity bits are grouped together in three bytes as
+follows:
+ECC    Bit 7 Bit 6 Bit 5 Bit 4 Bit 3 Bit 2 Bit 1 Bit 0
+ECC 0   rp07  rp06  rp05  rp04  rp03  rp02  rp01  rp00
+ECC 1   rp15  rp14  rp13  rp12  rp11  rp10  rp09  rp08
+ECC 2   cp5   cp4   cp3   cp2   cp1   cp0      1     1
+
+I detected after writing this that ST application note AN1823
+(http://www.st.com/stonline/books/pdf/docs/10123.pdf) gives a much
+nicer picture.(but they use line parity as term where I use row parity)
+Oh well, I'm graphically challenged, so suffer with me for a moment :-)
+And I could not reuse the ST picture anyway for copyright reasons.
+
+
+Attempt 0
+=========
+
+Implementing the parity calculation is pretty simple.
+In C pseudocode:
+for (i = 0; i < 256; i++)
+{
+    if (i & 0x01)
+       rp1 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
+    else
+       rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp0;
+    if (i & 0x02)
+       rp3 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp3;
+    else
+       rp2 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp2;
+    if (i & 0x04)
+      rp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp5;
+    else
+      rp4 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp4;
+    if (i & 0x08)
+      rp7 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp7;
+    else
+      rp6 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp6;
+    if (i & 0x10)
+      rp9 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp9;
+    else
+      rp8 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp8;
+    if (i & 0x20)
+      rp11 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp11;
+    else
+      rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10;
+    if (i & 0x40)
+      rp13 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp13;
+    else
+      rp12 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp12;
+    if (i & 0x80)
+      rp15 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp15;
+    else
+      rp14 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp14;
+    cp0 = bit6 ^ bit4 ^ bit2 ^ bit0 ^ cp0;
+    cp1 = bit7 ^ bit5 ^ bit3 ^ bit1 ^ cp1;
+    cp2 = bit5 ^ bit4 ^ bit1 ^ bit0 ^ cp2;
+    cp3 = bit7 ^ bit6 ^ bit3 ^ bit2 ^ cp3;
+    cp4 = bit3 ^ bit2 ^ bit1 ^ bit0 ^ cp4;
+    cp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ cp5;
+}
+
+
+Analysis 0
+==========
+
+C does have bitwise operators but not really operators to do the above
+efficiently (and most hardware has no such instructions either).
+Therefore without implementing this it was clear that the code above was
+not going to bring me a Nobel prize :-)
+
+Fortunately the exclusive or operation is commutative, so we can combine
+the values in any order. So instead of calculating all the bits
+individually, let us try to rearrange things.
+For the column parity this is easy. We can just xor the bytes and in the
+end filter out the relevant bits. This is pretty nice as it will bring
+all cp calculation out of the if loop.
+
+Similarly we can first xor the bytes for the various rows.
+This leads to:
+
+
+Attempt 1
+=========
+
+const char parity[256] = {
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0
+};
+
+void ecc1(const unsigned char *buf, unsigned char *code)
+{
+    int i;
+    const unsigned char *bp = buf;
+    unsigned char cur;
+    unsigned char rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+    unsigned char rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
+    unsigned char par;
+
+    par = 0;
+    rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
+    rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
+    rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
+    rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
+
+    for (i = 0; i < 256; i++)
+    {
+        cur = *bp++;
+        par ^= cur;
+        if (i & 0x01) rp1 ^= cur; else rp0 ^= cur;
+        if (i & 0x02) rp3 ^= cur; else rp2 ^= cur;
+        if (i & 0x04) rp5 ^= cur; else rp4 ^= cur;
+        if (i & 0x08) rp7 ^= cur; else rp6 ^= cur;
+        if (i & 0x10) rp9 ^= cur; else rp8 ^= cur;
+        if (i & 0x20) rp11 ^= cur; else rp10 ^= cur;
+        if (i & 0x40) rp13 ^= cur; else rp12 ^= cur;
+        if (i & 0x80) rp15 ^= cur; else rp14 ^= cur;
+    }
+    code[0] =
+        (parity[rp7] << 7) |
+        (parity[rp6] << 6) |
+        (parity[rp5] << 5) |
+        (parity[rp4] << 4) |
+        (parity[rp3] << 3) |
+        (parity[rp2] << 2) |
+        (parity[rp1] << 1) |
+        (parity[rp0]);
+    code[1] =
+        (parity[rp15] << 7) |
+        (parity[rp14] << 6) |
+        (parity[rp13] << 5) |
+        (parity[rp12] << 4) |
+        (parity[rp11] << 3) |
+        (parity[rp10] << 2) |
+        (parity[rp9]  << 1) |
+        (parity[rp8]);
+    code[2] =
+        (parity[par & 0xf0] << 7) |
+        (parity[par & 0x0f] << 6) |
+        (parity[par & 0xcc] << 5) |
+        (parity[par & 0x33] << 4) |
+        (parity[par & 0xaa] << 3) |
+        (parity[par & 0x55] << 2);
+    code[0] = ~code[0];
+    code[1] = ~code[1];
+    code[2] = ~code[2];
+}
+
+Still pretty straightforward. The last three invert statements are there to
+give a checksum of 0xff 0xff 0xff for an empty flash. In an empty flash
+all data is 0xff, so the checksum then matches.
+
+I also introduced the parity lookup. I expected this to be the fastest
+way to calculate the parity, but I will investigate alternatives later
+on.
+
+
+Analysis 1
+==========
+
+The code works, but is not terribly efficient. On my system it took
+almost 4 times as much time as the linux driver code. But hey, if it was
+*that* easy this would have been done long before.
+No pain, no gain.
+
+Fortunately there is plenty of room for improvement.
+
+In step 1 we moved from bit-wise calculation to byte-wise calculation.
+However in C we can also use the unsigned long data type and virtually
+every modern microprocessor supports 32 bit operations, so why not try
+to write our code in such a way that we process data in 32 bit chunks.
+
+Of course this means some modification as the row parity is byte by
+byte. A quick analysis:
+for the column parity we use the par variable. When extending to 32 bits
+we can in the end easily calculate p0 and p1 from it.
+(because par now consists of 4 bytes, contributing to rp1, rp0, rp1, rp0
+respectively)
+also rp2 and rp3 can be easily retrieved from par as rp3 covers the
+first two bytes and rp2 the last two bytes.
+
+Note that of course now the loop is executed only 64 times (256/4).
+And note that care must be taken wrt byte ordering. The way bytes are
+ordered in a long is machine dependent, and might affect us.
+Anyway, if there is an issue: this code is developed on x86 (to be
+precise: a DELL PC with a D920 Intel CPU)
+
+And of course the performance might depend on alignment, but I expect
+that the I/O buffers in the nand driver are aligned properly (and
+otherwise that should be fixed to get maximum performance).
+
+Let's give it a try...
+
+
+Attempt 2
+=========
+
+extern const char parity[256];
+
+void ecc2(const unsigned char *buf, unsigned char *code)
+{
+    int i;
+    const unsigned long *bp = (unsigned long *)buf;
+    unsigned long cur;
+    unsigned long rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+    unsigned long rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
+    unsigned long par;
+
+    par = 0;
+    rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
+    rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
+    rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
+    rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
+
+    for (i = 0; i < 64; i++)
+    {
+        cur = *bp++;
+        par ^= cur;
+        if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
+        if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
+        if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
+        if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
+        if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
+        if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
+    }
+    /*
+       we need to adapt the code generation for the fact that rp vars are now
+       long; also the column parity calculation needs to be changed.
+       we'll bring rp4 to 15 back to single byte entities by shifting and
+       xoring
+    */
+    rp4 ^= (rp4 >> 16); rp4 ^= (rp4 >> 8); rp4 &= 0xff;
+    rp5 ^= (rp5 >> 16); rp5 ^= (rp5 >> 8); rp5 &= 0xff;
+    rp6 ^= (rp6 >> 16); rp6 ^= (rp6 >> 8); rp6 &= 0xff;
+    rp7 ^= (rp7 >> 16); rp7 ^= (rp7 >> 8); rp7 &= 0xff;
+    rp8 ^= (rp8 >> 16); rp8 ^= (rp8 >> 8); rp8 &= 0xff;
+    rp9 ^= (rp9 >> 16); rp9 ^= (rp9 >> 8); rp9 &= 0xff;
+    rp10 ^= (rp10 >> 16); rp10 ^= (rp10 >> 8); rp10 &= 0xff;
+    rp11 ^= (rp11 >> 16); rp11 ^= (rp11 >> 8); rp11 &= 0xff;
+    rp12 ^= (rp12 >> 16); rp12 ^= (rp12 >> 8); rp12 &= 0xff;
+    rp13 ^= (rp13 >> 16); rp13 ^= (rp13 >> 8); rp13 &= 0xff;
+    rp14 ^= (rp14 >> 16); rp14 ^= (rp14 >> 8); rp14 &= 0xff;
+    rp15 ^= (rp15 >> 16); rp15 ^= (rp15 >> 8); rp15 &= 0xff;
+    rp3 = (par >> 16); rp3 ^= (rp3 >> 8); rp3 &= 0xff;
+    rp2 = par & 0xffff; rp2 ^= (rp2 >> 8); rp2 &= 0xff;
+    par ^= (par >> 16);
+    rp1 = (par >> 8); rp1 &= 0xff;
+    rp0 = (par & 0xff);
+    par ^= (par >> 8); par &= 0xff;
+
+    code[0] =
+        (parity[rp7] << 7) |
+        (parity[rp6] << 6) |
+        (parity[rp5] << 5) |
+        (parity[rp4] << 4) |
+        (parity[rp3] << 3) |
+        (parity[rp2] << 2) |
+        (parity[rp1] << 1) |
+        (parity[rp0]);
+    code[1] =
+        (parity[rp15] << 7) |
+        (parity[rp14] << 6) |
+        (parity[rp13] << 5) |
+        (parity[rp12] << 4) |
+        (parity[rp11] << 3) |
+        (parity[rp10] << 2) |
+        (parity[rp9]  << 1) |
+        (parity[rp8]);
+    code[2] =
+        (parity[par & 0xf0] << 7) |
+        (parity[par & 0x0f] << 6) |
+        (parity[par & 0xcc] << 5) |
+        (parity[par & 0x33] << 4) |
+        (parity[par & 0xaa] << 3) |
+        (parity[par & 0x55] << 2);
+    code[0] = ~code[0];
+    code[1] = ~code[1];
+    code[2] = ~code[2];
+}
+
+The parity array is not shown any more. Note also that for these
+examples I kinda deviated from my regular programming style by allowing
+multiple statements on a line, not using { } in then and else blocks
+with only a single statement and by using operators like ^=
+
+
+Analysis 2
+==========
+
+The code (of course) works, and hurray: we are a little bit faster than
+the linux driver code (about 15%). But wait, don't cheer too quickly.
+There is more to be gained.
+If we look at e.g. rp14 and rp15 we see that we either xor our data with
+rp14 or with rp15. However we also have par which goes over all data.
+This means there is no need to calculate rp14 as it can be calculated from
+rp15 through rp14 = par ^ rp15;
+(or if desired we can avoid calculating rp15 and calculate it from
+rp14).  That is why some places refer to inverse parity.
+Of course the same thing holds for rp4/5, rp6/7, rp8/9, rp10/11 and rp12/13.
+Effectively this means we can eliminate the else clause from the if
+statements. Also we can optimise the calculation in the end a little bit
+by going from long to byte first. Actually we can even avoid the table
+lookups
+
+Attempt 3
+=========
+
+Odd replaced:
+        if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
+        if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
+        if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
+        if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
+        if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
+        if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
+with
+        if (i & 0x01) rp5 ^= cur;
+        if (i & 0x02) rp7 ^= cur;
+        if (i & 0x04) rp9 ^= cur;
+        if (i & 0x08) rp11 ^= cur;
+        if (i & 0x10) rp13 ^= cur;
+        if (i & 0x20) rp15 ^= cur;
+
+        and outside the loop added:
+    rp4  = par ^ rp5;
+    rp6  = par ^ rp7;
+    rp8  = par ^ rp9;
+    rp10  = par ^ rp11;
+    rp12  = par ^ rp13;
+    rp14  = par ^ rp15;
+
+And after that the code takes about 30% more time, although the number of
+statements is reduced. This is also reflected in the assembly code.
+
+
+Analysis 3
+==========
+
+Very weird. Guess it has to do with caching or instruction parallelism
+or so. I also tried on an eeePC (Celeron, clocked at 900 MHz). Interesting
+observation was that this one is only 30% slower (according to time)
+executing the code than my 3 GHz D920 processor.
+
+Well, it was expected not to be easy so maybe instead move to a
+different track: let's move back to the code from attempt2 and do some
+loop unrolling. This will eliminate a few if statements. I'll try
+different amounts of unrolling to see what works best.
+
+
+Attempt 4
+=========
+
+Unrolled the loop 1, 2, 3 and 4 times.
+For 4 the code starts with:
+
+    for (i = 0; i < 4; i++)
+    {
+        cur = *bp++;
+        par ^= cur;
+        rp4 ^= cur;
+        rp6 ^= cur;
+        rp8 ^= cur;
+        rp10 ^= cur;
+        if (i & 0x1) rp13 ^= cur; else rp12 ^= cur;
+        if (i & 0x2) rp15 ^= cur; else rp14 ^= cur;
+        cur = *bp++;
+        par ^= cur;
+        rp5 ^= cur;
+        rp6 ^= cur;
+        ...
+
+
+Analysis 4
+==========
+
+Unrolling once gains about 15%
+Unrolling twice keeps the gain at about 15%
+Unrolling three times gives a gain of 30% compared to attempt 2.
+Unrolling four times gives a marginal improvement compared to unrolling
+three times.
+
+I decided to proceed with a four time unrolled loop anyway. It was my gut
+feeling that in the next steps I would obtain additional gain from it.
+
+The next step was triggered by the fact that par contains the xor of all
+bytes and rp4 and rp5 each contain the xor of half of the bytes.
+So in effect par = rp4 ^ rp5. But as xor is commutative we can also say
+that rp5 = par ^ rp4. So no need to keep both rp4 and rp5 around. We can
+eliminate rp5 (or rp4, but I already foresaw another optimisation).
+The same holds for rp6/7, rp8/9, rp10/11 rp12/13 and rp14/15.
+
+
+Attempt 5
+=========
+
+Effectively all odd digit rp assignments in the loop were removed.
+This included the else clause of the if statements.
+Of course after the loop we need to correct things by adding code like:
+    rp5 = par ^ rp4;
+Also the initial assignments (rp5 = 0; etc) could be removed.
+Along the line I also removed the initialisation of rp0/1/2/3.
+
+
+Analysis 5
+==========
+
+Measurements showed this was a good move. The run-time roughly halved
+compared with attempt 4 with 4 times unrolled, and we only require 1/3rd
+of the processor time compared to the current code in the linux kernel.
+
+However, still I thought there was more. I didn't like all the if
+statements. Why not keep a running parity and only keep the last if
+statement. Time for yet another version!
+
+
+Attempt 6
+=========
+
+The code within the for loop was changed to:
+
+    for (i = 0; i < 4; i++)
+    {
+        cur = *bp++; tmppar  = cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
+
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+           cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+           cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
+
+           cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur; rp8 ^= cur;
+           cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp8 ^= cur;
+
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur;
+
+           par ^= tmppar;
+        if ((i & 0x1) == 0) rp12 ^= tmppar;
+        if ((i & 0x2) == 0) rp14 ^= tmppar;
+    }
+
+As you can see tmppar is used to accumulate the parity within a for
+iteration. In the last 3 statements it is added to par and, if needed,
+to rp12 and rp14.
+
+While making the changes I also found that I could exploit that tmppar
+contains the running parity for this iteration. So instead of having:
+rp4 ^= cur; rp6 ^= cur;
+I removed the rp6 ^= cur; statement and did rp6 ^= tmppar; on next
+statement. A similar change was done for rp8 and rp10
+
+
+Analysis 6
+==========
+
+Measuring this code again showed big gain. When executing the original
+linux code 1 million times, this took about 1 second on my system.
+(using time to measure the performance). After this iteration I was back
+to 0.075 sec. Actually I had to decide to start measuring over 10
+million iterations in order not to lose too much accuracy. This one
+definitely seemed to be the jackpot!
+
+There is a little bit more room for improvement though. There are three
+places with statements:
+rp4 ^= cur; rp6 ^= cur;
+It seems more efficient to also maintain a variable rp4_6 in the for
+loop; this eliminates 3 statements per loop. Of course after the loop we
+need to correct by adding:
+    rp4 ^= rp4_6;
+    rp6 ^= rp4_6
+Furthermore there are 4 sequential assignments to rp8. This can be
+encoded slightly more efficiently by saving tmppar before those 4 lines
+and later do rp8 = rp8 ^ tmppar ^ notrp8;
+(where notrp8 is the value of rp8 before those 4 lines).
+Again a use of the commutative property of xor.
+Time for a new test!
+
+
+Attempt 7
+=========
+
+The new code now looks like:
+
+    for (i = 0; i < 4; i++)
+    {
+        cur = *bp++; tmppar  = cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
+
+        cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+           cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+           cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
+
+           notrp8 = tmppar;
+           cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+           cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur;
+           rp8 = rp8 ^ tmppar ^ notrp8;
+
+        cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+        cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+        cur = *bp++; tmppar ^= cur;
+
+           par ^= tmppar;
+        if ((i & 0x1) == 0) rp12 ^= tmppar;
+        if ((i & 0x2) == 0) rp14 ^= tmppar;
+    }
+    rp4 ^= rp4_6;
+    rp6 ^= rp4_6;
+
+
+Not a big change, but every penny counts :-)
+
+
+Analysis 7
+==========
+
+Actually this made things worse. Not very much, but I don't want to move
+into the wrong direction. Maybe something to investigate later. Could
+have to do with caching again.
+
+Guess that is what there is to win within the loop. Maybe unrolling one
+more time will help. I'll keep the optimisations from 7 for now.
+
+
+Attempt 8
+=========
+
+Unrolled the loop one more time.
+
+
+Analysis 8
+==========
+
+This makes things worse. Let's stick with attempt 6 and continue from there.
+Although it seems that the code within the loop cannot be optimised
+further there is still room to optimize the generation of the ecc codes.
+We can simply calculate the total parity. If this is 0 then rp4 = rp5
+etc. If the parity is 1, then rp4 = !rp5;
+But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits
+in the result byte and then do something like
+    code[0] |= (code[0] << 1);
+Let's test this.
+
+
+Attempt 9
+=========
+
+Changed the code but again this slightly degrades performance. Tried all
+kind of other things, like having dedicated parity arrays to avoid the
+shift after parity[rp7] << 7; No gain.
+Change the lookup using the parity array by using shift operators (e.g.
+replace parity[rp7] << 7 with:
+rp7 ^= (rp7 << 4);
+rp7 ^= (rp7 << 2);
+rp7 ^= (rp7 << 1);
+rp7 &= 0x80;
+No gain.
+
+The only marginal change was inverting the parity bits, so we can remove
+the last three invert statements.
+
+Ah well, pity this does not deliver more. Then again 10 million
+iterations using the linux driver code takes between 13 and 13.5
+seconds, whereas my code now takes about 0.73 seconds for those 10
+million iterations. So basically I've improved the performance by a
+factor 18 on my system. Not that bad. Of course on different hardware
+you will get different results. No warranties!
+
+But of course there is no such thing as a free lunch. The codesize almost
+tripled (from 562 bytes to 1434 bytes). Then again, it is not that much.
+
+
+Correcting errors
+=================
+
+For correcting errors I again used the ST application note as a starter,
+but I also peeked at the existing code.
+The algorithm itself is pretty straightforward. Just xor the given and
+the calculated ecc. If all bytes are 0 there is no problem. If 11 bits
+are 1 we have one correctable bit error. If there is 1 bit 1, we have an
+error in the given ecc code.
+It proved to be fastest to do some table lookups. Performance gain
+introduced by this is about a factor 2 on my system when a repair had to
+be done, and 1% or so if no repair had to be done.
+Code size increased from 330 bytes to 686 bytes for this function.
+(gcc 4.2, -O3)
+
+
+Conclusion
+==========
+
+The gain when calculating the ecc is tremendous. On my development hardware
+a speedup of a factor of 18 for ecc calculation was achieved. On a test on an
+embedded system with a MIPS core a factor 7 was obtained.
+On a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor
+5 (big endian mode, gcc 4.1.2, -O3)
+For correction not much gain could be obtained (as bitflips are rare). Then
+again there are also much less cycles spent there.
+
+It seems there is not much more gain possible in this, at least when
+programmed in C. Of course it might be possible to squeeze something more
+out of it with an assembler program, but due to pipeline behaviour etc
+this is very tricky (at least for intel hw).
+
+Author: Frans Meulenbroeks
+Copyright (C) 2008 Koninklijke Philips Electronics NV.
index 5ce0952aa06546096fc165d9562f35565ed17025..49378a9f2b5f276c4a050e76b0898c25a74303e0 100644 (file)
@@ -95,7 +95,8 @@ On all -  write a character to /proc/sysrq-trigger.  e.g.:
 
 'p'     - Will dump the current registers and flags to your console.
 
-'q'     - Will dump a list of all running timers.
+'q'     - Will dump a list of all running hrtimers.
+         WARNING: Does not cover any other timers
 
 'r'     - Turns off keyboard raw mode and sets it to XLATE.
 
diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt
new file mode 100644 (file)
index 0000000..125eed5
--- /dev/null
@@ -0,0 +1,615 @@
+
+This document describes the Linux memory management "Unevictable LRU"
+infrastructure and the use of this infrastructure to manage several types
+of "unevictable" pages.  The document attempts to provide the overall
+rationale behind this mechanism and the rationale for some of the design
+decisions that drove the implementation.  The latter design rationale is
+discussed in the context of an implementation description.  Admittedly, one
+can obtain the implementation details--the "what does it do?"--by reading the
+code.  One hopes that the descriptions below add value by providing the answer
+to "why does it do that?".
+
+Unevictable LRU Infrastructure:
+
+The Unevictable LRU adds an additional LRU list to track unevictable pages
+and to hide these pages from vmscan.  This mechanism is based on a patch by
+Larry Woodman of Red Hat to address several scalability problems with page
+reclaim in Linux.  The problems have been observed at customer sites on large
+memory x86_64 systems.  For example, a non-NUMA x86_64 platform with 128GB
+of main memory will have over 32 million 4k pages in a single zone.  When a
+large fraction of these pages are not evictable for any reason [see below],
+vmscan will spend a lot of time scanning the LRU lists looking for the small
+fraction of pages that are evictable.  This can result in a situation where
+all cpus are spending 100% of their time in vmscan for hours or days on end,
+with the system completely unresponsive.
+
+The Unevictable LRU infrastructure addresses the following classes of
+unevictable pages:
+
++ page owned by ramfs
++ page mapped into SHM_LOCKed shared memory regions
++ page mapped into VM_LOCKED [mlock()ed] vmas
+
+The infrastructure might be able to handle other conditions that make pages
+unevictable, either by definition or by circumstance, in the future.
+
+
+The Unevictable LRU List
+
+The Unevictable LRU infrastructure consists of an additional, per-zone, LRU list
+called the "unevictable" list and an associated page flag, PG_unevictable, to
+indicate that the page is being managed on the unevictable list.  The
+PG_unevictable flag is analogous to, and mutually exclusive with, the PG_active
+flag in that it indicates on which LRU list a page resides when PG_lru is set.
+The unevictable LRU list is source configurable based on the UNEVICTABLE_LRU
+Kconfig option.
+
+The Unevictable LRU infrastructure maintains unevictable pages on an additional
+LRU list for a few reasons:
+
+1) We get to "treat unevictable pages just like we treat other pages in the
+   system, which means we get to use the same code to manipulate them, the
+   same code to isolate them (for migrate, etc.), the same code to keep track
+   of the statistics, etc..." [Rik van Riel]
+
+2) We want to be able to migrate unevictable pages between nodes--for memory
+   defragmentation, workload management and memory hotplug.  The linux kernel
+   can only migrate pages that it can successfully isolate from the lru lists.
+   If we were to maintain pages elsewise than on an lru-like list, where they
+   can be found by isolate_lru_page(), we would prevent their migration, unless
+   we reworked migration code to find the unevictable pages.
+
+
+The unevictable LRU list does not differentiate between file backed and swap
+backed [anon] pages.  This differentiation is only important while the pages
+are, in fact, evictable.
+
+The unevictable LRU list benefits from the "arrayification" of the per-zone
+LRU lists and statistics originally proposed and posted by Christoph Lameter.
+
+The unevictable list does not use the lru pagevec mechanism. Rather,
+unevictable pages are placed directly on the page's zone's unevictable
+list under the zone lru_lock.  The reason for this is to prevent stranding
+of pages on the unevictable list when one task has the page isolated from the
+lru and other tasks are changing the "evictability" state of the page.
+
+
+Unevictable LRU and Memory Controller Interaction
+
+The memory controller data structure automatically gets a per zone unevictable
+lru list as a result of the "arrayification" of the per-zone LRU lists.  The
+memory controller tracks the movement of pages to and from the unevictable list.
+When a memory control group comes under memory pressure, the controller will
+not attempt to reclaim pages on the unevictable list.  This has a couple of
+effects.  Because the pages are "hidden" from reclaim on the unevictable list,
+the reclaim process can be more efficient, dealing only with pages that have
+a chance of being reclaimed.  On the other hand, if too many of the pages
+charged to the control group are unevictable, the evictable portion of the
+working set of the tasks in the control group may not fit into the available
+memory.  This can cause the control group to thrash or to oom-kill tasks.
+
+
+Unevictable LRU:  Detecting Unevictable Pages
+
+The function page_evictable(page, vma) in vmscan.c determines whether a
+page is evictable or not.  For ramfs pages and pages in SHM_LOCKed regions,
+page_evictable() tests a new address space flag, AS_UNEVICTABLE, in the page's
+address space using a wrapper function.  Wrapper functions are used to set,
+clear and test the flag to reduce the requirement for #ifdef's throughout the
+source code.  AS_UNEVICTABLE is set on ramfs inode/mapping when it is created.
+This flag remains for the life of the inode.
+
+For shared memory regions, AS_UNEVICTABLE is set when an application
+successfully SHM_LOCKs the region and is removed when the region is
+SHM_UNLOCKed.  Note that shmctl(SHM_LOCK, ...) does not populate the page
+tables for the region as does, for example, mlock().   So, we make no special
+effort to push any pages in the SHM_LOCKed region to the unevictable list.
+Vmscan will do this when/if it encounters the pages during reclaim.  On
+SHM_UNLOCK, shmctl() scans the pages in the region and "rescues" them from the
+unevictable list if no other condition keeps them unevictable.  If a SHM_LOCKed
+region is destroyed, the pages are also "rescued" from the unevictable list in
+the process of freeing them.
+
+page_evictable() detects mlock()ed pages by testing an additional page flag,
+PG_mlocked via the PageMlocked() wrapper.  If the page is NOT mlocked, and a
+non-NULL vma is supplied, page_evictable() will check whether the vma is
+VM_LOCKED via is_mlocked_vma().  is_mlocked_vma() will SetPageMlocked() and
+update the appropriate statistics if the vma is VM_LOCKED.  This method allows
+efficient "culling" of pages in the fault path that are being faulted in to
+VM_LOCKED vmas.
+
+
+Unevictable Pages and Vmscan [shrink_*_list()]
+
+If unevictable pages are culled in the fault path, or moved to the unevictable
+list at mlock() or mmap() time, vmscan will never encounter the pages until
+they have become evictable again, for example, via munlock() and have been
+"rescued" from the unevictable list.  However, there may be situations where we
+decide, for the sake of expediency, to leave a unevictable page on one of the
+regular active/inactive LRU lists for vmscan to deal with.  Vmscan checks for
+such pages in all of the shrink_{active|inactive|page}_list() functions and
+will "cull" such pages that it encounters--that is, it diverts those pages to
+the unevictable list for the zone being scanned.
+
+There may be situations where a page is mapped into a VM_LOCKED vma, but the
+page is not marked as PageMlocked.  Such pages will make it all the way to
+shrink_page_list() where they will be detected when vmscan walks the reverse
+map in try_to_unmap().  If try_to_unmap() returns SWAP_MLOCK, shrink_page_list()
+will cull the page at that point.
+
+Note that for anonymous pages, shrink_page_list() attempts to add the page to
+the swap cache before it tries to unmap the page.  To avoid this unnecessary
+consumption of swap space, shrink_page_list() calls try_to_munlock() to check
+whether any VM_LOCKED vmas map the page without attempting to unmap the page.
+If try_to_munlock() returns SWAP_MLOCK, shrink_page_list() will cull the page
+without consuming swap space.  try_to_munlock() will be described below.
+
+To "cull" an unevictable page, vmscan simply puts the page back on the lru
+list using putback_lru_page()--the inverse operation to isolate_lru_page()--
+after dropping the page lock.  Because the condition which makes the page
+unevictable may change once the page is unlocked, putback_lru_page() will
+recheck the unevictable state of a page that it places on the unevictable lru
+list.  If the page has become unevictable, putback_lru_page() removes it from
+the list and retries, including the page_unevictable() test.  Because such a
+race is a rare event and movement of pages onto the unevictable list should be
+rare, these extra evictability checks should not occur in the majority of calls
+to putback_lru_page().
+
+
+Mlocked Page:  Prior Work
+
+The "Unevictable Mlocked Pages" infrastructure is based on work originally
+posted by Nick Piggin in an RFC patch entitled "mm: mlocked pages off LRU".
+Nick posted his patch as an alternative to a patch posted by Christoph
+Lameter to achieve the same objective--hiding mlocked pages from vmscan.
+In Nick's patch, he used one of the struct page lru list link fields as a count
+of VM_LOCKED vmas that map the page.  This use of the link field for a count
+prevented the management of the pages on an LRU list.  Thus, mlocked pages were
+not migratable as isolate_lru_page() could not find them and the lru list link
+field was not available to the migration subsystem.  Nick resolved this by
+putting mlocked pages back on the lru list before attempting to isolate them,
+thus abandoning the count of VM_LOCKED vmas.  When Nick's patch was integrated
+with the Unevictable LRU work, the count was replaced by walking the reverse
+map to determine whether any VM_LOCKED vmas mapped the page.  More on this
+below.
+
+
+Mlocked Pages:  Basic Management
+
+Mlocked pages--pages mapped into a VM_LOCKED vma--represent one class of
+unevictable pages.  When such a page has been "noticed" by the memory
+management subsystem, the page is marked with the PG_mlocked [PageMlocked()]
+flag.  A PageMlocked() page will be placed on the unevictable LRU list when
+it is added to the LRU.   Pages can be "noticed" by memory management in
+several places:
+
+1) in the mlock()/mlockall() system call handlers.
+2) in the mmap() system call handler when mmap()ing a region with the
+   MAP_LOCKED flag, or mmap()ing a region in a task that has called
+   mlockall() with the MCL_FUTURE flag.  Both of these conditions result
+   in the VM_LOCKED flag being set for the vma.
+3) in the fault path, if mlocked pages are "culled" in the fault path,
+   and when a VM_LOCKED stack segment is expanded.
+4) as mentioned above, in vmscan:shrink_page_list() with attempting to
+   reclaim a page in a VM_LOCKED vma--via try_to_unmap() or try_to_munlock().
+
+Mlocked pages become unlocked and rescued from the unevictable list when:
+
+1) mapped in a range unlocked via the munlock()/munlockall() system calls.
+2) munmapped() out of the last VM_LOCKED vma that maps the page, including
+   unmapping at task exit.
+3) when the page is truncated from the last VM_LOCKED vma of an mmap()ed file.
+4) before a page is COWed in a VM_LOCKED vma.
+
+
+Mlocked Pages:  mlock()/mlockall() System Call Handling
+
+Both [do_]mlock() and [do_]mlockall() system call handlers call mlock_fixup()
+for each vma in the range specified by the call.  In the case of mlockall(),
+this is the entire active address space of the task.  Note that mlock_fixup()
+is used for both mlock()ing and munlock()ing a range of memory.  A call to
+mlock() an already VM_LOCKED vma, or to munlock() a vma that is not VM_LOCKED
+is treated as a no-op--mlock_fixup() simply returns.
+
+If the vma passes some filtering described in "Mlocked Pages:  Filtering Vmas"
+below, mlock_fixup() will attempt to merge the vma with its neighbors or split
+off a subset of the vma if the range does not cover the entire vma.  Once the
+vma has been merged or split or neither, mlock_fixup() will call
+__mlock_vma_pages_range() to fault in the pages via get_user_pages() and
+to mark the pages as mlocked via mlock_vma_page().
+
+Note that the vma being mlocked might be mapped with PROT_NONE.  In this case,
+get_user_pages() will be unable to fault in the pages.  That's OK.  If pages
+do end up getting faulted into this VM_LOCKED vma, we'll handle them in the
+fault path or in vmscan.
+
+Also note that a page returned by get_user_pages() could be truncated or
+migrated out from under us, while we're trying to mlock it.  To detect
+this, __mlock_vma_pages_range() tests the page_mapping after acquiring
+the page lock.  If the page is still associated with its mapping, we'll
+go ahead and call mlock_vma_page().  If the mapping is gone, we just
+unlock the page and move on.  Worst case, this results in a page mapped
+in a VM_LOCKED vma remaining on a normal LRU list without being
+PageMlocked().  Again, vmscan will detect and cull such pages.
+
+mlock_vma_page(), called with the page locked [N.B., not "mlocked"], will
+TestSetPageMlocked() for each page returned by get_user_pages().  We use
+TestSetPageMlocked() because the page might already be mlocked by another
+task/vma and we don't want to do extra work.  We especially do not want to
+count an mlocked page more than once in the statistics.  If the page was
+already mlocked, mlock_vma_page() is done.
+
+If the page was NOT already mlocked, mlock_vma_page() attempts to isolate the
+page from the LRU, as it is likely on the appropriate active or inactive list
+at that time.  If the isolate_lru_page() succeeds, mlock_vma_page() will
+putback the page--putback_lru_page()--which will notice that the page is now
+mlocked and divert the page to the zone's unevictable LRU list.  If
+mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle
+it later if/when it attempts to reclaim the page.
+
+
+Mlocked Pages:  Filtering Special Vmas
+
+mlock_fixup() filters several classes of "special" vmas:
+
+1) vmas with VM_IO|VM_PFNMAP set are skipped entirely.  The pages behind
+   these mappings are inherently pinned, so we don't need to mark them as
+   mlocked.  In any case, most of the pages have no struct page in which to
+   so mark the page.  Because of this, get_user_pages() will fail for these
+   vmas, so there is no sense in attempting to visit them.
+
+2) vmas mapping hugetlbfs page are already effectively pinned into memory.
+   We don't need nor want to mlock() these pages.  However, to preserve the
+   prior behavior of mlock()--before the unevictable/mlock changes--mlock_fixup()
+   will call make_pages_present() in the hugetlbfs vma range to allocate the
+   huge pages and populate the ptes.
+
+3) vmas with VM_DONTEXPAND|VM_RESERVED are generally user space mappings of
+   kernel pages, such as the vdso page, relay channel pages, etc.  These pages
+   are inherently unevictable and are not managed on the LRU lists.
+   mlock_fixup() treats these vmas the same as hugetlbfs vmas.  It calls
+   make_pages_present() to populate the ptes.
+
+Note that for all of these special vmas, mlock_fixup() does not set the
+VM_LOCKED flag.  Therefore, we won't have to deal with them later during
+munlock() or munmap()--for example, at task exit.  Neither does mlock_fixup()
+account these vmas against the task's "locked_vm".
+
+Mlocked Pages:  Downgrading the Mmap Semaphore.
+
+mlock_fixup() must be called with the mmap semaphore held for write, because
+it may have to merge or split vmas.  However, mlocking a large region of
+memory can take a long time--especially if vmscan must reclaim pages to
+satisfy the region's requirements.  Faulting in a large region with the mmap
+semaphore held for write can hold off other faults on the address space, in
+the case of a multi-threaded task.  It can also hold off scans of the task's
+address space via /proc.  While testing under heavy load, it was observed that
+the ps(1) command could be held off for many minutes while a large segment was
+mlock()ed down.
+
+To address this issue, and to make the system more responsive during mlock()ing
+of large segments, mlock_fixup() downgrades the mmap semaphore to read mode
+during the call to __mlock_vma_pages_range().  This works fine.  However, the
+callers of mlock_fixup() expect the semaphore to be returned in write mode.
+So, mlock_fixup() "upgrades" the semaphore to write mode.  Linux does not
+support an atomic upgrade_sem() call, so mlock_fixup() must drop the semaphore
+and reacquire it in write mode.  In a multi-threaded task, it is possible for
+the task memory map to change while the semaphore is dropped.  Therefore,
+mlock_fixup() looks up the vma at the range start address after reacquiring
+the semaphore in write mode and verifies that it still covers the original
+range.  If not, mlock_fixup() returns an error [-EAGAIN].  All callers of
+mlock_fixup() have been changed to deal with this new error condition.
+
+Note:  when munlocking a region, all of the pages should already be resident--
+unless we have racing threads mlocking() and munlocking() regions.  So,
+unlocking should not have to wait for page allocations nor faults  of any kind.
+Therefore mlock_fixup() does not downgrade the semaphore for munlock().
+
+
+Mlocked Pages:  munlock()/munlockall() System Call Handling
+
+The munlock() and munlockall() system calls are handled by the same functions--
+do_mlock[all]()--as the mlock() and mlockall() system calls with the unlock
+vs lock operation indicated by an argument.  So, these system calls are also
+handled by mlock_fixup().  Again, if called for an already munlock()ed vma,
+mlock_fixup() simply returns.  Because of the vma filtering discussed above,
+VM_LOCKED will not be set in any "special" vmas.  So, these vmas will be
+ignored for munlock.
+
+If the vma is VM_LOCKED, mlock_fixup() again attempts to merge or split off
+the specified range.  The range is then munlocked via the function
+__mlock_vma_pages_range()--the same function used to mlock a vma range--
+passing a flag to indicate that munlock() is being performed.
+
+Because the vma access protections could have been changed to PROT_NONE after
+faulting in and mlocking some pages, get_user_pages() was unreliable for visiting
+these pages for munlocking.  Because we don't want to leave pages mlocked(),
+get_user_pages() was enhanced to accept a flag to ignore the permissions when
+fetching the pages--all of which should be resident as a result of previous
+mlock()ing.
+
+For munlock(), __mlock_vma_pages_range() unlocks individual pages by calling
+munlock_vma_page().  munlock_vma_page() unconditionally clears the PG_mlocked
+flag using TestClearPageMlocked().  As with mlock_vma_page(), munlock_vma_page()
+uses the Test*PageMlocked() function to handle the case where the page might
+have already been unlocked by another task.  If the page was mlocked,
+munlock_vma_page() updates the zone statistics for the number of mlocked
+pages.  Note, however, that at this point we haven't checked whether the page
+is mapped by other VM_LOCKED vmas.
+
+We can't call try_to_munlock(), the function that walks the reverse map to check
+for other VM_LOCKED vmas, without first isolating the page from the LRU.
+try_to_munlock() is a variant of try_to_unmap() and thus requires that the page
+not be on an lru list.  [More on these below.]  However, the call to
+isolate_lru_page() could fail, in which case we couldn't try_to_munlock().
+So, we go ahead and clear PG_mlocked up front, as this might be the only chance
+we have.  If we can successfully isolate the page, we go ahead and
+try_to_munlock(), which will restore the PG_mlocked flag and update the zone
+page statistics if it finds another vma holding the page mlocked.  If we fail
+to isolate the page, we'll have left a potentially mlocked page on the LRU.
+This is fine, because we'll catch it later when/if vmscan tries to reclaim the
+page.  This should be relatively rare.
+
+Mlocked Pages:  Migrating Them...
+
+A page that is being migrated has been isolated from the lru lists and is
+held locked across unmapping of the page, updating the page's mapping
+[address_space] entry and copying the contents and state, until the
+page table entry has been replaced with an entry that refers to the new
+page.  Linux supports migration of mlocked pages and other unevictable
+pages.  This involves simply moving the PageMlocked and PageUnevictable states
+from the old page to the new page.
+
+Note that page migration can race with mlocking or munlocking of the same
+page.  This has been discussed from the mlock/munlock perspective in the
+respective sections above.  Both processes [migration, m[un]locking], hold
+the page locked.  This provides the first level of synchronization.  Page
+migration zeros out the page_mapping of the old page before unlocking it,
+so m[un]lock can skip these pages by testing the page mapping under page
+lock.
+
+When completing page migration, we place the new and old pages back onto the
+lru after dropping the page lock.  The "unneeded" page--old page on success,
+new page on failure--will be freed when the reference count held by the
+migration process is released.  To ensure that we don't strand pages on the
+unevictable list because of a race between munlock and migration, page
+migration uses the putback_lru_page() function to add migrated pages back to
+the lru.
+
+
+Mlocked Pages:  mmap(MAP_LOCKED) System Call Handling
+
+In addition to the mlock()/mlockall() system calls, an application can request
+that a region of memory be mlocked using the MAP_LOCKED flag with the mmap()
+call.  Furthermore, any mmap() call or brk() call that expands the heap by a
+task that has previously called mlockall() with the MCL_FUTURE flag will result
+in the newly mapped memory being mlocked.  Before the unevictable/mlock changes,
+the kernel simply called make_pages_present() to allocate pages and populate
+the page table.
+
+To mlock a range of memory under the unevictable/mlock infrastructure, the
+mmap() handler and task address space expansion functions call
+mlock_vma_pages_range() specifying the vma and the address range to mlock.
+mlock_vma_pages_range() filters vmas like mlock_fixup(), as described above in
+"Mlocked Pages:  Filtering Vmas".  It will clear the VM_LOCKED flag, which will
+have already been set by the caller, in filtered vmas.  Thus these vma's need
+not be visited for munlock when the region is unmapped.
+
+For "normal" vmas, mlock_vma_pages_range() calls __mlock_vma_pages_range() to
+fault/allocate the pages and mlock them.  Again, like mlock_fixup(),
+mlock_vma_pages_range() downgrades the mmap semaphore to read mode before
+attempting to fault/allocate and mlock the pages; and "upgrades" the semaphore
+back to write mode before returning.
+
+The callers of mlock_vma_pages_range() will have already added the memory
+range to be mlocked to the task's "locked_vm".  To account for filtered vmas,
+mlock_vma_pages_range() returns the number of pages NOT mlocked.  All of the
+callers then subtract a non-negative return value from the task's locked_vm.
+A negative return value represent an error--for example, from get_user_pages()
+attempting to fault in a vma with PROT_NONE access.  In this case, we leave
+the memory range accounted as locked_vm, as the protections could be changed
+later and pages allocated into that region.
+
+
+Mlocked Pages:  munmap()/exit()/exec() System Call Handling
+
+When unmapping an mlocked region of memory, whether by an explicit call to
+munmap() or via an internal unmap from exit() or exec() processing, we must
+munlock the pages if we're removing the last VM_LOCKED vma that maps the pages.
+Before the unevictable/mlock changes, mlocking did not mark the pages in any way,
+so unmapping them required no processing.
+
+To munlock a range of memory under the unevictable/mlock infrastructure, the
+munmap() handler and task address space tear down function call
+munlock_vma_pages_all().  The name reflects the observation that one always
+specifies the entire vma range when munlock()ing during unmap of a region.
+Because of the vma filtering when mlocking() regions, only "normal" vmas that
+actually contain mlocked pages will be passed to munlock_vma_pages_all().
+
+munlock_vma_pages_all() clears the VM_LOCKED vma flag and, like mlock_fixup()
+for the munlock case, calls __munlock_vma_pages_range() to walk the page table
+for the vma's memory range and munlock_vma_page() each resident page mapped by
+the vma.  This effectively munlocks the page, only if this is the last
+VM_LOCKED vma that maps the page.
+
+
+Mlocked Page:  try_to_unmap()
+
+[Note:  the code changes represented by this section are really quite small
+compared to the text to describe what is happening and why, and to discuss the
+implications.]
+
+Pages can, of course, be mapped into multiple vmas.  Some of these vmas may
+have VM_LOCKED flag set.  It is possible for a page mapped into one or more
+VM_LOCKED vmas not to have the PG_mlocked flag set and therefore reside on one
+of the active or inactive LRU lists.  This could happen if, for example, a
+task in the process of munlock()ing the page could not isolate the page from
+the LRU.  As a result, vmscan/shrink_page_list() might encounter such a page
+as described in "Unevictable Pages and Vmscan [shrink_*_list()]".  To
+handle this situation, try_to_unmap() has been enhanced to check for VM_LOCKED
+vmas while it is walking a page's reverse map.
+
+try_to_unmap() is always called, by either vmscan for reclaim or for page
+migration, with the argument page locked and isolated from the LRU.  BUG_ON()
+assertions enforce this requirement.  Separate functions handle anonymous and
+mapped file pages, as these types of pages have different reverse map
+mechanisms.
+
+       try_to_unmap_anon()
+
+To unmap anonymous pages, each vma in the list anchored in the anon_vma must be
+visited--at least until a VM_LOCKED vma is encountered.  If the page is being
+unmapped for migration, VM_LOCKED vmas do not stop the process because mlocked
+pages are migratable.  However, for reclaim, if the page is mapped into a
+VM_LOCKED vma, the scan stops.  try_to_unmap() attempts to acquire the mmap
+semaphore of the mm_struct to which the vma belongs in read mode.  If this is
+successful, try_to_unmap() will mlock the page via mlock_vma_page()--we
+wouldn't have gotten to try_to_unmap() if the page were already mlocked--and
+will return SWAP_MLOCK, indicating that the page is unevictable.  If the
+mmap semaphore cannot be acquired, we are not sure whether the page is really
+unevictable or not.  In this case, try_to_unmap() will return SWAP_AGAIN.
+
+       try_to_unmap_file() -- linear mappings
+
+Unmapping of a mapped file page works the same, except that the scan visits
+all vmas that map the page's index/page offset in the page's mapping's
+reverse map priority search tree.  It must also visit each vma in the page's
+mapping's non-linear list, if the list is non-empty.  As for anonymous pages,
+on encountering a VM_LOCKED vma for a mapped file page, try_to_unmap() will
+attempt to acquire the associated mm_struct's mmap semaphore to mlock the page,
+returning SWAP_MLOCK if this is successful, and SWAP_AGAIN, if not.
+
+       try_to_unmap_file() -- non-linear mappings
+
+If a page's mapping contains a non-empty non-linear mapping vma list, then
+try_to_un{map|lock}() must also visit each vma in that list to determine
+whether the page is mapped in a VM_LOCKED vma.  Again, the scan must visit
+all vmas in the non-linear list to ensure that the page is not/should not be
+mlocked.  If a VM_LOCKED vma is found in the list, the scan could terminate.
+However, there is no easy way to determine whether the page is actually mapped
+in a given vma--either for unmapping or testing whether the VM_LOCKED vma
+actually pins the page.
+
+So, try_to_unmap_file() handles non-linear mappings by scanning a certain
+number of pages--a "cluster"--in each non-linear vma associated with the page's
+mapping, for each file mapped page that vmscan tries to unmap.  If this happens
+to unmap the page we're trying to unmap, try_to_unmap() will notice this on
+return--(page_mapcount(page) == 0)--and return SWAP_SUCCESS.  Otherwise, it
+will return SWAP_AGAIN, causing vmscan to recirculate this page.  We take
+advantage of the cluster scan in try_to_unmap_cluster() as follows:
+
+For each non-linear vma, try_to_unmap_cluster() attempts to acquire the mmap
+semaphore of the associated mm_struct for read without blocking.  If this
+attempt is successful and the vma is VM_LOCKED, try_to_unmap_cluster() will
+retain the mmap semaphore for the scan; otherwise it drops it here.  Then,
+for each page in the cluster, if we're holding the mmap semaphore for a locked
+vma, try_to_unmap_cluster() calls mlock_vma_page() to mlock the page.  This
+call is a no-op if the page is already locked, but will mlock any pages in
+the non-linear mapping that happen to be unlocked.  If one of the pages so
+mlocked is the page passed in to try_to_unmap(), try_to_unmap_cluster() will
+return SWAP_MLOCK, rather than the default SWAP_AGAIN.  This will allow vmscan
+to cull the page, rather than recirculating it on the inactive list.  Again,
+if try_to_unmap_cluster() cannot acquire the vma's mmap sem, it returns
+SWAP_AGAIN, indicating that the page is mapped by a VM_LOCKED vma, but
+couldn't be mlocked.
+
+
+Mlocked pages:  try_to_munlock() Reverse Map Scan
+
+TODO/FIXME:  a better name might be page_mlocked()--analogous to the
+page_referenced() reverse map walker--especially if we continue to call this
+from shrink_page_list().  See related TODO/FIXME below.
+
+When munlock_vma_page()--see "Mlocked Pages:  munlock()/munlockall() System
+Call Handling" above--tries to munlock a page, or when shrink_page_list()
+encounters an anonymous page that is not yet in the swap cache, they need to
+determine whether or not the page is mapped by any VM_LOCKED vma, without
+actually attempting to unmap all ptes from the page.  For this purpose, the
+unevictable/mlock infrastructure introduced a variant of try_to_unmap() called
+try_to_munlock().
+
+try_to_munlock() calls the same functions as try_to_unmap() for anonymous and
+mapped file pages with an additional argument specifying unlock versus unmap
+processing.  Again, these functions walk the respective reverse maps looking
+for VM_LOCKED vmas.  When such a vma is found for anonymous pages and file
+pages mapped in linear VMAs, as in the try_to_unmap() case, the functions
+attempt to acquire the associated mmap semaphore, mlock the page via
+mlock_vma_page() and return SWAP_MLOCK.  This effectively undoes the
+pre-clearing of the page's PG_mlocked done by munlock_vma_page() and informs
+shrink_page_list() that the anonymous page should be culled rather than added
+to the swap cache in preparation for a try_to_unmap() that will almost
+certainly fail.
+
+If try_to_unmap() is unable to acquire a VM_LOCKED vma's associated mmap
+semaphore, it will return SWAP_AGAIN.  This will allow shrink_page_list()
+to recycle the page on the inactive list and hope that it has better luck
+with the page next time.
+
+For file pages mapped into non-linear vmas, the try_to_munlock() logic works
+slightly differently.  On encountering a VM_LOCKED non-linear vma that might
+map the page, try_to_munlock() returns SWAP_AGAIN without actually mlocking
+the page.  munlock_vma_page() will just leave the page unlocked and let
+vmscan deal with it--the usual fallback position.
+
+Note that try_to_munlock()'s reverse map walk must visit every vma in a page's
+reverse map to determine that a page is NOT mapped into any VM_LOCKED vma.
+However, the scan can terminate when it encounters a VM_LOCKED vma and can
+successfully acquire the vma's mmap semaphore for read and mlock the page.
+Although try_to_munlock() can be called many [very many!] times when
+munlock()ing a large region or tearing down a large address space that has been
+mlocked via mlockall(), overall this is a fairly rare event.  In addition,
+although shrink_page_list() calls try_to_munlock() for every anonymous page that
+it handles that is not yet in the swap cache, on average anonymous pages will
+have very short reverse map lists.
+
+Mlocked Page:  Page Reclaim in shrink_*_list()
+
+shrink_active_list() culls any obviously unevictable pages--i.e.,
+!page_evictable(page, NULL)--diverting these to the unevictable lru
+list.  However, shrink_active_list() only sees unevictable pages that
+made it onto the active/inactive lru lists.  Note that these pages do not
+have PageUnevictable set--otherwise, they would be on the unevictable list and
+shrink_active_list would never see them.
+
+Some examples of these unevictable pages on the LRU lists are:
+
+1) ramfs pages that have been placed on the lru lists when first allocated.
+
+2) SHM_LOCKed shared memory pages.  shmctl(SHM_LOCK) does not attempt to
+   allocate or fault in the pages in the shared memory region.  This happens
+   when an application accesses the page the first time after SHM_LOCKing
+   the segment.
+
+3) Mlocked pages that could not be isolated from the lru and moved to the
+   unevictable list in mlock_vma_page().
+
+4) Pages mapped into multiple VM_LOCKED vmas, but try_to_munlock() couldn't
+   acquire the vma's mmap semaphore to test the flags and set PageMlocked.
+   munlock_vma_page() was forced to let the page back on to the normal
+   LRU list for vmscan to handle.
+
+shrink_inactive_list() also culls any unevictable pages that it finds
+on the inactive lists, again diverting them to the appropriate zone's unevictable
+lru list.  shrink_inactive_list() should only see SHM_LOCKed pages that became
+SHM_LOCKed after shrink_active_list() had moved them to the inactive list, or
+pages mapped into VM_LOCKED vmas that munlock_vma_page() couldn't isolate from
+the lru to recheck via try_to_munlock().  shrink_inactive_list() won't notice
+the latter, but will pass on to shrink_page_list().
+
+shrink_page_list() again culls obviously unevictable pages that it could
+encounter for similar reason to shrink_inactive_list().  As already discussed,
+shrink_page_list() proactively looks for anonymous pages that should have
+PG_mlocked set but don't--these would not be detected by page_evictable()--to
+avoid adding them to the swap cache unnecessarily.  File pages mapped into
+VM_LOCKED vmas but without PG_mlocked set will make it all the way to
+try_to_unmap().  shrink_page_list() will divert them to the unevictable list when
+try_to_unmap() returns SWAP_MLOCK, as discussed above.
+
+TODO/FIXME:  If we can enhance the swap cache to reliably remove entries
+with page_count(page) > 2, as long as all ptes are mapped to the page and
+not the swap entry, we can probably remove the call to try_to_munlock() in
+shrink_page_list() and just remove the page from the swap cache when
+try_to_unmap() returns SWAP_MLOCK.   Currently, remove_exclusive_swap_page()
+doesn't seem to allow that.
+
+
index a0f642b6a4b92b556e08c8c1d1e357979cd48322..6110197757a3f6283bf999519f1e7649fb96a867 100644 (file)
@@ -70,6 +70,7 @@ config AUTO_IRQ_AFFINITY
        default y
 
 source "init/Kconfig"
+source "kernel/Kconfig.freezer"
 
 
 menu "System setup"
index 15fda434442428660e3d896661ec11c9f134e71e..d069526bd7673f9ead6eb33e825782ea690e2fb0 100644 (file)
@@ -74,12 +74,14 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define TIF_UAC_SIGBUS         7
 #define TIF_MEMDIE             8
 #define TIF_RESTORE_SIGMASK    9       /* restore signal mask in do_signal */
+#define TIF_FREEZE             16      /* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE     (1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING                (1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED      (1<<TIF_NEED_RESCHED)
 #define _TIF_POLLING_NRFLAG    (1<<TIF_POLLING_NRFLAG)
 #define _TIF_RESTORE_SIGMASK   (1<<TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE            (1<<TIF_FREEZE)
 
 /* Work to do on interrupt/exception return.  */
 #define _TIF_WORK_MASK         (_TIF_SIGPENDING | _TIF_NEED_RESCHED)
index 04dcc5e5d4c1c4954d8c80186ac3390d1a9074cc..9cd8dca742a733f8791b850872f96d68e72497a7 100644 (file)
@@ -655,7 +655,7 @@ __marvel_rtc_io(u8 b, unsigned long addr, int write)
 
        case 0x71:                                      /* RTC_PORT(1) */
                rtc_access.index = index;
-               rtc_access.data = BCD_TO_BIN(b);
+               rtc_access.data = bcd2bin(b);
                rtc_access.function = 0x48 + !write;    /* GET/PUT_TOY */
 
 #ifdef CONFIG_SMP
@@ -668,7 +668,7 @@ __marvel_rtc_io(u8 b, unsigned long addr, int write)
 #else
                __marvel_access_rtc(&rtc_access);
 #endif
-               ret = BIN_TO_BCD(rtc_access.data);
+               ret = bin2bcd(rtc_access.data);
                break;
 
        default:
index 75480cab0893c391619e681bdc1ee6dd6adbf7ed..e6a231435cbac57f90ffe77907af51cf4b5ccc4f 100644 (file)
@@ -346,12 +346,12 @@ time_init(void)
        year = CMOS_READ(RTC_YEAR);
 
        if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-               BCD_TO_BIN(sec);
-               BCD_TO_BIN(min);
-               BCD_TO_BIN(hour);
-               BCD_TO_BIN(day);
-               BCD_TO_BIN(mon);
-               BCD_TO_BIN(year);
+               sec = bcd2bin(sec);
+               min = bcd2bin(min);
+               hour = bcd2bin(hour);
+               day = bcd2bin(day);
+               mon = bcd2bin(mon);
+               year = bcd2bin(year);
        }
 
        /* PC-like is standard; used for year >= 70 */
@@ -525,7 +525,7 @@ set_rtc_mmss(unsigned long nowtime)
 
        cmos_minutes = CMOS_READ(RTC_MINUTES);
        if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-               BCD_TO_BIN(cmos_minutes);
+               cmos_minutes = bcd2bin(cmos_minutes);
 
        /*
         * since we're only adjusting minutes and seconds,
@@ -543,8 +543,8 @@ set_rtc_mmss(unsigned long nowtime)
 
        if (abs(real_minutes - cmos_minutes) < 30) {
                if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-                       BIN_TO_BCD(real_seconds);
-                       BIN_TO_BCD(real_minutes);
+                       real_seconds = bin2bcd(real_seconds);
+                       real_minutes = bin2bcd(real_minutes);
                }
                CMOS_WRITE(real_seconds,RTC_SECONDS);
                CMOS_WRITE(real_minutes,RTC_MINUTES);
index 4853f9df37bd22a037f122bd7d78a9a6600cc474..df39d20f7425ad9c41679a566ae212f924a222ff 100644 (file)
@@ -192,6 +192,8 @@ config VECTORS_BASE
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "System Type"
 
 choice
index eb4b190b6657256898dfbc2da475a32a98ee6f4d..eb35fca9aea527bde9992f80917fd69cc174c346 100644 (file)
@@ -4,6 +4,43 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 
+struct pxa3xx_nand_timing {
+       unsigned int    tCH;  /* Enable signal hold time */
+       unsigned int    tCS;  /* Enable signal setup time */
+       unsigned int    tWH;  /* ND_nWE high duration */
+       unsigned int    tWP;  /* ND_nWE pulse time */
+       unsigned int    tRH;  /* ND_nRE high duration */
+       unsigned int    tRP;  /* ND_nRE pulse width */
+       unsigned int    tR;   /* ND_nWE high to ND_nRE low for read */
+       unsigned int    tWHR; /* ND_nWE high to ND_nRE low for status read */
+       unsigned int    tAR;  /* ND_ALE low to ND_nRE low delay */
+};
+
+struct pxa3xx_nand_cmdset {
+       uint16_t        read1;
+       uint16_t        read2;
+       uint16_t        program;
+       uint16_t        read_status;
+       uint16_t        read_id;
+       uint16_t        erase;
+       uint16_t        reset;
+       uint16_t        lock;
+       uint16_t        unlock;
+       uint16_t        lock_status;
+};
+
+struct pxa3xx_nand_flash {
+       const struct pxa3xx_nand_timing *timing; /* NAND Flash timing */
+       const struct pxa3xx_nand_cmdset *cmdset;
+
+       uint32_t page_per_block;/* Pages per block (PG_PER_BLK) */
+       uint32_t page_size;     /* Page size in bytes (PAGE_SZ) */
+       uint32_t flash_width;   /* Width of Flash memory (DWIDTH_M) */
+       uint32_t dfc_width;     /* Width of flash controller(DWIDTH_C) */
+       uint32_t num_blocks;    /* Number of physical blocks in Flash */
+       uint32_t chip_id;
+};
+
 struct pxa3xx_nand_platform_data {
 
        /* the data flash bus is shared between the Static Memory
@@ -12,8 +49,11 @@ struct pxa3xx_nand_platform_data {
         */
        int     enable_arbiter;
 
-       struct mtd_partition *parts;
-       unsigned int    nr_parts;
+       const struct mtd_partition              *parts;
+       unsigned int                            nr_parts;
+
+       const struct pxa3xx_nand_flash *        flash;
+       size_t                                  num_flash;
 };
 
 extern void pxa3xx_set_nand_info(struct pxa3xx_nand_platform_data *info);
diff --git a/arch/arm/plat-mxc/include/mach/mxc_nand.h b/arch/arm/plat-mxc/include/mach/mxc_nand.h
new file mode 100644 (file)
index 0000000..2b972df
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Sascha Hauer, kernel@pengutronix.de
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef __ASM_ARCH_NAND_H
+#define __ASM_ARCH_NAND_H
+
+struct mxc_nand_platform_data {
+       int width;      /* data bus width in bytes */
+       int hw_ecc;     /* 0 if supress hardware ECC */
+};
+#endif /* __ASM_ARCH_NAND_H */
index d57f20226b28733743c35455bb798836ba34f299..4649d302c263723abb48c6c33579d39f96813e4f 100644 (file)
@@ -16,6 +16,10 @@ struct omap_onenand_platform_data {
        int                     gpio_irq;
        struct mtd_partition    *parts;
        int                     nr_parts;
-       int                     (*onenand_setup)(void __iomem *);
+       int                     (*onenand_setup)(void __iomem *, int freq);
        int                     dma_channel;
 };
+
+int omap2_onenand_rephase(void);
+
+#define ONENAND_MAX_PARTITIONS 8
index 7c239a916275b6b66b326e7fc93c6207bdbaf270..33a5b2969eb4addf24e36a59159d72488530dc0b 100644 (file)
@@ -72,6 +72,8 @@ config GENERIC_BUG
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "System Type and features"
 
 source "kernel/time/Kconfig"
index 294b25f9323dda6a4885de1a6b874b4a6a3bb825..4442f8d2d4239a09470e04583aa9f168583246d9 100644 (file)
@@ -96,6 +96,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_MEMDIE            (1 << TIF_MEMDIE)
 #define _TIF_RESTORE_SIGMASK   (1 << TIF_RESTORE_SIGMASK)
 #define _TIF_CPU_GOING_TO_SLEEP (1 << TIF_CPU_GOING_TO_SLEEP)
+#define _TIF_FREEZE            (1 << TIF_FREEZE)
 
 /* Note: The masks below must never span more than 16 bits! */
 
index 8102c79aaa94044369612ab3a8dc3bb6e33aa807..29e71ed6b8a7fb9c5e4b8381244a4c3d5b1d47db 100644 (file)
@@ -64,8 +64,11 @@ config HARDWARE_PM
        depends on OPROFILE
 
 source "init/Kconfig"
+
 source "kernel/Kconfig.preempt"
 
+source "kernel/Kconfig.freezer"
+
 menu "Blackfin Processor Options"
 
 comment "Processor and Board Settings"
index 9389d38f222f2f749497b106872278a7da5bdd0b..07335e719bf835d7ad5e88a6b844f07210b4b6ba 100644 (file)
@@ -62,6 +62,8 @@ config HZ
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "General setup"
 
 source "fs/Kconfig.binfmt"
index c9aa3904be05610f35425ced24c0a88a93383350..3bdfaf43390ce00b03ead8226136b3849a63e1e6 100644 (file)
@@ -215,12 +215,12 @@ get_rtc_time(struct rtc_time *rtc_tm)
 
        local_irq_restore(flags);
        
-       BCD_TO_BIN(rtc_tm->tm_sec);
-       BCD_TO_BIN(rtc_tm->tm_min);
-       BCD_TO_BIN(rtc_tm->tm_hour);
-       BCD_TO_BIN(rtc_tm->tm_mday);
-       BCD_TO_BIN(rtc_tm->tm_mon);
-       BCD_TO_BIN(rtc_tm->tm_year);
+       rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+       rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+       rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+       rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+       rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+       rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
 
        /*
         * Account for differences between how the RTC uses the values
@@ -295,12 +295,12 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
                        else
                                yrs -= 1900;    /* RTC (70, 71, ... 99) */
 
-                       BIN_TO_BCD(sec);
-                       BIN_TO_BCD(min);
-                       BIN_TO_BCD(hrs);
-                       BIN_TO_BCD(day);
-                       BIN_TO_BCD(mon);
-                       BIN_TO_BCD(yrs);
+                       sec = bin2bcd(sec);
+                       min = bin2bcd(min);
+                       hrs = bin2bcd(hrs);
+                       day = bin2bcd(day);
+                       mon = bin2bcd(mon);
+                       yrs = bin2bcd(yrs);
 
                        local_irq_save(flags);
                        CMOS_WRITE(yrs, RTC_YEAR);
index 8769dc914073804ce0c75240e0aa6b60fc7c602a..1e90c1a9c849bd4453b462e3b611808171e5c475 100644 (file)
@@ -122,7 +122,7 @@ get_rtc_time(struct rtc_time *tm)
                       "information is no longer guaranteed!\n", PCF8563_NAME);
        }
 
-       tm->tm_year  = BCD_TO_BIN(tm->tm_year) +
+       tm->tm_year  = bcd2bin(tm->tm_year) +
                       ((tm->tm_mon & 0x80) ? 100 : 0);
        tm->tm_sec  &= 0x7F;
        tm->tm_min  &= 0x7F;
@@ -131,11 +131,11 @@ get_rtc_time(struct rtc_time *tm)
        tm->tm_wday &= 0x07; /* Not coded in BCD. */
        tm->tm_mon  &= 0x1F;
 
-       BCD_TO_BIN(tm->tm_sec);
-       BCD_TO_BIN(tm->tm_min);
-       BCD_TO_BIN(tm->tm_hour);
-       BCD_TO_BIN(tm->tm_mday);
-       BCD_TO_BIN(tm->tm_mon);
+       tm->tm_sec = bcd2bin(tm->tm_sec);
+       tm->tm_min = bcd2bin(tm->tm_min);
+       tm->tm_hour = bcd2bin(tm->tm_hour);
+       tm->tm_mday = bcd2bin(tm->tm_mday);
+       tm->tm_mon = bcd2bin(tm->tm_mon);
        tm->tm_mon--; /* Month is 1..12 in RTC but 0..11 in linux */
 }
 
@@ -282,12 +282,12 @@ int pcf8563_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
                century = (tm.tm_year >= 2000) ? 0x80 : 0;
                tm.tm_year = tm.tm_year % 100;
 
-               BIN_TO_BCD(tm.tm_year);
-               BIN_TO_BCD(tm.tm_mon);
-               BIN_TO_BCD(tm.tm_mday);
-               BIN_TO_BCD(tm.tm_hour);
-               BIN_TO_BCD(tm.tm_min);
-               BIN_TO_BCD(tm.tm_sec);
+               tm.tm_year = bin2bcd(tm.tm_year);
+               tm.tm_mon = bin2bcd(tm.tm_mon);
+               tm.tm_mday = bin2bcd(tm.tm_mday);
+               tm.tm_hour = bin2bcd(tm.tm_hour);
+               tm.tm_min = bin2bcd(tm.tm_min);
+               tm.tm_sec = bin2bcd(tm.tm_sec);
                tm.tm_mon |= century;
 
                mutex_lock(&rtc_lock);
index f263ab571221cb66e5fc200659069ea2291450d7..f4478506e52ca1df58b52180ffd871e77c51e726 100644 (file)
@@ -118,7 +118,7 @@ get_rtc_time(struct rtc_time *tm)
                       "information is no longer guaranteed!\n", PCF8563_NAME);
        }
 
-       tm->tm_year  = BCD_TO_BIN(tm->tm_year) +
+       tm->tm_year  = bcd2bin(tm->tm_year) +
                       ((tm->tm_mon & 0x80) ? 100 : 0);
        tm->tm_sec  &= 0x7F;
        tm->tm_min  &= 0x7F;
@@ -127,11 +127,11 @@ get_rtc_time(struct rtc_time *tm)
        tm->tm_wday &= 0x07; /* Not coded in BCD. */
        tm->tm_mon  &= 0x1F;
 
-       BCD_TO_BIN(tm->tm_sec);
-       BCD_TO_BIN(tm->tm_min);
-       BCD_TO_BIN(tm->tm_hour);
-       BCD_TO_BIN(tm->tm_mday);
-       BCD_TO_BIN(tm->tm_mon);
+       tm->tm_sec = bcd2bin(tm->tm_sec);
+       tm->tm_min = bcd2bin(tm->tm_min);
+       tm->tm_hour = bcd2bin(tm->tm_hour);
+       tm->tm_mday = bcd2bin(tm->tm_mday);
+       tm->tm_mon = bcd2bin(tm->tm_mon);
        tm->tm_mon--; /* Month is 1..12 in RTC but 0..11 in linux */
 }
 
@@ -279,12 +279,12 @@ int pcf8563_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
                century = (tm.tm_year >= 2000) ? 0x80 : 0;
                tm.tm_year = tm.tm_year % 100;
 
-               BIN_TO_BCD(tm.tm_year);
-               BIN_TO_BCD(tm.tm_mon);
-               BIN_TO_BCD(tm.tm_mday);
-               BIN_TO_BCD(tm.tm_hour);
-               BIN_TO_BCD(tm.tm_min);
-               BIN_TO_BCD(tm.tm_sec);
+               tm.tm_year = bin2bcd(tm.tm_year);
+               tm.tm_mon = bin2bcd(tm.tm_mon);
+               tm.tm_mday = bin2bcd(tm.tm_mday);
+               tm.tm_hour = bin2bcd(tm.tm_hour);
+               tm.tm_min = bin2bcd(tm.tm_min);
+               tm.tm_sec = bin2bcd(tm.tm_sec);
                tm.tm_mon |= century;
 
                mutex_lock(&rtc_lock);
index ff4c6aa75defa157e1e4bd436cf47d4c4526e415..074fe7dea96bbaa44c27f6ef513253552646a756 100644 (file)
@@ -127,7 +127,7 @@ int set_rtc_mmss(unsigned long nowtime)
                return 0;
 
        cmos_minutes = CMOS_READ(RTC_MINUTES);
-       BCD_TO_BIN(cmos_minutes);
+       cmos_minutes = bcd2bin(cmos_minutes);
 
        /*
         * since we're only adjusting minutes and seconds,
@@ -142,8 +142,8 @@ int set_rtc_mmss(unsigned long nowtime)
        real_minutes %= 60;
 
        if (abs(real_minutes - cmos_minutes) < 30) {
-               BIN_TO_BCD(real_seconds);
-               BIN_TO_BCD(real_minutes);
+               real_seconds = bin2bcd(real_seconds);
+               real_minutes = bin2bcd(real_minutes);
                CMOS_WRITE(real_seconds,RTC_SECONDS);
                CMOS_WRITE(real_minutes,RTC_MINUTES);
        } else {
@@ -170,12 +170,12 @@ get_cmos_time(void)
        mon = CMOS_READ(RTC_MONTH);
        year = CMOS_READ(RTC_YEAR);
 
-       BCD_TO_BIN(sec);
-       BCD_TO_BIN(min);
-       BCD_TO_BIN(hour);
-       BCD_TO_BIN(day);
-       BCD_TO_BIN(mon);
-       BCD_TO_BIN(year);
+       sec = bcd2bin(sec);
+       min = bcd2bin(min);
+       hour = bcd2bin(hour);
+       day = bcd2bin(day);
+       mon = bcd2bin(mon);
+       year = bcd2bin(year);
 
        if ((year += 1900) < 1970)
                year += 100;
index a5aac1b075628b8a30004d52bb73d2e1ce7d978f..9d1552a9ee2c88ddb40bc7d70dea316f725d2843 100644 (file)
@@ -66,6 +66,8 @@ mainmenu "Fujitsu FR-V Kernel Configuration"
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 
 menu "Fujitsu FR-V system setup"
 
index c7966746fbfec582938b77a37baaed384d1b0ec2..bd1995403c67d0f9b37b34674a6c80ab7ee2a76b 100644 (file)
@@ -90,6 +90,8 @@ config HZ
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 source "arch/h8300/Kconfig.cpu"
 
 menu "Executable file formats"
index aafd4d322ec3e2e8c6767aac9ea125587ff4b322..700014d2155fd8d82a4aeadb54c4b3ad682855bb 100644 (file)
@@ -89,6 +89,7 @@ static inline struct thread_info *current_thread_info(void)
                                           TIF_NEED_RESCHED */
 #define TIF_MEMDIE             4
 #define TIF_RESTORE_SIGMASK    5       /* restore signal mask in do_signal() */
+#define TIF_FREEZE             16      /* is freezing for suspend */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE     (1<<TIF_SYSCALL_TRACE)
@@ -96,6 +97,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_NEED_RESCHED      (1<<TIF_NEED_RESCHED)
 #define _TIF_POLLING_NRFLAG    (1<<TIF_POLLING_NRFLAG)
 #define _TIF_RESTORE_SIGMASK   (1<<TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE            (1<<TIF_FREEZE)
 
 #define _TIF_WORK_MASK         0x0000FFFE      /* work to do on interrupt/exception return */
 
index 3b7aa38254a8408b5564a260eb7a2ef30cf8d5f8..912c57db2d2180888ee9b366d7fae435c45c354d 100644 (file)
@@ -7,6 +7,8 @@ mainmenu "IA-64 Linux Kernel Configuration"
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "Processor type and features"
 
 config IA64
index 4956be40d7b56d9c03a3224e45f743e8f6e34d39..d98f0f4ff83f22c01df79ffc063d5aaafd8be4a7 100644 (file)
@@ -2070,14 +2070,13 @@ sba_init(void)
        if (!ia64_platform_is("hpzx1") && !ia64_platform_is("hpzx1_swiotlb"))
                return 0;
 
-#if defined(CONFIG_IA64_GENERIC) && defined(CONFIG_CRASH_DUMP) && \
-        defined(CONFIG_PROC_FS)
+#if defined(CONFIG_IA64_GENERIC)
        /* If we are booting a kdump kernel, the sba_iommu will
         * cause devices that were not shutdown properly to MCA
         * as soon as they are turned back on.  Our only option for
         * a successful kdump kernel boot is to use the swiotlb.
         */
-       if (elfcorehdr_addr < ELFCORE_ADDR_MAX) {
+       if (is_kdump_kernel()) {
                if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
                        panic("Unable to initialize software I/O TLB:"
                                  " Try machvec=dig boot option");
index da60e90eeeb1493ee21ab75b130eb3d15732ae83..23e91290e41faf3151bbb93a7bfdde76553ecb3a 100644 (file)
@@ -8,10 +8,14 @@
 
 #include <linux/errno.h>
 #include <linux/types.h>
+#include <linux/crash_dump.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
 
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
 /**
  * copy_oldmem_page - copy one page from "oldmem"
  * @pfn: page frame number to be copied
index 51b75cea701866629449f7945ddef10079faa7ab..efaff15d8cf1dc9cad35a767a53b3865d51037b4 100644 (file)
@@ -1335,7 +1335,7 @@ kdump_find_rsvd_region (unsigned long size, struct rsvd_region *r, int n)
 }
 #endif
 
-#ifdef CONFIG_PROC_VMCORE
+#ifdef CONFIG_CRASH_DUMP
 /* locate the size find a the descriptor at a certain address */
 unsigned long __init
 vmcore_find_descriptor_size (unsigned long address)
index de636b215677e801c23076d74f6d5a2d29e68efb..916ba898237f27a6eca8d5b79cc92cce86b716f1 100644 (file)
@@ -352,7 +352,7 @@ reserve_memory (void)
        }
 #endif
 
-#ifdef CONFIG_PROC_VMCORE
+#ifdef CONFIG_CRASH_KERNEL
        if (reserve_elfcorehdr(&rsvd_region[n].start,
                               &rsvd_region[n].end) == 0)
                n++;
@@ -478,7 +478,12 @@ static __init int setup_nomca(char *s)
 }
 early_param("nomca", setup_nomca);
 
-#ifdef CONFIG_PROC_VMCORE
+/*
+ * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
+ * is_kdump_kernel() to determine if we are booting after a panic. Hence
+ * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
+ */
+#ifdef CONFIG_CRASH_DUMP
 /* elfcorehdr= specifies the location of elf core header
  * stored by the crashed kernel.
  */
@@ -502,11 +507,11 @@ int __init reserve_elfcorehdr(unsigned long *start, unsigned long *end)
         * to work properly.
         */
 
-       if (elfcorehdr_addr >= ELFCORE_ADDR_MAX)
+       if (!is_vmcore_usable())
                return -EINVAL;
 
        if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) {
-               elfcorehdr_addr = ELFCORE_ADDR_MAX;
+               vmcore_unusable();
                return -EINVAL;
        }
 
index f482a9098e32aadec0fc194befc4906a6be7f6b2..054bcd9439aa887a724fc031ec91701b7fefb218 100644 (file)
@@ -700,23 +700,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
        return ret;
 }
-#ifdef CONFIG_MEMORY_HOTREMOVE
-int remove_memory(u64 start, u64 size)
-{
-       unsigned long start_pfn, end_pfn;
-       unsigned long timeout = 120 * HZ;
-       int ret;
-       start_pfn = start >> PAGE_SHIFT;
-       end_pfn = start_pfn + (size >> PAGE_SHIFT);
-       ret = offline_pages(start_pfn, end_pfn, timeout);
-       if (ret)
-               goto out;
-       /* we can free mem_map at this point */
-out:
-       return ret;
-}
-EXPORT_SYMBOL_GPL(remove_memory);
-#endif /* CONFIG_MEMORY_HOTREMOVE */
 #endif
 
 /*
index 00289c178f895a6ad62a7fb34603af08d52bc961..dbaed4a638153bc43c5fa4d6e138039bb5cca8df 100644 (file)
@@ -42,6 +42,8 @@ config HZ
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 
 menu "Processor type and features"
 
index 677c93a490f6f49501e4c7d2e375fd42bb4dabc5..836fb66f080dcbadeb5f6d96fba8f01679d85d02 100644 (file)
@@ -62,6 +62,8 @@ mainmenu "Linux/68k Kernel Configuration"
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "Platform dependent setup"
 
 config EISA
index 808c9018b115022dd2ba8dee88a070fc003448ce..c50bec8aabb1983e3517857b5abafa75f91b7ace 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/poll.h>
 #include <linux/module.h>
 #include <linux/mc146818rtc.h> /* For struct rtc_time and ioctls, etc */
-#include <linux/smp_lock.h>
 #include <linux/bcd.h>
 #include <asm/bvme6000hw.h>
 
index 0a8998315e5ed9fcdada66d87c5ee1127e35b9ac..76b66feb74df82d42b21a4b8ac67d6df0f498812 100644 (file)
@@ -75,6 +75,8 @@ config NO_IOPORT
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "Processor type and features"
 
 choice
index 0c9bc095f3f0bc3cae753f788f72786a67b96512..82529f424ea3d567d0c1a9a2991b9591f7dacd85 100644 (file)
@@ -84,12 +84,14 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_POLLING_NRFLAG     3       /* true if poll_idle() is polling
                                           TIF_NEED_RESCHED */
 #define TIF_MEMDIE             4
+#define TIF_FREEZE             16      /* is freezing for suspend */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE     (1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING                (1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED      (1<<TIF_NEED_RESCHED)
 #define _TIF_POLLING_NRFLAG    (1<<TIF_POLLING_NRFLAG)
+#define _TIF_FREEZE            (1<<TIF_FREEZE)
 
 #define _TIF_WORK_MASK         0x0000FFFE      /* work to do on interrupt/exception return */
 
index b905744d79153be3d3170e9f81ce4e0805974d71..5f149b030c0f9fc7949793c6d8591d4bf0fd0450 100644 (file)
@@ -1885,6 +1885,8 @@ config PROBE_INITRD_HEADER
          add initrd or initramfs image to the kernel image.
          Otherwise, say N.
 
+source "kernel/Kconfig.freezer"
+
 menu "Bus options (PCI, PCMCIA, EISA, ISA, TC)"
 
 config HW_HAS_EISA
index 3965fda94a89afd60fdc861f9520340cc9d246cd..1359c03ded5111adb0892029b59ebffbd26eee7f 100644 (file)
@@ -45,12 +45,12 @@ unsigned long read_persistent_clock(void)
        spin_unlock_irqrestore(&rtc_lock, flags);
 
        if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-               sec = BCD2BIN(sec);
-               min = BCD2BIN(min);
-               hour = BCD2BIN(hour);
-               day = BCD2BIN(day);
-               mon = BCD2BIN(mon);
-               year = BCD2BIN(year);
+               sec = bcd2bin(sec);
+               min = bcd2bin(min);
+               hour = bcd2bin(hour);
+               day = bcd2bin(day);
+               mon = bcd2bin(mon);
+               year = bcd2bin(year);
        }
 
        year += real_year - 72 + 2000;
@@ -83,7 +83,7 @@ int rtc_mips_set_mmss(unsigned long nowtime)
 
        cmos_minutes = CMOS_READ(RTC_MINUTES);
        if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-               cmos_minutes = BCD2BIN(cmos_minutes);
+               cmos_minutes = bcd2bin(cmos_minutes);
 
        /*
         * since we're only adjusting minutes and seconds,
@@ -99,8 +99,8 @@ int rtc_mips_set_mmss(unsigned long nowtime)
 
        if (abs(real_minutes - cmos_minutes) < 30) {
                if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-                       real_seconds = BIN2BCD(real_seconds);
-                       real_minutes = BIN2BCD(real_minutes);
+                       real_seconds = bin2bcd(real_seconds);
+                       real_minutes = bin2bcd(real_minutes);
                }
                CMOS_WRITE(real_seconds, RTC_SECONDS);
                CMOS_WRITE(real_minutes, RTC_MINUTES);
index cdc379a0a94ea6c39bd2f4ebdeed2e2774d72c27..199b45733a95ba4ff2d99d548fa354cfb219c7a4 100644 (file)
@@ -44,7 +44,7 @@ static inline int mc146818_set_rtc_mmss(unsigned long nowtime)
 
        cmos_minutes = CMOS_READ(RTC_MINUTES);
        if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-               BCD_TO_BIN(cmos_minutes);
+               cmos_minutes = bcd2bin(cmos_minutes);
 
        /*
         * since we're only adjusting minutes and seconds,
@@ -60,8 +60,8 @@ static inline int mc146818_set_rtc_mmss(unsigned long nowtime)
 
        if (abs(real_minutes - cmos_minutes) < 30) {
                if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-                       BIN_TO_BCD(real_seconds);
-                       BIN_TO_BCD(real_minutes);
+                       real_seconds = bin2bcd(real_seconds);
+                       real_minutes = bin2bcd(real_minutes);
                }
                CMOS_WRITE(real_seconds, RTC_SECONDS);
                CMOS_WRITE(real_minutes, RTC_MINUTES);
@@ -103,12 +103,12 @@ static inline unsigned long mc146818_get_cmos_time(void)
        } while (sec != CMOS_READ(RTC_SECONDS));
 
        if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-               BCD_TO_BIN(sec);
-               BCD_TO_BIN(min);
-               BCD_TO_BIN(hour);
-               BCD_TO_BIN(day);
-               BCD_TO_BIN(mon);
-               BCD_TO_BIN(year);
+               sec = bcd2bin(sec);
+               min = bcd2bin(min);
+               hour = bcd2bin(hour);
+               day = bcd2bin(day);
+               mon = bcd2bin(mon);
+               year = bcd2bin(year);
        }
        spin_unlock_irqrestore(&rtc_lock, flags);
        year = mc146818_decode_year(year);
index 6537d90a25bbcbd1cc0430a41e2e0d6b46950616..2d3c0dca275d40fd75a1db2b0aba1b54d635f338 100644 (file)
@@ -79,14 +79,14 @@ unsigned long read_persistent_clock(void)
        /* Stop the update to the time */
        m48t37_base->control = 0x40;
 
-       year = BCD2BIN(m48t37_base->year);
-       year += BCD2BIN(m48t37_base->century) * 100;
+       year = bcd2bin(m48t37_base->year);
+       year += bcd2bin(m48t37_base->century) * 100;
 
-       month = BCD2BIN(m48t37_base->month);
-       day = BCD2BIN(m48t37_base->date);
-       hour = BCD2BIN(m48t37_base->hour);
-       min = BCD2BIN(m48t37_base->min);
-       sec = BCD2BIN(m48t37_base->sec);
+       month = bcd2bin(m48t37_base->month);
+       day = bcd2bin(m48t37_base->date);
+       hour = bcd2bin(m48t37_base->hour);
+       min = bcd2bin(m48t37_base->min);
+       sec = bcd2bin(m48t37_base->sec);
 
        /* Start the update to the time again */
        m48t37_base->control = 0x00;
@@ -113,22 +113,22 @@ int rtc_mips_set_time(unsigned long tim)
        m48t37_base->control = 0x80;
 
        /* year */
-       m48t37_base->year = BIN2BCD(tm.tm_year % 100);
-       m48t37_base->century = BIN2BCD(tm.tm_year / 100);
+       m48t37_base->year = bin2bcd(tm.tm_year % 100);
+       m48t37_base->century = bin2bcd(tm.tm_year / 100);
 
        /* month */
-       m48t37_base->month = BIN2BCD(tm.tm_mon);
+       m48t37_base->month = bin2bcd(tm.tm_mon);
 
        /* day */
-       m48t37_base->date = BIN2BCD(tm.tm_mday);
+       m48t37_base->date = bin2bcd(tm.tm_mday);
 
        /* hour/min/sec */
-       m48t37_base->hour = BIN2BCD(tm.tm_hour);
-       m48t37_base->min = BIN2BCD(tm.tm_min);
-       m48t37_base->sec = BIN2BCD(tm.tm_sec);
+       m48t37_base->hour = bin2bcd(tm.tm_hour);
+       m48t37_base->min = bin2bcd(tm.tm_min);
+       m48t37_base->sec = bin2bcd(tm.tm_sec);
 
        /* day of week -- not really used, but let's keep it up-to-date */
-       m48t37_base->day = BIN2BCD(tm.tm_wday + 1);
+       m48t37_base->day = bin2bcd(tm.tm_wday + 1);
 
        /* disable writing */
        m48t37_base->control = 0x00;
index 26fbff4c15b1ece152870eef2bdf152cf0e15cc8..b732600b47f5e5e8ad7908e61cd55d22d9f4f6f2 100644 (file)
@@ -156,32 +156,32 @@ int m41t81_set_time(unsigned long t)
         */
 
        spin_lock_irqsave(&rtc_lock, flags);
-       tm.tm_sec = BIN2BCD(tm.tm_sec);
+       tm.tm_sec = bin2bcd(tm.tm_sec);
        m41t81_write(M41T81REG_SC, tm.tm_sec);
 
-       tm.tm_min = BIN2BCD(tm.tm_min);
+       tm.tm_min = bin2bcd(tm.tm_min);
        m41t81_write(M41T81REG_MN, tm.tm_min);
 
-       tm.tm_hour = BIN2BCD(tm.tm_hour);
+       tm.tm_hour = bin2bcd(tm.tm_hour);
        tm.tm_hour = (tm.tm_hour & 0x3f) | (m41t81_read(M41T81REG_HR) & 0xc0);
        m41t81_write(M41T81REG_HR, tm.tm_hour);
 
        /* tm_wday starts from 0 to 6 */
        if (tm.tm_wday == 0) tm.tm_wday = 7;
-       tm.tm_wday = BIN2BCD(tm.tm_wday);
+       tm.tm_wday = bin2bcd(tm.tm_wday);
        m41t81_write(M41T81REG_DY, tm.tm_wday);
 
-       tm.tm_mday = BIN2BCD(tm.tm_mday);
+       tm.tm_mday = bin2bcd(tm.tm_mday);
        m41t81_write(M41T81REG_DT, tm.tm_mday);
 
        /* tm_mon starts from 0, *ick* */
        tm.tm_mon ++;
-       tm.tm_mon = BIN2BCD(tm.tm_mon);
+       tm.tm_mon = bin2bcd(tm.tm_mon);
        m41t81_write(M41T81REG_MO, tm.tm_mon);
 
        /* we don't do century, everything is beyond 2000 */
        tm.tm_year %= 100;
-       tm.tm_year = BIN2BCD(tm.tm_year);
+       tm.tm_year = bin2bcd(tm.tm_year);
        m41t81_write(M41T81REG_YR, tm.tm_year);
        spin_unlock_irqrestore(&rtc_lock, flags);
 
@@ -209,12 +209,12 @@ unsigned long m41t81_get_time(void)
        year = m41t81_read(M41T81REG_YR);
        spin_unlock_irqrestore(&rtc_lock, flags);
 
-       sec = BCD2BIN(sec);
-       min = BCD2BIN(min);
-       hour = BCD2BIN(hour);
-       day = BCD2BIN(day);
-       mon = BCD2BIN(mon);
-       year = BCD2BIN(year);
+       sec = bcd2bin(sec);
+       min = bcd2bin(min);
+       hour = bcd2bin(hour);
+       day = bcd2bin(day);
+       mon = bcd2bin(mon);
+       year = bcd2bin(year);
 
        year += 2000;
 
index ff3e5dabb348edda9707e69b5f18c77a0a69790c..4438b2195c4475d8f1b659422d354de78d07e132 100644 (file)
@@ -124,18 +124,18 @@ int xicor_set_time(unsigned long t)
        xicor_write(X1241REG_SR, X1241REG_SR_WEL | X1241REG_SR_RWEL);
 
        /* trivial ones */
-       tm.tm_sec = BIN2BCD(tm.tm_sec);
+       tm.tm_sec = bin2bcd(tm.tm_sec);
        xicor_write(X1241REG_SC, tm.tm_sec);
 
-       tm.tm_min = BIN2BCD(tm.tm_min);
+       tm.tm_min = bin2bcd(tm.tm_min);
        xicor_write(X1241REG_MN, tm.tm_min);
 
-       tm.tm_mday = BIN2BCD(tm.tm_mday);
+       tm.tm_mday = bin2bcd(tm.tm_mday);
        xicor_write(X1241REG_DT, tm.tm_mday);
 
        /* tm_mon starts from 0, *ick* */
        tm.tm_mon ++;
-       tm.tm_mon = BIN2BCD(tm.tm_mon);
+       tm.tm_mon = bin2bcd(tm.tm_mon);
        xicor_write(X1241REG_MO, tm.tm_mon);
 
        /* year is split */
@@ -148,7 +148,7 @@ int xicor_set_time(unsigned long t)
        tmp = xicor_read(X1241REG_HR);
        if (tmp & X1241REG_HR_MIL) {
                /* 24 hour format */
-               tm.tm_hour = BIN2BCD(tm.tm_hour);
+               tm.tm_hour = bin2bcd(tm.tm_hour);
                tmp = (tmp & ~0x3f) | (tm.tm_hour & 0x3f);
        } else {
                /* 12 hour format, with 0x2 for pm */
@@ -157,7 +157,7 @@ int xicor_set_time(unsigned long t)
                        tmp |= 0x20;
                        tm.tm_hour -= 12;
                }
-               tm.tm_hour = BIN2BCD(tm.tm_hour);
+               tm.tm_hour = bin2bcd(tm.tm_hour);
                tmp |= tm.tm_hour;
        }
        xicor_write(X1241REG_HR, tmp);
@@ -191,13 +191,13 @@ unsigned long xicor_get_time(void)
        y2k = xicor_read(X1241REG_Y2K);
        spin_unlock_irqrestore(&rtc_lock, flags);
 
-       sec = BCD2BIN(sec);
-       min = BCD2BIN(min);
-       hour = BCD2BIN(hour);
-       day = BCD2BIN(day);
-       mon = BCD2BIN(mon);
-       year = BCD2BIN(year);
-       y2k = BCD2BIN(y2k);
+       sec = bcd2bin(sec);
+       min = bcd2bin(min);
+       hour = bcd2bin(hour);
+       day = bcd2bin(day);
+       mon = bcd2bin(mon);
+       year = bcd2bin(year);
+       y2k = bcd2bin(y2k);
 
        year += (y2k * 100);
 
index dd557c9cf001be6226441b78cb84753f5e50cb15..9a9f433588792f5030796b2686193f4983c98ce9 100644 (file)
@@ -68,6 +68,8 @@ mainmenu "Matsushita MN10300/AM33 Kernel Configuration"
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 
 menu "Matsushita MN10300 system setup"
 
index 042f792d843052c7396ce76641a5cc8b472e7215..7978470b5749352e3b1f81beb018648be65ac786 100644 (file)
@@ -67,7 +67,7 @@ static int set_rtc_mmss(unsigned long nowtime)
 
        cmos_minutes = CMOS_READ(RTC_MINUTES);
        if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-               BCD_TO_BIN(cmos_minutes);
+               cmos_minutes = bcd2bin(cmos_minutes);
 
        /*
         * since we're only adjusting minutes and seconds,
@@ -84,8 +84,8 @@ static int set_rtc_mmss(unsigned long nowtime)
 
        if (abs(real_minutes - cmos_minutes) < 30) {
                if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-                       BIN_TO_BCD(real_seconds);
-                       BIN_TO_BCD(real_minutes);
+                       real_seconds = bin2bcd(real_seconds);
+                       real_minutes = bin2bcd(real_minutes);
                }
                CMOS_WRITE(real_seconds, RTC_SECONDS);
                CMOS_WRITE(real_minutes, RTC_MINUTES);
index 8313fccced5e1553f6f3d7ff484d897d2d20b812..2bd1f6ef5db0c6bd45269701b36b4713c0a6def3 100644 (file)
@@ -90,6 +90,8 @@ config ARCH_MAY_HAVE_PC_FDC
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 
 menu "Processor type and features"
 
index 380baa1780e9197b060ce1594aa098340d4da538..9391199d9e7731d166b45130fa322e27936b1da0 100644 (file)
@@ -230,6 +230,8 @@ config PPC_OF_PLATFORM_PCI
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 source "arch/powerpc/sysdev/Kconfig"
 source "arch/powerpc/platforms/Kconfig"
 
index fda98715cd356a4b74a18e66c860c81ed62ee50a..5aa22cffdbd658d02c616ba822e3071ac642e3ac 100644 (file)
@@ -678,6 +678,8 @@ struct ps3av_pkt_avb_param {
        u8 buf[PS3AV_PKT_AVB_PARAM_MAX_BUF_SIZE];
 };
 
+/* channel status */
+extern u8 ps3av_mode_cs_info[];
 
 /** command status **/
 #define PS3AV_STATUS_SUCCESS                   0x0000  /* success */
@@ -735,6 +737,7 @@ extern int ps3av_get_mode(void);
 extern int ps3av_video_mode2res(u32, u32 *, u32 *);
 extern int ps3av_video_mute(int);
 extern int ps3av_audio_mute(int);
+extern int ps3av_audio_mute_analog(int);
 extern int ps3av_dev_open(void);
 extern int ps3av_dev_close(void);
 extern void ps3av_register_flip_ctl(void (*flip_ctl)(int on, void *data),
index a323c9b32ee12f76604631a532e2c5dc85436aa1..97e056379728c5f936aba586c0f9cf2acf044e34 100644 (file)
@@ -27,6 +27,9 @@
 #define DBG(fmt...)
 #endif
 
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
 void __init reserve_kdump_trampoline(void)
 {
        lmb_reserve(0, KDUMP_RESERVE_LIMIT);
@@ -66,7 +69,11 @@ void __init setup_kdump_trampoline(void)
        DBG(" <- setup_kdump_trampoline()\n");
 }
 
-#ifdef CONFIG_PROC_VMCORE
+/*
+ * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
+ * is_kdump_kernel() to determine if we are booting after a panic. Hence
+ * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
+ */
 static int __init parse_elfcorehdr(char *p)
 {
        if (p)
@@ -75,7 +82,6 @@ static int __init parse_elfcorehdr(char *p)
        return 1;
 }
 __setup("elfcorehdr=", parse_elfcorehdr);
-#endif
 
 static int __init parse_savemaxmem(char *p)
 {
index 98d7bf99533aff312712d747beed29141294608d..b9e1a1da6e52a35f2276c798b71e717bb224ec88 100644 (file)
@@ -134,23 +134,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
        return __add_pages(zone, start_pfn, nr_pages);
 }
-
-#ifdef CONFIG_MEMORY_HOTREMOVE
-int remove_memory(u64 start, u64 size)
-{
-       unsigned long start_pfn, end_pfn;
-       int ret;
-
-       start_pfn = start >> PAGE_SHIFT;
-       end_pfn = start_pfn + (size >> PAGE_SHIFT);
-       ret = offline_pages(start_pfn, end_pfn, 120 * HZ);
-       if (ret)
-               goto out;
-       /* Arch-specific calls go here - next patch */
-out:
-       return ret;
-}
-#endif /* CONFIG_MEMORY_HOTREMOVE */
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 /*
index bc581d8a7cd9c35995e6bb75ae1f17d101840de1..70b7645ce745aa03863360201183a832aec3f130 100644 (file)
@@ -78,6 +78,8 @@ config S390
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "Base setup"
 
 comment "Processor type and features"
index ea40a9d690fcc19407d8fd8a610d1af5b25a7a7d..de3fad60c6828d95c45a10944b2bc42a9b8fefb5 100644 (file)
@@ -99,6 +99,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_31BIT              18      /* 32bit process */ 
 #define TIF_MEMDIE             19
 #define TIF_RESTORE_SIGMASK    20      /* restore signal mask in do_signal() */
+#define TIF_FREEZE             21      /* thread is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE     (1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME     (1<<TIF_NOTIFY_RESUME)
@@ -112,6 +113,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_USEDFPU           (1<<TIF_USEDFPU)
 #define _TIF_POLLING_NRFLAG    (1<<TIF_POLLING_NRFLAG)
 #define _TIF_31BIT             (1<<TIF_31BIT)
+#define _TIF_FREEZE            (1<<TIF_FREEZE)
 
 #endif /* __KERNEL__ */
 
index 1169130a97efdc2661a8e51fcfa9abbb33f4f5cc..158b0d6d70462942975048759222c6fa69874433 100644 (file)
@@ -189,14 +189,3 @@ int arch_add_memory(int nid, u64 start, u64 size)
        return rc;
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
-
-#ifdef CONFIG_MEMORY_HOTREMOVE
-int remove_memory(u64 start, u64 size)
-{
-       unsigned long start_pfn, end_pfn;
-
-       start_pfn = PFN_DOWN(start);
-       end_pfn = start_pfn + PFN_DOWN(size);
-       return offline_pages(start_pfn, end_pfn, 120 * HZ);
-}
-#endif /* CONFIG_MEMORY_HOTREMOVE */
index b4aa2a03e19b3c3a3008e4b8ea6935760dcd7f6c..cb2c87df70ceeb9d8c4558fbafde0e3b35eb9220 100644 (file)
@@ -121,6 +121,8 @@ config IO_TRAPPED
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "System type"
 
 #
index 4a2ecbe27d8e9257c068edad01124fbd48e04c8b..95d216255565aa119640c96e0d496125546486ee 100644 (file)
@@ -10,6 +10,9 @@
 #include <linux/io.h>
 #include <asm/uaccess.h>
 
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
 /**
  * copy_oldmem_page - copy one page from "oldmem"
  * @pfn: page frame number to be copied
index 97671dac12a649765e3b8fda6d22317963872749..e594559c8dbaa7658f56f079354e8ed25468fdcf 100644 (file)
@@ -37,6 +37,8 @@ config HZ
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "General machine setup"
 
 config SMP
index 29899fd5b1b228236d3f495c35e16ed511b57cb4..80fe547c3f45c5048c981ce2401e40243879e13f 100644 (file)
@@ -135,6 +135,7 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *)
 #define TIF_POLLING_NRFLAG     9       /* true if poll_idle() is polling
                                         * TIF_NEED_RESCHED */
 #define TIF_MEMDIE             10
+#define TIF_FREEZE             11      /* is freezing for suspend */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE     (1<<TIF_SYSCALL_TRACE)
@@ -148,6 +149,7 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *)
 #define _TIF_DO_NOTIFY_RESUME_MASK     (_TIF_NOTIFY_RESUME | \
                                         _TIF_SIGPENDING | \
                                         _TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE            (1<<TIF_FREEZE)
 
 #endif /* __KERNEL__ */
 
index c0a737d7292c4462456f0b0e27ff6aae149142a6..639ac805448ab500497270013b9176510849062a 100644 (file)
@@ -237,6 +237,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define TIF_ABI_PENDING                12
 #define TIF_MEMDIE             13
 #define TIF_POLLING_NRFLAG     14
+#define TIF_FREEZE             15      /* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE     (1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME     (1<<TIF_NOTIFY_RESUME)
@@ -249,6 +250,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define _TIF_SYSCALL_AUDIT     (1<<TIF_SYSCALL_AUDIT)
 #define _TIF_ABI_PENDING       (1<<TIF_ABI_PENDING)
 #define _TIF_POLLING_NRFLAG    (1<<TIF_POLLING_NRFLAG)
+#define _TIF_FREEZE            (1<<TIF_FREEZE)
 
 #define _TIF_USER_WORK_MASK    ((0xff << TI_FLAG_WSAVED_SHIFT) | \
                                 _TIF_DO_NOTIFY_RESUME_MASK | \
index 5446e2a499b13d36e3e50a529e85cd0e91ad5de4..035b15af90d8a668583001ef9610ab761d7b9478 100644 (file)
@@ -96,6 +96,7 @@ config GENERIC_HARDIRQS_NO__DO_IRQ
        def_bool y
 
 source "init/Kconfig"
+source "kernel/Kconfig.freezer"
 
 menu "Processor type and features"
 
index 6976812cfb184553bb93778bba38b2f3c01f922c..393bccfe1785108579dc6c18bab682f2e7984110 100644 (file)
@@ -229,6 +229,8 @@ endmenu
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 source "drivers/block/Kconfig"
 
 source "arch/um/Kconfig.char"
index fd0c25ad6af3ecdbd03be96d1a9ee142ec8560e2..129647375a6ccfb7f61910f0c8543ed9053f7480 100644 (file)
@@ -179,7 +179,8 @@ static int copy_sc_from_user(struct pt_regs *regs,
        if (have_fpx_regs) {
                struct user_fxsr_struct fpx;
 
-               err = copy_from_user(&fpx, &sc.fpstate->_fxsr_env[0],
+               err = copy_from_user(&fpx,
+                       &((struct _fpstate __user *)sc.fpstate)->_fxsr_env[0],
                                     sizeof(struct user_fxsr_struct));
                if (err)
                        return 1;
index bd3c2c53873ecb4cd0b944e31996b746884d6cf0..49349ba77d80b81141deb6065510e54fc74eb077 100644 (file)
@@ -193,6 +193,7 @@ config X86_TRAMPOLINE
 config KTIME_SCALAR
        def_bool X86_32
 source "init/Kconfig"
+source "kernel/Kconfig.freezer"
 
 menu "Processor type and features"
 
index 72d0c56c1b48eff12779b557a75f3aee376c5571..f7cdb3b457aadc7f0f8951323dc507827211be75 100644 (file)
@@ -13,6 +13,9 @@
 
 static void *kdump_buf_page;
 
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
 /**
  * copy_oldmem_page - copy one page from "oldmem"
  * @pfn: page frame number to be copied
index e90a60ef10c2b641f524844e4902969cdd8235b4..045b36cada655370382231cb186d45d5d8820d95 100644 (file)
@@ -10,6 +10,9 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 
+/* Stores the physical address of elf header of crash image. */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
 /**
  * copy_oldmem_page - copy one page from "oldmem"
  * @pfn: page frame number to be copied
index 0a23b5795b25a87c38b117e4bfc168dfc565c2d5..dd6f2b71561bfbdc245e78cae8cfac30d44d425e 100644 (file)
@@ -52,7 +52,7 @@ int mach_set_rtc_mmss(unsigned long nowtime)
 
        cmos_minutes = CMOS_READ(RTC_MINUTES);
        if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-               BCD_TO_BIN(cmos_minutes);
+               cmos_minutes = bcd2bin(cmos_minutes);
 
        /*
         * since we're only adjusting minutes and seconds,
@@ -69,8 +69,8 @@ int mach_set_rtc_mmss(unsigned long nowtime)
 
        if (abs(real_minutes - cmos_minutes) < 30) {
                if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-                       BIN_TO_BCD(real_seconds);
-                       BIN_TO_BCD(real_minutes);
+                       real_seconds = bin2bcd(real_seconds);
+                       real_minutes = bin2bcd(real_minutes);
                }
                CMOS_WRITE(real_seconds,RTC_SECONDS);
                CMOS_WRITE(real_minutes,RTC_MINUTES);
@@ -124,16 +124,16 @@ unsigned long mach_get_cmos_time(void)
        WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY));
 
        if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) {
-               BCD_TO_BIN(sec);
-               BCD_TO_BIN(min);
-               BCD_TO_BIN(hour);
-               BCD_TO_BIN(day);
-               BCD_TO_BIN(mon);
-               BCD_TO_BIN(year);
+               sec = bcd2bin(sec);
+               min = bcd2bin(min);
+               hour = bcd2bin(hour);
+               day = bcd2bin(day);
+               mon = bcd2bin(mon);
+               year = bcd2bin(year);
        }
 
        if (century) {
-               BCD_TO_BIN(century);
+               century = bcd2bin(century);
                year += century * 100;
                printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
        } else
index 2255782e8d4b94664ff8c3c053af26aa114d0e6c..b2c97874ec0f95f7cfbea51b7b7695a2939c1b01 100644 (file)
@@ -561,7 +561,13 @@ static void __init reserve_standard_io_resources(void)
 
 }
 
-#ifdef CONFIG_PROC_VMCORE
+/*
+ * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
+ * is_kdump_kernel() to determine if we are booting after a panic. Hence
+ * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
+ */
+
+#ifdef CONFIG_CRASH_DUMP
 /* elfcorehdr= specifies the location of elf core header
  * stored by the crashed kernel. This option will be passed
  * by kexec loader to the capture kernel.
index a9ec89c3fbca32c3fda0e65d8da8846c8f96ed2a..407d8784f669603fd48667c8d2ee22bb8e870f35 100644 (file)
@@ -792,6 +792,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
        /* Must avoid aliasing mappings in the highmem code */
        kmap_flush_unused();
 
+       vm_unmap_aliases();
+
        cpa.vaddr = addr;
        cpa.numpages = numpages;
        cpa.mask_set = mask_set;
index 0013a729b41ddc93045510068da9bcbbadf76433..b61534c7a4c4d5894812c5bb97a89f29a0f72338 100644 (file)
@@ -871,6 +871,7 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
                        /* make sure there are no stray mappings of
                           this page */
                        kmap_flush_unused();
+                       vm_unmap_aliases();
        }
 }
 
index ae173f6edd8ba044f5cc5372193ea7ce0af2e299..d4d52f5a1cf7bae392801e19367030ebe58ccf79 100644 (file)
@@ -846,6 +846,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
                /* re-enable interrupts for kmap_flush_unused */
                xen_mc_issue(0);
                kmap_flush_unused();
+               vm_unmap_aliases();
                xen_mc_batch();
        }
 
index 02e417d3d8e95815713b1420146b069112da6b99..a213260b51e5b4d8d3d64d7aebc85d2b2f21eb74 100644 (file)
@@ -55,6 +55,7 @@ config HZ
        default 100
 
 source "init/Kconfig"
+source "kernel/Kconfig.freezer"
 
 menu "Processor type and features"
 
index b1c723f9f58d1fce3861730870b58550fe13f6a2..70f7f60929ca552f87dfa819dd93296abd3cea60 100644 (file)
@@ -431,7 +431,7 @@ static ssize_t acpi_battery_alarm_store(struct device *dev,
 }
 
 static struct device_attribute alarm_attr = {
-       .attr = {.name = "alarm", .mode = 0644, .owner = THIS_MODULE},
+       .attr = {.name = "alarm", .mode = 0644},
        .show = acpi_battery_alarm_show,
        .store = acpi_battery_alarm_store,
 };
index 10a36512647ca6107a76c5982a5f3acaa140b571..7b011e7e29fe00b35ab1616d4c7a232c32ec8d2b 100644 (file)
@@ -463,7 +463,7 @@ static ssize_t acpi_battery_alarm_store(struct device *dev,
 }
 
 static struct device_attribute alarm_attr = {
-       .attr = {.name = "alarm", .mode = 0644, .owner = THIS_MODULE},
+       .attr = {.name = "alarm", .mode = 0644},
        .show = acpi_battery_alarm_show,
        .store = acpi_battery_alarm_store,
 };
index bf5b04de02d1c6e48123454656ab3f36e72bf8b7..631ee2ee2ca0028193ab5d52dee2acc0d15579f2 100644 (file)
@@ -120,13 +120,13 @@ static int acpi_system_alarm_seq_show(struct seq_file *seq, void *offset)
        spin_unlock_irqrestore(&rtc_lock, flags);
 
        if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-               BCD_TO_BIN(sec);
-               BCD_TO_BIN(min);
-               BCD_TO_BIN(hr);
-               BCD_TO_BIN(day);
-               BCD_TO_BIN(mo);
-               BCD_TO_BIN(yr);
-               BCD_TO_BIN(cent);
+               sec = bcd2bin(sec);
+               min = bcd2bin(min);
+               hr = bcd2bin(hr);
+               day = bcd2bin(day);
+               mo = bcd2bin(mo);
+               yr = bcd2bin(yr);
+               cent = bcd2bin(cent);
        }
 
        /* we're trusting the FADT (see above) */
@@ -204,7 +204,7 @@ static u32 cmos_bcd_read(int offset, int rtc_control)
 {
        u32 val = CMOS_READ(offset);
        if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-               BCD_TO_BIN(val);
+               val = bcd2bin(val);
        return val;
 }
 
@@ -212,7 +212,7 @@ static u32 cmos_bcd_read(int offset, int rtc_control)
 static void cmos_bcd_write(u32 val, int offset, int rtc_control)
 {
        if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-               BIN_TO_BCD(val);
+               val = bin2bcd(val);
        CMOS_WRITE(val, offset);
 }
 
index 91dec448b3edcd5055112a892f1c368433bc6281..24e80fd927e238a6e8ea60ebafe2d4ae57f5f2c1 100644 (file)
@@ -115,7 +115,6 @@ static void acpi_table_attr_init(struct acpi_table_attr *table_attr,
        table_attr->attr.read = acpi_table_show;
        table_attr->attr.attr.name = table_attr->name;
        table_attr->attr.attr.mode = 0444;
-       table_attr->attr.attr.owner = THIS_MODULE;
 
        return;
 }
index af0d175c025dcd168c5a6de866351fd0b1f83d5a..5260e9e0df48a3f9b52221f179e72fac01b45f6a 100644 (file)
@@ -21,6 +21,8 @@
 #include <linux/memory_hotplug.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
+#include <linux/stat.h>
+
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
 
@@ -325,7 +327,7 @@ memory_probe_store(struct class *class, const char *buf, size_t count)
 
        return count;
 }
-static CLASS_ATTR(probe, 0700, NULL, memory_probe_store);
+static CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store);
 
 static int memory_probe_init(void)
 {
index 5116b78c632586884a19ab8b8329b798bc5cce14..f5207090885a8fc80f28b670b95a6fa13ca87418 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/nodemask.h>
 #include <linux/cpu.h>
 #include <linux/device.h>
+#include <linux/swap.h>
 
 static struct sysdev_class node_class = {
        .name = "node",
@@ -61,34 +62,52 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
        si_meminfo_node(&i, nid);
 
        n = sprintf(buf, "\n"
-                      "Node %d MemTotal:     %8lu kB\n"
-                      "Node %d MemFree:      %8lu kB\n"
-                      "Node %d MemUsed:      %8lu kB\n"
-                      "Node %d Active:       %8lu kB\n"
-                      "Node %d Inactive:     %8lu kB\n"
+                      "Node %d MemTotal:       %8lu kB\n"
+                      "Node %d MemFree:        %8lu kB\n"
+                      "Node %d MemUsed:        %8lu kB\n"
+                      "Node %d Active:         %8lu kB\n"
+                      "Node %d Inactive:       %8lu kB\n"
+                      "Node %d Active(anon):   %8lu kB\n"
+                      "Node %d Inactive(anon): %8lu kB\n"
+                      "Node %d Active(file):   %8lu kB\n"
+                      "Node %d Inactive(file): %8lu kB\n"
+#ifdef CONFIG_UNEVICTABLE_LRU
+                      "Node %d Unevictable:    %8lu kB\n"
+                      "Node %d Mlocked:        %8lu kB\n"
+#endif
 #ifdef CONFIG_HIGHMEM
-                      "Node %d HighTotal:    %8lu kB\n"
-                      "Node %d HighFree:     %8lu kB\n"
-                      "Node %d LowTotal:     %8lu kB\n"
-                      "Node %d LowFree:      %8lu kB\n"
+                      "Node %d HighTotal:      %8lu kB\n"
+                      "Node %d HighFree:       %8lu kB\n"
+                      "Node %d LowTotal:       %8lu kB\n"
+                      "Node %d LowFree:        %8lu kB\n"
 #endif
-                      "Node %d Dirty:        %8lu kB\n"
-                      "Node %d Writeback:    %8lu kB\n"
-                      "Node %d FilePages:    %8lu kB\n"
-                      "Node %d Mapped:       %8lu kB\n"
-                      "Node %d AnonPages:    %8lu kB\n"
-                      "Node %d PageTables:   %8lu kB\n"
-                      "Node %d NFS_Unstable: %8lu kB\n"
-                      "Node %d Bounce:       %8lu kB\n"
-                      "Node %d WritebackTmp: %8lu kB\n"
-                      "Node %d Slab:         %8lu kB\n"
-                      "Node %d SReclaimable: %8lu kB\n"
-                      "Node %d SUnreclaim:   %8lu kB\n",
+                      "Node %d Dirty:          %8lu kB\n"
+                      "Node %d Writeback:      %8lu kB\n"
+                      "Node %d FilePages:      %8lu kB\n"
+                      "Node %d Mapped:         %8lu kB\n"
+                      "Node %d AnonPages:      %8lu kB\n"
+                      "Node %d PageTables:     %8lu kB\n"
+                      "Node %d NFS_Unstable:   %8lu kB\n"
+                      "Node %d Bounce:         %8lu kB\n"
+                      "Node %d WritebackTmp:   %8lu kB\n"
+                      "Node %d Slab:           %8lu kB\n"
+                      "Node %d SReclaimable:   %8lu kB\n"
+                      "Node %d SUnreclaim:     %8lu kB\n",
                       nid, K(i.totalram),
                       nid, K(i.freeram),
                       nid, K(i.totalram - i.freeram),
-                      nid, K(node_page_state(nid, NR_ACTIVE)),
-                      nid, K(node_page_state(nid, NR_INACTIVE)),
+                      nid, K(node_page_state(nid, NR_ACTIVE_ANON) +
+                               node_page_state(nid, NR_ACTIVE_FILE)),
+                      nid, K(node_page_state(nid, NR_INACTIVE_ANON) +
+                               node_page_state(nid, NR_INACTIVE_FILE)),
+                      nid, K(node_page_state(nid, NR_ACTIVE_ANON)),
+                      nid, K(node_page_state(nid, NR_INACTIVE_ANON)),
+                      nid, K(node_page_state(nid, NR_ACTIVE_FILE)),
+                      nid, K(node_page_state(nid, NR_INACTIVE_FILE)),
+#ifdef CONFIG_UNEVICTABLE_LRU
+                      nid, K(node_page_state(nid, NR_UNEVICTABLE)),
+                      nid, K(node_page_state(nid, NR_MLOCK)),
+#endif
 #ifdef CONFIG_HIGHMEM
                       nid, K(i.totalhigh),
                       nid, K(i.freehigh),
@@ -173,6 +192,8 @@ int register_node(struct node *node, int num, struct node *parent)
                sysdev_create_file(&node->sysdev, &attr_meminfo);
                sysdev_create_file(&node->sysdev, &attr_numastat);
                sysdev_create_file(&node->sysdev, &attr_distance);
+
+               scan_unevictable_register_node(node);
        }
        return error;
 }
@@ -192,6 +213,8 @@ void unregister_node(struct node *node)
        sysdev_remove_file(&node->sysdev, &attr_numastat);
        sysdev_remove_file(&node->sysdev, &attr_distance);
 
+       scan_unevictable_unregister_node(node);
+
        sysdev_unregister(&node->sysdev);
 }
 
index b82654e883a759f10d46dd67f311eab59a35fec6..d876ad861237d9367abdf3df8ef9606274d0ebfb 100644 (file)
@@ -90,7 +90,7 @@ static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
 static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
 static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL);
 static struct device_attribute dev_attr_firmware_version = {
-       .attr = { .name = "firmware-version", .mode = S_IRUGO, .owner = THIS_MODULE },
+       .attr = { .name = "firmware-version", .mode = S_IRUGO },
        .show = aoedisk_show_fwver,
 };
 
index 7b3351260d564015c203d9a2342b94d063eaaccb..9034ca585afd710d6425d53f7b6357c13357a6af 100644 (file)
@@ -391,7 +391,7 @@ static ssize_t pid_show(struct device *dev,
 }
 
 static struct device_attribute pid_attr = {
-       .attr = { .name = "pid", .mode = S_IRUGO, .owner = THIS_MODULE },
+       .attr = { .name = "pid", .mode = S_IRUGO},
        .show = pid_show,
 };
 
index 5329d482b582bd06fff9d4acf66b044683de05e5..0a826d7be10ee21b85ef5178d3830aac0ef46e9a 100644 (file)
@@ -210,8 +210,8 @@ static int ds1286_ioctl(struct inode *inode, struct file *file,
                if (sec != 0)
                        return -EINVAL;
 
-               min = BIN2BCD(min);
-               min = BIN2BCD(hrs);
+               min = bin2bcd(min);
+               min = bin2bcd(hrs);
 
                spin_lock(&ds1286_lock);
                rtc_write(hrs, RTC_HOURS_ALARM);
@@ -353,7 +353,7 @@ static int ds1286_proc_output(char *buf)
 
        ds1286_get_time(&tm);
        hundredth = rtc_read(RTC_HUNDREDTH_SECOND);
-       BCD_TO_BIN(hundredth);
+       hundredth = bcd2bin(hundredth);
 
        p += sprintf(p,
                     "rtc_time\t: %02d:%02d:%02d.%02d\n"
@@ -477,12 +477,12 @@ static void ds1286_get_time(struct rtc_time *rtc_tm)
        rtc_write(save_control, RTC_CMD);
        spin_unlock_irqrestore(&ds1286_lock, flags);
 
-       BCD_TO_BIN(rtc_tm->tm_sec);
-       BCD_TO_BIN(rtc_tm->tm_min);
-       BCD_TO_BIN(rtc_tm->tm_hour);
-       BCD_TO_BIN(rtc_tm->tm_mday);
-       BCD_TO_BIN(rtc_tm->tm_mon);
-       BCD_TO_BIN(rtc_tm->tm_year);
+       rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+       rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+       rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+       rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+       rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+       rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
 
        /*
         * Account for differences between how the RTC uses the values
@@ -531,12 +531,12 @@ static int ds1286_set_time(struct rtc_time *rtc_tm)
        if (yrs >= 100)
                yrs -= 100;
 
-       BIN_TO_BCD(sec);
-       BIN_TO_BCD(min);
-       BIN_TO_BCD(hrs);
-       BIN_TO_BCD(day);
-       BIN_TO_BCD(mon);
-       BIN_TO_BCD(yrs);
+       sec = bin2bcd(sec);
+       min = bin2bcd(min);
+       hrs = bin2bcd(hrs);
+       day = bin2bcd(day);
+       mon = bin2bcd(mon);
+       yrs = bin2bcd(yrs);
 
        spin_lock_irqsave(&ds1286_lock, flags);
        save_control = rtc_read(RTC_CMD);
@@ -572,8 +572,8 @@ static void ds1286_get_alm_time(struct rtc_time *alm_tm)
        cmd = rtc_read(RTC_CMD);
        spin_unlock_irqrestore(&ds1286_lock, flags);
 
-       BCD_TO_BIN(alm_tm->tm_min);
-       BCD_TO_BIN(alm_tm->tm_hour);
+       alm_tm->tm_min = bcd2bin(alm_tm->tm_min);
+       alm_tm->tm_hour = bcd2bin(alm_tm->tm_hour);
        alm_tm->tm_sec = 0;
 }
 
index c5e67a623951bbc89f05cb7f9b5ade1530866fc1..170693c93c73d275fceb2c310c41df8a323c9297 100644 (file)
@@ -131,12 +131,12 @@ get_rtc_time(struct rtc_time *rtc_tm)
 
        local_irq_restore(flags);
 
-       BCD_TO_BIN(rtc_tm->tm_sec);
-       BCD_TO_BIN(rtc_tm->tm_min);
-       BCD_TO_BIN(rtc_tm->tm_hour);
-       BCD_TO_BIN(rtc_tm->tm_mday);
-       BCD_TO_BIN(rtc_tm->tm_mon);
-       BCD_TO_BIN(rtc_tm->tm_year);
+       rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+       rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+       rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+       rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+       rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+       rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
 
        /*
         * Account for differences between how the RTC uses the values
@@ -211,12 +211,12 @@ static long rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                        else
                                yrs -= 1900;    /* RTC (70, 71, ... 99) */
 
-                       BIN_TO_BCD(sec);
-                       BIN_TO_BCD(min);
-                       BIN_TO_BCD(hrs);
-                       BIN_TO_BCD(day);
-                       BIN_TO_BCD(mon);
-                       BIN_TO_BCD(yrs);
+                       sec = bin2bcd(sec);
+                       min = bin2bcd(min);
+                       hrs = bin2bcd(hrs);
+                       day = bin2bcd(day);
+                       mon = bin2bcd(mon);
+                       yrs = bin2bcd(yrs);
 
                        lock_kernel();
                        local_irq_save(flags);
index ec9d0443d92c7a3f77b67832d064111119dfdead..2abd881b4cbcc07a8b16c087b9b474d2c6e58100 100644 (file)
@@ -130,12 +130,12 @@ static long rtc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                if (yrs >= 100)
                        yrs -= 100;
 
-               sec = BIN2BCD(sec);
-               min = BIN2BCD(min);
-               hrs = BIN2BCD(hrs);
-               day = BIN2BCD(day);
-               mon = BIN2BCD(mon);
-               yrs = BIN2BCD(yrs);
+               sec = bin2bcd(sec);
+               min = bin2bcd(min);
+               hrs = bin2bcd(hrs);
+               day = bin2bcd(day);
+               mon = bin2bcd(mon);
+               yrs = bin2bcd(yrs);
 
                spin_lock_irq(&rtc_lock);
                rtc->control |= M48T35_RTC_SET;
@@ -311,12 +311,12 @@ static void get_rtc_time(struct rtc_time *rtc_tm)
        rtc->control &= ~M48T35_RTC_READ;
        spin_unlock_irq(&rtc_lock);
 
-       rtc_tm->tm_sec = BCD2BIN(rtc_tm->tm_sec);
-       rtc_tm->tm_min = BCD2BIN(rtc_tm->tm_min);
-       rtc_tm->tm_hour = BCD2BIN(rtc_tm->tm_hour);
-       rtc_tm->tm_mday = BCD2BIN(rtc_tm->tm_mday);
-       rtc_tm->tm_mon = BCD2BIN(rtc_tm->tm_mon);
-       rtc_tm->tm_year = BCD2BIN(rtc_tm->tm_year);
+       rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+       rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+       rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+       rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+       rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+       rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
 
        /*
         * Account for differences between how the RTC uses the values
index b930de50407aaac9536c9a74adb8b740ce3bae67..3f7da8cf3a80aa0b95b2d98875c3a742f50cd8d1 100644 (file)
@@ -41,7 +41,8 @@ static u8 pc8736x_gpio_shadow[4];
 #define SIO_BASE2       0x4E   /* alt command-reg to check */
 
 #define SIO_SID                0x20    /* SuperI/O ID Register */
-#define SIO_SID_VALUE  0xe9    /* Expected value in SuperI/O ID Register */
+#define SIO_SID_PC87365        0xe5    /* Expected value in ID Register for PC87365 */
+#define SIO_SID_PC87366        0xe9    /* Expected value in ID Register for PC87366 */
 
 #define SIO_CF1                0x21    /* chip config, bit0 is chip enable */
 
@@ -91,13 +92,17 @@ static inline int superio_inb(int addr)
 
 static int pc8736x_superio_present(void)
 {
+       int id;
+
        /* try the 2 possible values, read a hardware reg to verify */
        superio_cmd = SIO_BASE1;
-       if (superio_inb(SIO_SID) == SIO_SID_VALUE)
+       id = superio_inb(SIO_SID);
+       if (id == SIO_SID_PC87365 || id == SIO_SID_PC87366)
                return superio_cmd;
 
        superio_cmd = SIO_BASE2;
-       if (superio_inb(SIO_SID) == SIO_SID_VALUE)
+       id = superio_inb(SIO_SID);
+       if (id == SIO_SID_PC87365 || id == SIO_SID_PC87366)
                return superio_cmd;
 
        return 0;
index 17683de95717e2a2b5eb763fa345ce418fed6181..32dc89720d5896a6f6efebcfdfa8e8999999d85f 100644 (file)
@@ -518,17 +518,17 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
                if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) ||
                                                        RTC_ALWAYS_BCD) {
                        if (sec < 60)
-                               BIN_TO_BCD(sec);
+                               sec = bin2bcd(sec);
                        else
                                sec = 0xff;
 
                        if (min < 60)
-                               BIN_TO_BCD(min);
+                               min = bin2bcd(min);
                        else
                                min = 0xff;
 
                        if (hrs < 24)
-                               BIN_TO_BCD(hrs);
+                               hrs = bin2bcd(hrs);
                        else
                                hrs = 0xff;
                }
@@ -614,12 +614,12 @@ static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel)
 
                if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)
                    || RTC_ALWAYS_BCD) {
-                       BIN_TO_BCD(sec);
-                       BIN_TO_BCD(min);
-                       BIN_TO_BCD(hrs);
-                       BIN_TO_BCD(day);
-                       BIN_TO_BCD(mon);
-                       BIN_TO_BCD(yrs);
+                       sec = bin2bcd(sec);
+                       min = bin2bcd(min);
+                       hrs = bin2bcd(hrs);
+                       day = bin2bcd(day);
+                       mon = bin2bcd(mon);
+                       yrs = bin2bcd(yrs);
                }
 
                save_control = CMOS_READ(RTC_CONTROL);
@@ -1099,7 +1099,7 @@ no_irq:
        spin_unlock_irq(&rtc_lock);
 
        if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-               BCD_TO_BIN(year);       /* This should never happen... */
+               year = bcd2bin(year);       /* This should never happen... */
 
        if (year < 20) {
                epoch = 2000;
@@ -1352,13 +1352,13 @@ static void rtc_get_rtc_time(struct rtc_time *rtc_tm)
        spin_unlock_irqrestore(&rtc_lock, flags);
 
        if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-               BCD_TO_BIN(rtc_tm->tm_sec);
-               BCD_TO_BIN(rtc_tm->tm_min);
-               BCD_TO_BIN(rtc_tm->tm_hour);
-               BCD_TO_BIN(rtc_tm->tm_mday);
-               BCD_TO_BIN(rtc_tm->tm_mon);
-               BCD_TO_BIN(rtc_tm->tm_year);
-               BCD_TO_BIN(rtc_tm->tm_wday);
+               rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+               rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+               rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+               rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+               rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+               rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
+               rtc_tm->tm_wday = bcd2bin(rtc_tm->tm_wday);
        }
 
 #ifdef CONFIG_MACH_DECSTATION
@@ -1392,9 +1392,9 @@ static void get_rtc_alm_time(struct rtc_time *alm_tm)
        spin_unlock_irq(&rtc_lock);
 
        if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-               BCD_TO_BIN(alm_tm->tm_sec);
-               BCD_TO_BIN(alm_tm->tm_min);
-               BCD_TO_BIN(alm_tm->tm_hour);
+               alm_tm->tm_sec = bcd2bin(alm_tm->tm_sec);
+               alm_tm->tm_min = bcd2bin(alm_tm->tm_min);
+               alm_tm->tm_hour = bcd2bin(alm_tm->tm_hour);
        }
 }
 
index 5b8d7a1aa3e64853c21eb524339123a81bd31f3b..ba4e86281fbf29dde6dc9b121da6228628f22d33 100644 (file)
@@ -2504,7 +2504,7 @@ static void __devexit sx_remove_card(struct sx_board *board,
                del_timer(&board->timer);
                if (pdev) {
 #ifdef CONFIG_PCI
-                       pci_iounmap(pdev, board->base2);
+                       iounmap(board->base2);
                        pci_release_region(pdev, IS_CF_BOARD(board) ? 3 : 2);
 #endif
                } else {
@@ -2677,7 +2677,7 @@ static int __devinit sx_pci_probe(struct pci_dev *pdev,
        }
        board->hw_base = pci_resource_start(pdev, reg);
        board->base2 =
-       board->base = pci_iomap(pdev, reg, WINDOW_LEN(board));
+       board->base = ioremap_nocache(board->hw_base, WINDOW_LEN(board));
        if (!board->base) {
                dev_err(&pdev->dev, "ioremap failed\n");
                goto err_reg;
@@ -2703,7 +2703,7 @@ static int __devinit sx_pci_probe(struct pci_dev *pdev,
 
        return 0;
 err_unmap:
-       pci_iounmap(pdev, board->base2);
+       iounmap(board->base2);
 err_reg:
        pci_release_region(pdev, reg);
 err_flag:
index dce4cc0e695338210391a11311507ef1b8d49f76..d0c0d64ed366ce9e961c7dbbfca03a9764e29321 100644 (file)
@@ -168,7 +168,7 @@ static void sysrq_handle_show_timers(int key, struct tty_struct *tty)
 static struct sysrq_key_op sysrq_show_timers_op = {
        .handler        = sysrq_handle_show_timers,
        .help_msg       = "show-all-timers(Q)",
-       .action_msg     = "Show Pending Timers",
+       .action_msg     = "Show pending hrtimers (no others)",
 };
 
 static void sysrq_handle_mountro(int key, struct tty_struct *tty)
index e70d13defde4bcc30724187ff6443f553730eeb7..9c47dc48c9fd4526ac8e8972fb80c6b21ba2cae8 100644 (file)
@@ -1157,7 +1157,7 @@ EXPORT_SYMBOL_GPL(tpm_dev_vendor_release);
  * Once all references to platform device are down to 0,
  * release all allocated structures.
  */
-static void tpm_dev_release(struct device *dev)
+void tpm_dev_release(struct device *dev)
 {
        struct tpm_chip *chip = dev_get_drvdata(dev);
 
index 0e024fe2d8c486ce8ab0bddca34b91e8b9f53165..887072f5dc8b8b703db223751a4bd47b6631e5f8 100644 (file)
@@ -142,7 +142,7 @@ static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci)
                csrow->nr_pages = (r.end - r.start + 1) >> PAGE_SHIFT;
                csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
                csrow->mtype = MEM_XDR;
-               csrow->edac_mode = EDAC_FLAG_EC | EDAC_FLAG_SECDED;
+               csrow->edac_mode = EDAC_SECDED;
                dev_dbg(mci->dev,
                        "Initialized on node %d, chanmask=0x%x,"
                        " first_page=0x%lx, nr_pages=0x%x\n",
index deb154aa47c412be66f4ee68281f053cf7a4c241..4353414a0b770c368b938760e8630f50bd065346 100644 (file)
@@ -732,7 +732,6 @@ static int __init ibft_create_attribute(struct ibft_kobject *kobj_data,
 
        attr->attr.name = name;
        attr->attr.mode = S_IRUSR;
-       attr->attr.owner = THIS_MODULE;
 
        attr->hdr = hdr;
        attr->show = show;
index 9112830107a53d4c208b5a8cbf1934026549bac1..22edc4273ef68cd46eceabe2ec249cca93faf0a5 100644 (file)
@@ -248,7 +248,7 @@ static ssize_t gpio_value_show(struct device *dev,
        if (!test_bit(FLAG_EXPORT, &desc->flags))
                status = -EIO;
        else
-               status = sprintf(buf, "%d\n", gpio_get_value_cansleep(gpio));
+               status = sprintf(buf, "%d\n", !!gpio_get_value_cansleep(gpio));
 
        mutex_unlock(&sysfs_lock);
        return status;
@@ -1105,7 +1105,7 @@ int gpio_get_value_cansleep(unsigned gpio)
 
        might_sleep_if(extra_checks);
        chip = gpio_to_chip(gpio);
-       return chip->get(chip, gpio - chip->base);
+       return chip->get ? chip->get(chip, gpio - chip->base) : 0;
 }
 EXPORT_SYMBOL_GPL(gpio_get_value_cansleep);
 
index b06b8e090a278e9164c8603d7fa1d311be17317e..bc011da79e148249e24a1439e3c4cb55fcd1c7b0 100644 (file)
@@ -49,6 +49,9 @@
 
 #define APPLESMC_MAX_DATA_LENGTH 32
 
+#define APPLESMC_MIN_WAIT      0x0040
+#define APPLESMC_MAX_WAIT      0x8000
+
 #define APPLESMC_STATUS_MASK   0x0f
 #define APPLESMC_READ_CMD      0x10
 #define APPLESMC_WRITE_CMD     0x11
@@ -57,8 +60,8 @@
 
 #define KEY_COUNT_KEY          "#KEY" /* r-o ui32 */
 
-#define LIGHT_SENSOR_LEFT_KEY  "ALV0" /* r-o {alv (6 bytes) */
-#define LIGHT_SENSOR_RIGHT_KEY "ALV1" /* r-o {alv (6 bytes) */
+#define LIGHT_SENSOR_LEFT_KEY  "ALV0" /* r-o {alv (6-10 bytes) */
+#define LIGHT_SENSOR_RIGHT_KEY "ALV1" /* r-o {alv (6-10 bytes) */
 #define BACKLIGHT_KEY          "LKSB" /* w-o {lkb (2 bytes) */
 
 #define CLAMSHELL_KEY          "MSLD" /* r-o ui8 (unused) */
@@ -104,6 +107,15 @@ static const char* temperature_sensors_sets[][36] = {
 /* Set 6: Macbook3 set */
        { "TB0T", "TC0D", "TC0P", "TM0P", "TN0P", "TTF0", "TW0P", "Th0H",
          "Th0S", "Th1H", NULL },
+/* Set 7: Macbook Air */
+       { "TB0T", "TB1S", "TB1T", "TB2S", "TB2T", "TC0D", "TC0P", "TCFP",
+         "TTF0", "TW0P", "Th0H", "Tp0P", "TpFP", "Ts0P", "Ts0S", NULL },
+/* Set 8: Macbook Pro 4,1 (Penryn) */
+       { "TB0T", "TC0D", "TC0P", "TG0D", "TG0H", "TTF0", "TW0P", "Th0H",
+         "Th1H", "Th2H", "Tm0P", "Ts0P", NULL },
+/* Set 9: Macbook Pro 3,1 (Santa Rosa) */
+       { "TALP", "TB0T", "TC0D", "TC0P", "TG0D", "TG0H", "TTF0", "TW0P",
+         "Th0H", "Th1H", "Th2H", "Tm0P", "Ts0P", NULL },
 };
 
 /* List of keys used to read/write fan speeds */
@@ -163,25 +175,25 @@ static unsigned int key_at_index;
 static struct workqueue_struct *applesmc_led_wq;
 
 /*
- * __wait_status - Wait up to 2ms for the status port to get a certain value
+ * __wait_status - Wait up to 32ms for the status port to get a certain value
  * (masked with 0x0f), returning zero if the value is obtained.  Callers must
  * hold applesmc_lock.
  */
 static int __wait_status(u8 val)
 {
-       unsigned int i;
+       int us;
 
        val = val & APPLESMC_STATUS_MASK;
 
-       for (i = 0; i < 200; i++) {
+       for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) {
+               udelay(us);
                if ((inb(APPLESMC_CMD_PORT) & APPLESMC_STATUS_MASK) == val) {
                        if (debug)
                                printk(KERN_DEBUG
-                                               "Waited %d us for status %x\n",
-                                               i*10, val);
+                                       "Waited %d us for status %x\n",
+                                       2 * us - APPLESMC_MIN_WAIT, val);
                        return 0;
                }
-               udelay(10);
        }
 
        printk(KERN_WARNING "applesmc: wait status failed: %x != %x\n",
@@ -190,6 +202,25 @@ static int __wait_status(u8 val)
        return -EIO;
 }
 
+/*
+ * special treatment of command port - on newer macbooks, it seems necessary
+ * to resend the command byte before polling the status again. Callers must
+ * hold applesmc_lock.
+ */
+static int send_command(u8 cmd)
+{
+       int us;
+       for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) {
+               outb(cmd, APPLESMC_CMD_PORT);
+               udelay(us);
+               if ((inb(APPLESMC_CMD_PORT) & APPLESMC_STATUS_MASK) == 0x0c)
+                       return 0;
+       }
+       printk(KERN_WARNING "applesmc: command failed: %x -> %x\n",
+               cmd, inb(APPLESMC_CMD_PORT));
+       return -EIO;
+}
+
 /*
  * applesmc_read_key - reads len bytes from a given key, and put them in buffer.
  * Returns zero on success or a negative error on failure. Callers must
@@ -205,8 +236,7 @@ static int applesmc_read_key(const char* key, u8* buffer, u8 len)
                return -EINVAL;
        }
 
-       outb(APPLESMC_READ_CMD, APPLESMC_CMD_PORT);
-       if (__wait_status(0x0c))
+       if (send_command(APPLESMC_READ_CMD))
                return -EIO;
 
        for (i = 0; i < 4; i++) {
@@ -249,8 +279,7 @@ static int applesmc_write_key(const char* key, u8* buffer, u8 len)
                return -EINVAL;
        }
 
-       outb(APPLESMC_WRITE_CMD, APPLESMC_CMD_PORT);
-       if (__wait_status(0x0c))
+       if (send_command(APPLESMC_WRITE_CMD))
                return -EIO;
 
        for (i = 0; i < 4; i++) {
@@ -284,8 +313,7 @@ static int applesmc_get_key_at_index(int index, char* key)
        readkey[2] = index >> 8;
        readkey[3] = index;
 
-       outb(APPLESMC_GET_KEY_BY_INDEX_CMD, APPLESMC_CMD_PORT);
-       if (__wait_status(0x0c))
+       if (send_command(APPLESMC_GET_KEY_BY_INDEX_CMD))
                return -EIO;
 
        for (i = 0; i < 4; i++) {
@@ -315,8 +343,7 @@ static int applesmc_get_key_type(char* key, char* type)
 {
        int i;
 
-       outb(APPLESMC_GET_KEY_TYPE_CMD, APPLESMC_CMD_PORT);
-       if (__wait_status(0x0c))
+       if (send_command(APPLESMC_GET_KEY_TYPE_CMD))
                return -EIO;
 
        for (i = 0; i < 4; i++) {
@@ -325,7 +352,7 @@ static int applesmc_get_key_type(char* key, char* type)
                        return -EIO;
        }
 
-       outb(5, APPLESMC_DATA_PORT);
+       outb(6, APPLESMC_DATA_PORT);
 
        for (i = 0; i < 6; i++) {
                if (__wait_status(0x05))
@@ -527,17 +554,27 @@ out:
 static ssize_t applesmc_light_show(struct device *dev,
                                struct device_attribute *attr, char *sysfsbuf)
 {
+       static int data_length;
        int ret;
        u8 left = 0, right = 0;
-       u8 buffer[6];
+       u8 buffer[10], query[6];
 
        mutex_lock(&applesmc_lock);
 
-       ret = applesmc_read_key(LIGHT_SENSOR_LEFT_KEY, buffer, 6);
+       if (!data_length) {
+               ret = applesmc_get_key_type(LIGHT_SENSOR_LEFT_KEY, query);
+               if (ret)
+                       goto out;
+               data_length = clamp_val(query[0], 0, 10);
+               printk(KERN_INFO "applesmc: light sensor data length set to "
+                       "%d\n", data_length);
+       }
+
+       ret = applesmc_read_key(LIGHT_SENSOR_LEFT_KEY, buffer, data_length);
        left = buffer[2];
        if (ret)
                goto out;
-       ret = applesmc_read_key(LIGHT_SENSOR_RIGHT_KEY, buffer, 6);
+       ret = applesmc_read_key(LIGHT_SENSOR_RIGHT_KEY, buffer, data_length);
        right = buffer[2];
 
 out:
@@ -1233,39 +1270,57 @@ static __initdata struct dmi_match_data applesmc_dmi_data[] = {
        { .accelerometer = 0, .light = 0, .temperature_set = 5 },
 /* MacBook3: accelerometer and temperature set 6 */
        { .accelerometer = 1, .light = 0, .temperature_set = 6 },
+/* MacBook Air: accelerometer, backlight and temperature set 7 */
+       { .accelerometer = 1, .light = 1, .temperature_set = 7 },
+/* MacBook Pro 4: accelerometer, backlight and temperature set 8 */
+       { .accelerometer = 1, .light = 1, .temperature_set = 8 },
+/* MacBook Pro 3: accelerometer, backlight and temperature set 9 */
+       { .accelerometer = 1, .light = 1, .temperature_set = 9 },
 };
 
 /* Note that DMI_MATCH(...,"MacBook") will match "MacBookPro1,1".
  * So we need to put "Apple MacBook Pro" before "Apple MacBook". */
 static __initdata struct dmi_system_id applesmc_whitelist[] = {
+       { applesmc_dmi_match, "Apple MacBook Air", {
+         DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
+         DMI_MATCH(DMI_PRODUCT_NAME, "MacBookAir") },
+               &applesmc_dmi_data[7]},
+       { applesmc_dmi_match, "Apple MacBook Pro 4", {
+         DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
+         DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro4") },
+               &applesmc_dmi_data[8]},
+       { applesmc_dmi_match, "Apple MacBook Pro 3", {
+         DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
+         DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro3") },
+               &applesmc_dmi_data[9]},
        { applesmc_dmi_match, "Apple MacBook Pro", {
          DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
          DMI_MATCH(DMI_PRODUCT_NAME,"MacBookPro") },
-               (void*)&applesmc_dmi_data[0]},
+               &applesmc_dmi_data[0]},
        { applesmc_dmi_match, "Apple MacBook (v2)", {
          DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
          DMI_MATCH(DMI_PRODUCT_NAME,"MacBook2") },
-               (void*)&applesmc_dmi_data[1]},
+               &applesmc_dmi_data[1]},
        { applesmc_dmi_match, "Apple MacBook (v3)", {
          DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
          DMI_MATCH(DMI_PRODUCT_NAME,"MacBook3") },
-               (void*)&applesmc_dmi_data[6]},
+               &applesmc_dmi_data[6]},
        { applesmc_dmi_match, "Apple MacBook", {
          DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
          DMI_MATCH(DMI_PRODUCT_NAME,"MacBook") },
-               (void*)&applesmc_dmi_data[2]},
+               &applesmc_dmi_data[2]},
        { applesmc_dmi_match, "Apple Macmini", {
          DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
          DMI_MATCH(DMI_PRODUCT_NAME,"Macmini") },
-               (void*)&applesmc_dmi_data[3]},
+               &applesmc_dmi_data[3]},
        { applesmc_dmi_match, "Apple MacPro2", {
          DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
          DMI_MATCH(DMI_PRODUCT_NAME,"MacPro2") },
-               (void*)&applesmc_dmi_data[4]},
+               &applesmc_dmi_data[4]},
        { applesmc_dmi_match, "Apple iMac", {
          DMI_MATCH(DMI_BOARD_VENDOR,"Apple"),
          DMI_MATCH(DMI_PRODUCT_NAME,"iMac") },
-               (void*)&applesmc_dmi_data[5]},
+               &applesmc_dmi_data[5]},
        { .ident = NULL }
 };
 
index 9b462bb13fa33b3b9cc67d66e884628a7e2043ba..5fbfa34c110e36e455e632363eaff7228f4b0aaf 100644 (file)
@@ -75,7 +75,8 @@ MODULE_PARM_DESC(force_id, "Override the detected device ID");
 #define FSCM   0x09    /* Logical device: fans */
 #define VLM    0x0d    /* Logical device: voltages */
 #define TMS    0x0e    /* Logical device: temperatures */
-static const u8 logdev[3] = { FSCM, VLM, TMS };
+#define LDNI_MAX 3
+static const u8 logdev[LDNI_MAX] = { FSCM, VLM, TMS };
 
 #define LD_FAN         0
 #define LD_IN          1
@@ -489,11 +490,66 @@ static struct sensor_device_attribute in_max[] = {
        SENSOR_ATTR(in10_max, S_IWUSR | S_IRUGO, show_in_max, set_in_max, 10),
 };
 
+/* (temp & vin) channel status register alarm bits (pdf sec.11.5.12) */
+#define CHAN_ALM_MIN   0x02    /* min limit crossed */
+#define CHAN_ALM_MAX   0x04    /* max limit exceeded */
+#define TEMP_ALM_CRIT  0x08    /* temp crit exceeded (temp only) */
+
+/* show_in_min/max_alarm() reads data from the per-channel status
+   register (sec 11.5.12), not the vin event status registers (sec
+   11.5.2) that (legacy) show_in_alarm() reads (via data->in_alarms) */
+
+static ssize_t show_in_min_alarm(struct device *dev,
+                       struct device_attribute *devattr, char *buf)
+{
+       struct pc87360_data *data = pc87360_update_device(dev);
+       unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+       return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MIN));
+}
+static ssize_t show_in_max_alarm(struct device *dev,
+                       struct device_attribute *devattr, char *buf)
+{
+       struct pc87360_data *data = pc87360_update_device(dev);
+       unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+       return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MAX));
+}
+
+static struct sensor_device_attribute in_min_alarm[] = {
+       SENSOR_ATTR(in0_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 0),
+       SENSOR_ATTR(in1_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 1),
+       SENSOR_ATTR(in2_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 2),
+       SENSOR_ATTR(in3_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 3),
+       SENSOR_ATTR(in4_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 4),
+       SENSOR_ATTR(in5_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 5),
+       SENSOR_ATTR(in6_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 6),
+       SENSOR_ATTR(in7_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 7),
+       SENSOR_ATTR(in8_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 8),
+       SENSOR_ATTR(in9_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 9),
+       SENSOR_ATTR(in10_min_alarm, S_IRUGO, show_in_min_alarm, NULL, 10),
+};
+static struct sensor_device_attribute in_max_alarm[] = {
+       SENSOR_ATTR(in0_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 0),
+       SENSOR_ATTR(in1_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 1),
+       SENSOR_ATTR(in2_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 2),
+       SENSOR_ATTR(in3_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 3),
+       SENSOR_ATTR(in4_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 4),
+       SENSOR_ATTR(in5_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 5),
+       SENSOR_ATTR(in6_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 6),
+       SENSOR_ATTR(in7_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 7),
+       SENSOR_ATTR(in8_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 8),
+       SENSOR_ATTR(in9_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 9),
+       SENSOR_ATTR(in10_max_alarm, S_IRUGO, show_in_max_alarm, NULL, 10),
+};
+
 #define VIN_UNIT_ATTRS(X) \
        &in_input[X].dev_attr.attr,     \
        &in_status[X].dev_attr.attr,    \
        &in_min[X].dev_attr.attr,       \
-       &in_max[X].dev_attr.attr
+       &in_max[X].dev_attr.attr,       \
+       &in_min_alarm[X].dev_attr.attr, \
+       &in_max_alarm[X].dev_attr.attr
 
 static ssize_t show_vid(struct device *dev, struct device_attribute *attr, char *buf)
 {
@@ -658,12 +714,68 @@ static struct sensor_device_attribute therm_crit[] = {
                    show_therm_crit, set_therm_crit, 2+11),
 };
 
+/* show_therm_min/max_alarm() reads data from the per-channel voltage
+   status register (sec 11.5.12) */
+
+static ssize_t show_therm_min_alarm(struct device *dev,
+                               struct device_attribute *devattr, char *buf)
+{
+       struct pc87360_data *data = pc87360_update_device(dev);
+       unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+       return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MIN));
+}
+static ssize_t show_therm_max_alarm(struct device *dev,
+                               struct device_attribute *devattr, char *buf)
+{
+       struct pc87360_data *data = pc87360_update_device(dev);
+       unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+       return sprintf(buf, "%u\n", !!(data->in_status[nr] & CHAN_ALM_MAX));
+}
+static ssize_t show_therm_crit_alarm(struct device *dev,
+                               struct device_attribute *devattr, char *buf)
+{
+       struct pc87360_data *data = pc87360_update_device(dev);
+       unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+       return sprintf(buf, "%u\n", !!(data->in_status[nr] & TEMP_ALM_CRIT));
+}
+
+static struct sensor_device_attribute therm_min_alarm[] = {
+       SENSOR_ATTR(temp4_min_alarm, S_IRUGO,
+                   show_therm_min_alarm, NULL, 0+11),
+       SENSOR_ATTR(temp5_min_alarm, S_IRUGO,
+                   show_therm_min_alarm, NULL, 1+11),
+       SENSOR_ATTR(temp6_min_alarm, S_IRUGO,
+                   show_therm_min_alarm, NULL, 2+11),
+};
+static struct sensor_device_attribute therm_max_alarm[] = {
+       SENSOR_ATTR(temp4_max_alarm, S_IRUGO,
+                   show_therm_max_alarm, NULL, 0+11),
+       SENSOR_ATTR(temp5_max_alarm, S_IRUGO,
+                   show_therm_max_alarm, NULL, 1+11),
+       SENSOR_ATTR(temp6_max_alarm, S_IRUGO,
+                   show_therm_max_alarm, NULL, 2+11),
+};
+static struct sensor_device_attribute therm_crit_alarm[] = {
+       SENSOR_ATTR(temp4_crit_alarm, S_IRUGO,
+                   show_therm_crit_alarm, NULL, 0+11),
+       SENSOR_ATTR(temp5_crit_alarm, S_IRUGO,
+                   show_therm_crit_alarm, NULL, 1+11),
+       SENSOR_ATTR(temp6_crit_alarm, S_IRUGO,
+                   show_therm_crit_alarm, NULL, 2+11),
+};
+
 #define THERM_UNIT_ATTRS(X) \
        &therm_input[X].dev_attr.attr,  \
        &therm_status[X].dev_attr.attr, \
        &therm_min[X].dev_attr.attr,    \
        &therm_max[X].dev_attr.attr,    \
-       &therm_crit[X].dev_attr.attr
+       &therm_crit[X].dev_attr.attr,   \
+       &therm_min_alarm[X].dev_attr.attr, \
+       &therm_max_alarm[X].dev_attr.attr, \
+       &therm_crit_alarm[X].dev_attr.attr
 
 static struct attribute * pc8736x_therm_attr_array[] = {
        THERM_UNIT_ATTRS(0),
@@ -790,12 +902,76 @@ static ssize_t show_temp_alarms(struct device *dev, struct device_attribute *att
 }
 static DEVICE_ATTR(alarms_temp, S_IRUGO, show_temp_alarms, NULL);
 
+/* show_temp_min/max_alarm() reads data from the per-channel status
+   register (sec 12.3.7), not the temp event status registers (sec
+   12.3.2) that show_temp_alarm() reads (via data->temp_alarms) */
+
+static ssize_t show_temp_min_alarm(struct device *dev,
+                       struct device_attribute *devattr, char *buf)
+{
+       struct pc87360_data *data = pc87360_update_device(dev);
+       unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+       return sprintf(buf, "%u\n", !!(data->temp_status[nr] & CHAN_ALM_MIN));
+}
+static ssize_t show_temp_max_alarm(struct device *dev,
+                       struct device_attribute *devattr, char *buf)
+{
+       struct pc87360_data *data = pc87360_update_device(dev);
+       unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+       return sprintf(buf, "%u\n", !!(data->temp_status[nr] & CHAN_ALM_MAX));
+}
+static ssize_t show_temp_crit_alarm(struct device *dev,
+                       struct device_attribute *devattr, char *buf)
+{
+       struct pc87360_data *data = pc87360_update_device(dev);
+       unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+       return sprintf(buf, "%u\n", !!(data->temp_status[nr] & TEMP_ALM_CRIT));
+}
+
+static struct sensor_device_attribute temp_min_alarm[] = {
+       SENSOR_ATTR(temp1_min_alarm, S_IRUGO, show_temp_min_alarm, NULL, 0),
+       SENSOR_ATTR(temp2_min_alarm, S_IRUGO, show_temp_min_alarm, NULL, 1),
+       SENSOR_ATTR(temp3_min_alarm, S_IRUGO, show_temp_min_alarm, NULL, 2),
+};
+static struct sensor_device_attribute temp_max_alarm[] = {
+       SENSOR_ATTR(temp1_max_alarm, S_IRUGO, show_temp_max_alarm, NULL, 0),
+       SENSOR_ATTR(temp2_max_alarm, S_IRUGO, show_temp_max_alarm, NULL, 1),
+       SENSOR_ATTR(temp3_max_alarm, S_IRUGO, show_temp_max_alarm, NULL, 2),
+};
+static struct sensor_device_attribute temp_crit_alarm[] = {
+       SENSOR_ATTR(temp1_crit_alarm, S_IRUGO, show_temp_crit_alarm, NULL, 0),
+       SENSOR_ATTR(temp2_crit_alarm, S_IRUGO, show_temp_crit_alarm, NULL, 1),
+       SENSOR_ATTR(temp3_crit_alarm, S_IRUGO, show_temp_crit_alarm, NULL, 2),
+};
+
+#define TEMP_FAULT     0x40    /* open diode */
+static ssize_t show_temp_fault(struct device *dev,
+                       struct device_attribute *devattr, char *buf)
+{
+       struct pc87360_data *data = pc87360_update_device(dev);
+       unsigned nr = to_sensor_dev_attr(devattr)->index;
+
+       return sprintf(buf, "%u\n", !!(data->temp_status[nr] & TEMP_FAULT));
+}
+static struct sensor_device_attribute temp_fault[] = {
+       SENSOR_ATTR(temp1_fault, S_IRUGO, show_temp_fault, NULL, 0),
+       SENSOR_ATTR(temp2_fault, S_IRUGO, show_temp_fault, NULL, 1),
+       SENSOR_ATTR(temp3_fault, S_IRUGO, show_temp_fault, NULL, 2),
+};
+
 #define TEMP_UNIT_ATTRS(X) \
        &temp_input[X].dev_attr.attr,   \
        &temp_status[X].dev_attr.attr,  \
        &temp_min[X].dev_attr.attr,     \
        &temp_max[X].dev_attr.attr,     \
-       &temp_crit[X].dev_attr.attr
+       &temp_crit[X].dev_attr.attr,    \
+       &temp_min_alarm[X].dev_attr.attr, \
+       &temp_max_alarm[X].dev_attr.attr, \
+       &temp_crit_alarm[X].dev_attr.attr, \
+       &temp_fault[X].dev_attr.attr
 
 static struct attribute * pc8736x_temp_attr_array[] = {
        TEMP_UNIT_ATTRS(0),
@@ -809,8 +985,8 @@ static const struct attribute_group pc8736x_temp_group = {
        .attrs = pc8736x_temp_attr_array,
 };
 
-static ssize_t show_name(struct device *dev, struct device_attribute
-                        *devattr, char *buf)
+static ssize_t show_name(struct device *dev,
+                       struct device_attribute *devattr, char *buf)
 {
        struct pc87360_data *data = dev_get_drvdata(dev);
        return sprintf(buf, "%s\n", data->name);
@@ -955,7 +1131,7 @@ static int __devinit pc87360_probe(struct platform_device *pdev)
        mutex_init(&data->update_lock);
        platform_set_drvdata(pdev, data);
 
-       for (i = 0; i < 3; i++) {
+       for (i = 0; i < LDNI_MAX; i++) {
                if (((data->address[i] = extra_isa[i]))
                 && !request_region(extra_isa[i], PC87360_EXTENT,
                                    pc87360_driver.driver.name)) {
@@ -1031,7 +1207,15 @@ static int __devinit pc87360_probe(struct platform_device *pdev)
                            || (err = device_create_file(dev,
                                        &temp_crit[i].dev_attr))
                            || (err = device_create_file(dev,
-                                       &temp_status[i].dev_attr)))
+                                       &temp_status[i].dev_attr))
+                           || (err = device_create_file(dev,
+                                       &temp_min_alarm[i].dev_attr))
+                           || (err = device_create_file(dev,
+                                       &temp_max_alarm[i].dev_attr))
+                           || (err = device_create_file(dev,
+                                       &temp_crit_alarm[i].dev_attr))
+                           || (err = device_create_file(dev,
+                                       &temp_fault[i].dev_attr)))
                                goto ERROR3;
                }
                if ((err = device_create_file(dev, &dev_attr_alarms_temp)))
@@ -1131,6 +1315,16 @@ static void pc87360_write_value(struct pc87360_data *data, u8 ldi, u8 bank,
        mutex_unlock(&(data->lock));
 }
 
+/* (temp & vin) channel conversion status register flags (pdf sec.11.5.12) */
+#define CHAN_CNVRTD    0x80    /* new data ready */
+#define CHAN_ENA       0x01    /* enabled channel (temp or vin) */
+#define CHAN_ALM_ENA   0x10    /* propagate to alarms-reg ?? (chk val!) */
+#define CHAN_READY     (CHAN_ENA|CHAN_CNVRTD) /* sample ready mask */
+
+#define TEMP_OTS_OE    0x20    /* OTS Output Enable */
+#define VIN_RW1C_MASK  (CHAN_READY|CHAN_ALM_MAX|CHAN_ALM_MIN)   /* 0x87 */
+#define TEMP_RW1C_MASK (VIN_RW1C_MASK|TEMP_ALM_CRIT|TEMP_FAULT) /* 0xCF */
+
 static void pc87360_init_device(struct platform_device *pdev,
                                int use_thermistors)
 {
@@ -1152,11 +1346,12 @@ static void pc87360_init_device(struct platform_device *pdev,
 
        nr = data->innr < 11 ? data->innr : 11;
        for (i = 0; i < nr; i++) {
+               reg = pc87360_read_value(data, LD_IN, i,
+                                        PC87365_REG_IN_STATUS);
+               dev_dbg(&pdev->dev, "bios in%d status:0x%02x\n", i, reg);
                if (init >= init_in[i]) {
                        /* Forcibly enable voltage channel */
-                       reg = pc87360_read_value(data, LD_IN, i,
-                                                PC87365_REG_IN_STATUS);
-                       if (!(reg & 0x01)) {
+                       if (!(reg & CHAN_ENA)) {
                                dev_dbg(&pdev->dev, "Forcibly "
                                        "enabling in%d\n", i);
                                pc87360_write_value(data, LD_IN, i,
@@ -1168,19 +1363,24 @@ static void pc87360_init_device(struct platform_device *pdev,
 
        /* We can't blindly trust the Super-I/O space configuration bit,
           most BIOS won't set it properly */
+       dev_dbg(&pdev->dev, "bios thermistors:%d\n", use_thermistors);
        for (i = 11; i < data->innr; i++) {
                reg = pc87360_read_value(data, LD_IN, i,
                                         PC87365_REG_TEMP_STATUS);
-               use_thermistors = use_thermistors || (reg & 0x01);
+               use_thermistors = use_thermistors || (reg & CHAN_ENA);
+               /* thermistors are temp[4-6], measured on vin[11-14] */
+               dev_dbg(&pdev->dev, "bios temp%d_status:0x%02x\n", i-7, reg);
        }
+       dev_dbg(&pdev->dev, "using thermistors:%d\n", use_thermistors);
 
        i = use_thermistors ? 2 : 0;
        for (; i < data->tempnr; i++) {
+               reg = pc87360_read_value(data, LD_TEMP, i,
+                                        PC87365_REG_TEMP_STATUS);
+               dev_dbg(&pdev->dev, "bios temp%d_status:0x%02x\n", i+1, reg);
                if (init >= init_temp[i]) {
                        /* Forcibly enable temperature channel */
-                       reg = pc87360_read_value(data, LD_TEMP, i,
-                                                PC87365_REG_TEMP_STATUS);
-                       if (!(reg & 0x01)) {
+                       if (!(reg & CHAN_ENA)) {
                                dev_dbg(&pdev->dev, "Forcibly "
                                        "enabling temp%d\n", i+1);
                                pc87360_write_value(data, LD_TEMP, i,
@@ -1197,7 +1397,7 @@ static void pc87360_init_device(struct platform_device *pdev,
                                   diodes */
                                reg = pc87360_read_value(data, LD_TEMP,
                                      (i-11)/2, PC87365_REG_TEMP_STATUS);
-                               if (reg & 0x01) {
+                               if (reg & CHAN_ENA) {
                                        dev_dbg(&pdev->dev, "Skipping "
                                                "temp%d, pin already in use "
                                                "by temp%d\n", i-7, (i-11)/2);
@@ -1207,7 +1407,7 @@ static void pc87360_init_device(struct platform_device *pdev,
                                /* Forcibly enable thermistor channel */
                                reg = pc87360_read_value(data, LD_IN, i,
                                                         PC87365_REG_IN_STATUS);
-                               if (!(reg & 0x01)) {
+                               if (!(reg & CHAN_ENA)) {
                                        dev_dbg(&pdev->dev, "Forcibly "
                                                "enabling temp%d\n", i-7);
                                        pc87360_write_value(data, LD_IN, i,
@@ -1221,7 +1421,8 @@ static void pc87360_init_device(struct platform_device *pdev,
        if (data->innr) {
                reg = pc87360_read_value(data, LD_IN, NO_BANK,
                                         PC87365_REG_IN_CONFIG);
-               if (reg & 0x01) {
+               dev_dbg(&pdev->dev, "bios vin-cfg:0x%02x\n", reg);
+               if (reg & CHAN_ENA) {
                        dev_dbg(&pdev->dev, "Forcibly "
                                "enabling monitoring (VLM)\n");
                        pc87360_write_value(data, LD_IN, NO_BANK,
@@ -1233,7 +1434,8 @@ static void pc87360_init_device(struct platform_device *pdev,
        if (data->tempnr) {
                reg = pc87360_read_value(data, LD_TEMP, NO_BANK,
                                         PC87365_REG_TEMP_CONFIG);
-               if (reg & 0x01) {
+               dev_dbg(&pdev->dev, "bios temp-cfg:0x%02x\n", reg);
+               if (reg & CHAN_ENA) {
                        dev_dbg(&pdev->dev, "Forcibly enabling "
                                "monitoring (TMS)\n");
                        pc87360_write_value(data, LD_TEMP, NO_BANK,
@@ -1336,11 +1538,11 @@ static struct pc87360_data *pc87360_update_device(struct device *dev)
                        pc87360_write_value(data, LD_IN, i,
                                            PC87365_REG_IN_STATUS,
                                            data->in_status[i]);
-                       if ((data->in_status[i] & 0x81) == 0x81) {
+                       if ((data->in_status[i] & CHAN_READY) == CHAN_READY) {
                                data->in[i] = pc87360_read_value(data, LD_IN,
                                              i, PC87365_REG_IN);
                        }
-                       if (data->in_status[i] & 0x01) {
+                       if (data->in_status[i] & CHAN_ENA) {
                                data->in_min[i] = pc87360_read_value(data,
                                                  LD_IN, i,
                                                  PC87365_REG_IN_MIN);
@@ -1373,12 +1575,12 @@ static struct pc87360_data *pc87360_update_device(struct device *dev)
                        pc87360_write_value(data, LD_TEMP, i,
                                            PC87365_REG_TEMP_STATUS,
                                            data->temp_status[i]);
-                       if ((data->temp_status[i] & 0x81) == 0x81) {
+                       if ((data->temp_status[i] & CHAN_READY) == CHAN_READY) {
                                data->temp[i] = pc87360_read_value(data,
                                                LD_TEMP, i,
                                                PC87365_REG_TEMP);
                        }
-                       if (data->temp_status[i] & 0x01) {
+                       if (data->temp_status[i] & CHAN_ENA) {
                                data->temp_min[i] = pc87360_read_value(data,
                                                    LD_TEMP, i,
                                                    PC87365_REG_TEMP_MIN);
index 2a4acb2695693b9b3108a6317301efbeef6e1416..d4775528abc69b2d4c4d580d712e48862ef7d1d7 100644 (file)
@@ -460,7 +460,6 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
         */
        at24->bin.attr.name = "eeprom";
        at24->bin.attr.mode = chip.flags & AT24_FLAG_IRUGO ? S_IRUGO : S_IRUSR;
-       at24->bin.attr.owner = THIS_MODULE;
        at24->bin.read = at24_bin_read;
        at24->bin.size = chip.byte_len;
 
index 23be4d42cb02c48da998864f3d13da047e75092b..f3ee4a1abb7714fa2dca513043dce44947d8b962 100644 (file)
@@ -190,7 +190,6 @@ static struct bin_attribute ds1682_eeprom_attr = {
        .attr = {
                .name = "eeprom",
                .mode = S_IRUGO | S_IWUSR,
-               .owner = THIS_MODULE,
        },
        .size = DS1682_EEPROM_SIZE,
        .read = ds1682_eeprom_read,
index 176126d3a01d3054318347cff48dfce8dc2805a1..4b364bae6b3ebc30f6774c167360a205c71ce303 100644 (file)
@@ -832,52 +832,52 @@ static irqreturn_t menelaus_irq(int irq, void *_menelaus)
 
 static void menelaus_to_time(char *regs, struct rtc_time *t)
 {
-       t->tm_sec = BCD2BIN(regs[0]);
-       t->tm_min = BCD2BIN(regs[1]);
+       t->tm_sec = bcd2bin(regs[0]);
+       t->tm_min = bcd2bin(regs[1]);
        if (the_menelaus->rtc_control & RTC_CTRL_MODE12) {
-               t->tm_hour = BCD2BIN(regs[2] & 0x1f) - 1;
+               t->tm_hour = bcd2bin(regs[2] & 0x1f) - 1;
                if (regs[2] & RTC_HR_PM)
                        t->tm_hour += 12;
        } else
-               t->tm_hour = BCD2BIN(regs[2] & 0x3f);
-       t->tm_mday = BCD2BIN(regs[3]);
-       t->tm_mon = BCD2BIN(regs[4]) - 1;
-       t->tm_year = BCD2BIN(regs[5]) + 100;
+               t->tm_hour = bcd2bin(regs[2] & 0x3f);
+       t->tm_mday = bcd2bin(regs[3]);
+       t->tm_mon = bcd2bin(regs[4]) - 1;
+       t->tm_year = bcd2bin(regs[5]) + 100;
 }
 
 static int time_to_menelaus(struct rtc_time *t, int regnum)
 {
        int     hour, status;
 
-       status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_sec));
+       status = menelaus_write_reg(regnum++, bin2bcd(t->tm_sec));
        if (status < 0)
                goto fail;
 
-       status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_min));
+       status = menelaus_write_reg(regnum++, bin2bcd(t->tm_min));
        if (status < 0)
                goto fail;
 
        if (the_menelaus->rtc_control & RTC_CTRL_MODE12) {
                hour = t->tm_hour + 1;
                if (hour > 12)
-                       hour = RTC_HR_PM | BIN2BCD(hour - 12);
+                       hour = RTC_HR_PM | bin2bcd(hour - 12);
                else
-                       hour = BIN2BCD(hour);
+                       hour = bin2bcd(hour);
        } else
-               hour = BIN2BCD(t->tm_hour);
+               hour = bin2bcd(t->tm_hour);
        status = menelaus_write_reg(regnum++, hour);
        if (status < 0)
                goto fail;
 
-       status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_mday));
+       status = menelaus_write_reg(regnum++, bin2bcd(t->tm_mday));
        if (status < 0)
                goto fail;
 
-       status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_mon + 1));
+       status = menelaus_write_reg(regnum++, bin2bcd(t->tm_mon + 1));
        if (status < 0)
                goto fail;
 
-       status = menelaus_write_reg(regnum++, BIN2BCD(t->tm_year - 100));
+       status = menelaus_write_reg(regnum++, bin2bcd(t->tm_year - 100));
        if (status < 0)
                goto fail;
 
@@ -914,7 +914,7 @@ static int menelaus_read_time(struct device *dev, struct rtc_time *t)
        }
 
        menelaus_to_time(regs, t);
-       t->tm_wday = BCD2BIN(regs[6]);
+       t->tm_wday = bcd2bin(regs[6]);
 
        return 0;
 }
@@ -927,7 +927,7 @@ static int menelaus_set_time(struct device *dev, struct rtc_time *t)
        status = time_to_menelaus(t, MENELAUS_RTC_SEC);
        if (status < 0)
                return status;
-       status = menelaus_write_reg(MENELAUS_RTC_WKDAY, BIN2BCD(t->tm_wday));
+       status = menelaus_write_reg(MENELAUS_RTC_WKDAY, bin2bcd(t->tm_wday));
        if (status < 0) {
                dev_err(&the_menelaus->client->dev, "rtc write reg %02x "
                                "err %d\n", MENELAUS_RTC_WKDAY, status);
index a78d35aecee3de339520308358d6ca565d69cdd1..f1e82a92e61ec09fb879a9b00ec41b5e35b192f4 100644 (file)
@@ -122,7 +122,7 @@ struct cm_counter_attribute {
 
 #define CM_COUNTER_ATTR(_name, _index) \
 struct cm_counter_attribute cm_##_name##_counter_attr = { \
-       .attr = { .name = __stringify(_name), .mode = 0444, .owner = THIS_MODULE }, \
+       .attr = { .name = __stringify(_name), .mode = 0444 }, \
        .index = _index \
 }
 
index c7c770c2898825c8a06c65400b880ea817c90cdc..aa1ff524256e1397012bcde72919d94113df6f18 100644 (file)
@@ -36,7 +36,6 @@
 #include <linux/fs.h>
 #include <linux/timer.h>
 #include <linux/poll.h>
-#include <linux/byteorder/swabb.h>
 #include <linux/smp_lock.h>
 
 #include <linux/kernel.h>
@@ -52,6 +51,7 @@
 #include <linux/i2c.h>
 #include <linux/kthread.h>
 #include <asm/unaligned.h>
+#include <asm/byteorder.h>
 
 #include <asm/system.h>
 
index fa8be0731a3f70ee3d6ca40a6301af86d7930a86..a4b1708fafe74eb070fe8d4b3f3c61b440f16358 100644 (file)
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
+#include <asm/byteorder.h>
 
 #include <linux/dvb/video.h>
 #include <linux/dvb/audio.h>
index bc29436e8a3cd4c3acc93a3c62df8b884d88d478..3733b2afec5f5f92b6c30f4f1c32cbce2cfd327a 100644 (file)
@@ -55,6 +55,7 @@
 #include <linux/mutex.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
+#include <asm/byteorder.h>
 
 #include <linux/dvb/video.h>
 #include <linux/dvb/audio.h>
index 6e291bf8237adaa0d7c5ea84f8ffda8e9ee440e3..5263913e0c69e8935add96731944373ee968f01d 100644 (file)
@@ -1044,7 +1044,6 @@ static int mspro_block_read_attributes(struct memstick_dev *card)
 
                s_attr->dev_attr.attr.name = s_attr->name;
                s_attr->dev_attr.attr.mode = S_IRUGO;
-               s_attr->dev_attr.attr.owner = THIS_MODULE;
                s_attr->dev_attr.show = mspro_block_attr_show(s_attr->id);
 
                if (!rc)
index 5dabfb69ee537f25a1bc93814c0e894ebf3ba494..4b7c24c519c34cba280c13bf4667e23051bca741 100644 (file)
@@ -82,6 +82,7 @@ static struct key_entry hp_wmi_keymap[] = {
        {KE_KEY, 0x03, KEY_BRIGHTNESSDOWN},
        {KE_KEY, 0x20e6, KEY_PROG1},
        {KE_KEY, 0x2142, KEY_MEDIA},
+       {KE_KEY, 0x213b, KEY_INFO},
        {KE_KEY, 0x231b, KEY_HELP},
        {KE_END, 0}
 };
index 14f11f8b9e5fd556fcd71dbe4d6178634c851bcc..a90d50c2c3e5535343e859c5df1c40c3be7e464a 100644 (file)
@@ -172,6 +172,11 @@ config MTD_CHAR
          memory chips, and also use ioctl() to obtain information about
          the device, or to erase parts of it.
 
+config HAVE_MTD_OTP
+       bool
+       help
+         Enable access to OTP regions using MTD_CHAR.
+
 config MTD_BLKDEVS
        tristate "Common interface to block layer for MTD 'translation layers'"
        depends on BLOCK
index 479d32b57a1eb54b20b731855274bd9d343a62ae..9408099eec48e59b033900957f8ce0a02c76dbb6 100644 (file)
@@ -6,6 +6,7 @@ menu "RAM/ROM/Flash chip drivers"
 config MTD_CFI
        tristate "Detect flash chips by Common Flash Interface (CFI) probe"
        select MTD_GEN_PROBE
+       select MTD_CFI_UTIL
        help
          The Common Flash Interface specification was developed by Intel,
          AMD and other flash manufactures that provides a universal method
@@ -154,6 +155,7 @@ config MTD_CFI_I8
 config MTD_OTP
        bool "Protection Registers aka one-time programmable (OTP) bits"
        depends on MTD_CFI_ADV_OPTIONS
+       select HAVE_MTD_OTP
        default n
        help
          This enables support for reading, writing and locking so called
@@ -187,7 +189,7 @@ config MTD_CFI_INTELEXT
          StrataFlash and other parts.
 
 config MTD_CFI_AMDSTD
-       tristate "Support for AMD/Fujitsu flash chips"
+       tristate "Support for AMD/Fujitsu/Spansion flash chips"
        depends on MTD_GEN_PROBE
        select MTD_CFI_UTIL
        help
index 5f1b472137a024b96fcff3ea3f9bcd654c7389f8..c93a8be5d5f1f541e96c6e104d5d10f4997bf451 100644 (file)
@@ -478,6 +478,28 @@ struct mtd_info *cfi_cmdset_0001(struct map_info *map, int primary)
                else
                        cfi->chips[i].erase_time = 2000000;
 
+               if (cfi->cfiq->WordWriteTimeoutTyp &&
+                   cfi->cfiq->WordWriteTimeoutMax)
+                       cfi->chips[i].word_write_time_max =
+                               1<<(cfi->cfiq->WordWriteTimeoutTyp +
+                                   cfi->cfiq->WordWriteTimeoutMax);
+               else
+                       cfi->chips[i].word_write_time_max = 50000 * 8;
+
+               if (cfi->cfiq->BufWriteTimeoutTyp &&
+                   cfi->cfiq->BufWriteTimeoutMax)
+                       cfi->chips[i].buffer_write_time_max =
+                               1<<(cfi->cfiq->BufWriteTimeoutTyp +
+                                   cfi->cfiq->BufWriteTimeoutMax);
+
+               if (cfi->cfiq->BlockEraseTimeoutTyp &&
+                   cfi->cfiq->BlockEraseTimeoutMax)
+                       cfi->chips[i].erase_time_max =
+                               1000<<(cfi->cfiq->BlockEraseTimeoutTyp +
+                                      cfi->cfiq->BlockEraseTimeoutMax);
+               else
+                       cfi->chips[i].erase_time_max = 2000000 * 8;
+
                cfi->chips[i].ref_point_counter = 0;
                init_waitqueue_head(&(cfi->chips[i].wq));
        }
@@ -703,6 +725,10 @@ static int chip_ready (struct map_info *map, struct flchip *chip, unsigned long
        struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
        unsigned long timeo = jiffies + HZ;
 
+       /* Prevent setting state FL_SYNCING for chip in suspended state. */
+       if (mode == FL_SYNCING && chip->oldstate != FL_READY)
+               goto sleep;
+
        switch (chip->state) {
 
        case FL_STATUS:
@@ -808,8 +834,9 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
        DECLARE_WAITQUEUE(wait, current);
 
  retry:
-       if (chip->priv && (mode == FL_WRITING || mode == FL_ERASING
-                          || mode == FL_OTP_WRITE || mode == FL_SHUTDOWN)) {
+       if (chip->priv &&
+           (mode == FL_WRITING || mode == FL_ERASING || mode == FL_OTP_WRITE
+           || mode == FL_SHUTDOWN) && chip->state != FL_SYNCING) {
                /*
                 * OK. We have possibility for contention on the write/erase
                 * operations which are global to the real chip and not per
@@ -859,6 +886,14 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
                                return ret;
                        }
                        spin_lock(&shared->lock);
+
+                       /* We should not own chip if it is already
+                        * in FL_SYNCING state. Put contender and retry. */
+                       if (chip->state == FL_SYNCING) {
+                               put_chip(map, contender, contender->start);
+                               spin_unlock(contender->mutex);
+                               goto retry;
+                       }
                        spin_unlock(contender->mutex);
                }
 
@@ -1012,7 +1047,7 @@ static void __xipram xip_enable(struct map_info *map, struct flchip *chip,
 
 static int __xipram xip_wait_for_operation(
                struct map_info *map, struct flchip *chip,
-               unsigned long adr, unsigned int chip_op_time )
+               unsigned long adr, unsigned int chip_op_time_max)
 {
        struct cfi_private *cfi = map->fldrv_priv;
        struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
@@ -1021,7 +1056,7 @@ static int __xipram xip_wait_for_operation(
        flstate_t oldstate, newstate;
 
                start = xip_currtime();
-       usec = chip_op_time * 8;
+       usec = chip_op_time_max;
        if (usec == 0)
                usec = 500000;
        done = 0;
@@ -1131,8 +1166,8 @@ static int __xipram xip_wait_for_operation(
 #define XIP_INVAL_CACHED_RANGE(map, from, size)  \
        INVALIDATE_CACHED_RANGE(map, from, size)
 
-#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, usec) \
-       xip_wait_for_operation(map, chip, cmd_adr, usec)
+#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, usec, usec_max) \
+       xip_wait_for_operation(map, chip, cmd_adr, usec_max)
 
 #else
 
@@ -1144,7 +1179,7 @@ static int __xipram xip_wait_for_operation(
 static int inval_cache_and_wait_for_operation(
                struct map_info *map, struct flchip *chip,
                unsigned long cmd_adr, unsigned long inval_adr, int inval_len,
-               unsigned int chip_op_time)
+               unsigned int chip_op_time, unsigned int chip_op_time_max)
 {
        struct cfi_private *cfi = map->fldrv_priv;
        map_word status, status_OK = CMD(0x80);
@@ -1156,8 +1191,7 @@ static int inval_cache_and_wait_for_operation(
                INVALIDATE_CACHED_RANGE(map, inval_adr, inval_len);
        spin_lock(chip->mutex);
 
-       /* set our timeout to 8 times the expected delay */
-       timeo = chip_op_time * 8;
+       timeo = chip_op_time_max;
        if (!timeo)
                timeo = 500000;
        reset_timeo = timeo;
@@ -1217,8 +1251,8 @@ static int inval_cache_and_wait_for_operation(
 
 #endif
 
-#define WAIT_TIMEOUT(map, chip, adr, udelay) \
-       INVAL_CACHE_AND_WAIT(map, chip, adr, 0, 0, udelay);
+#define WAIT_TIMEOUT(map, chip, adr, udelay, udelay_max) \
+       INVAL_CACHE_AND_WAIT(map, chip, adr, 0, 0, udelay, udelay_max);
 
 
 static int do_point_onechip (struct map_info *map, struct flchip *chip, loff_t adr, size_t len)
@@ -1452,7 +1486,8 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
 
        ret = INVAL_CACHE_AND_WAIT(map, chip, adr,
                                   adr, map_bankwidth(map),
-                                  chip->word_write_time);
+                                  chip->word_write_time,
+                                  chip->word_write_time_max);
        if (ret) {
                xip_enable(map, chip, adr);
                printk(KERN_ERR "%s: word write error (status timeout)\n", map->name);
@@ -1623,7 +1658,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 
        chip->state = FL_WRITING_TO_BUFFER;
        map_write(map, write_cmd, cmd_adr);
-       ret = WAIT_TIMEOUT(map, chip, cmd_adr, 0);
+       ret = WAIT_TIMEOUT(map, chip, cmd_adr, 0, 0);
        if (ret) {
                /* Argh. Not ready for write to buffer */
                map_word Xstatus = map_read(map, cmd_adr);
@@ -1640,7 +1675,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 
        /* Figure out the number of words to write */
        word_gap = (-adr & (map_bankwidth(map)-1));
-       words = (len - word_gap + map_bankwidth(map) - 1) / map_bankwidth(map);
+       words = DIV_ROUND_UP(len - word_gap, map_bankwidth(map));
        if (!word_gap) {
                words--;
        } else {
@@ -1692,7 +1727,8 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 
        ret = INVAL_CACHE_AND_WAIT(map, chip, cmd_adr,
                                   initial_adr, initial_len,
-                                  chip->buffer_write_time);
+                                  chip->buffer_write_time,
+                                  chip->buffer_write_time_max);
        if (ret) {
                map_write(map, CMD(0x70), cmd_adr);
                chip->state = FL_STATUS;
@@ -1827,7 +1863,8 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 
        ret = INVAL_CACHE_AND_WAIT(map, chip, adr,
                                   adr, len,
-                                  chip->erase_time);
+                                  chip->erase_time,
+                                  chip->erase_time_max);
        if (ret) {
                map_write(map, CMD(0x70), adr);
                chip->state = FL_STATUS;
@@ -2006,7 +2043,7 @@ static int __xipram do_xxlock_oneblock(struct map_info *map, struct flchip *chip
         */
        udelay = (!extp || !(extp->FeatureSupport & (1 << 5))) ? 1000000/HZ : 0;
 
-       ret = WAIT_TIMEOUT(map, chip, adr, udelay);
+       ret = WAIT_TIMEOUT(map, chip, adr, udelay, udelay * 100);
        if (ret) {
                map_write(map, CMD(0x70), adr);
                chip->state = FL_STATUS;
index a972cc6be436367f797a90f5099aca14a7e8a7a9..3e6f5d8609e8e513b38e5110bc7719a3fce5c1d0 100644 (file)
@@ -13,6 +13,8 @@
  * XIP support hooks by Vitaly Wool (based on code for Intel flash
  * by Nicolas Pitre)
  *
+ * 25/09/2008 Christopher Moore: TopBottom fixup for many Macronix with CFI V1.0
+ *
  * Occasionally maintained by Thayne Harbaugh tharbaugh at lnxi dot com
  *
  * This code is GPL
@@ -43,6 +45,7 @@
 
 #define MANUFACTURER_AMD       0x0001
 #define MANUFACTURER_ATMEL     0x001F
+#define MANUFACTURER_MACRONIX  0x00C2
 #define MANUFACTURER_SST       0x00BF
 #define SST49LF004B            0x0060
 #define SST49LF040B            0x0050
@@ -144,12 +147,44 @@ static void fixup_amd_bootblock(struct mtd_info *mtd, void* param)
 
        if (((major << 8) | minor) < 0x3131) {
                /* CFI version 1.0 => don't trust bootloc */
+
+               DEBUG(MTD_DEBUG_LEVEL1,
+                       "%s: JEDEC Vendor ID is 0x%02X Device ID is 0x%02X\n",
+                       map->name, cfi->mfr, cfi->id);
+
+               /* AFAICS all 29LV400 with a bottom boot block have a device ID
+                * of 0x22BA in 16-bit mode and 0xBA in 8-bit mode.
+                * These were badly detected as they have the 0x80 bit set
+                * so treat them as a special case.
+                */
+               if (((cfi->id == 0xBA) || (cfi->id == 0x22BA)) &&
+
+                       /* Macronix added CFI to their 2nd generation
+                        * MX29LV400C B/T but AFAICS no other 29LV400 (AMD,
+                        * Fujitsu, Spansion, EON, ESI and older Macronix)
+                        * has CFI.
+                        *
+                        * Therefore also check the manufacturer.
+                        * This reduces the risk of false detection due to
+                        * the 8-bit device ID.
+                        */
+                       (cfi->mfr == MANUFACTURER_MACRONIX)) {
+                       DEBUG(MTD_DEBUG_LEVEL1,
+                               "%s: Macronix MX29LV400C with bottom boot block"
+                               " detected\n", map->name);
+                       extp->TopBottom = 2;    /* bottom boot */
+               } else
                if (cfi->id & 0x80) {
                        printk(KERN_WARNING "%s: JEDEC Device ID is 0x%02X. Assuming broken CFI table.\n", map->name, cfi->id);
                        extp->TopBottom = 3;    /* top boot */
                } else {
                        extp->TopBottom = 2;    /* bottom boot */
                }
+
+               DEBUG(MTD_DEBUG_LEVEL1,
+                       "%s: AMD CFI PRI V%c.%c has no boot block field;"
+                       " deduced %s from Device ID\n", map->name, major, minor,
+                       extp->TopBottom == 2 ? "bottom" : "top");
        }
 }
 #endif
@@ -178,10 +213,18 @@ static void fixup_convert_atmel_pri(struct mtd_info *mtd, void *param)
        if (atmel_pri.Features & 0x02)
                extp->EraseSuspend = 2;
 
-       if (atmel_pri.BottomBoot)
-               extp->TopBottom = 2;
-       else
-               extp->TopBottom = 3;
+       /* Some chips got it backwards... */
+       if (cfi->id == AT49BV6416) {
+               if (atmel_pri.BottomBoot)
+                       extp->TopBottom = 3;
+               else
+                       extp->TopBottom = 2;
+       } else {
+               if (atmel_pri.BottomBoot)
+                       extp->TopBottom = 2;
+               else
+                       extp->TopBottom = 3;
+       }
 
        /* burst write mode not supported */
        cfi->cfiq->BufWriteTimeoutTyp = 0;
@@ -243,6 +286,7 @@ static struct cfi_fixup cfi_fixup_table[] = {
        { CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri, NULL },
 #ifdef AMD_BOOTLOC_BUG
        { CFI_MFR_AMD, CFI_ID_ANY, fixup_amd_bootblock, NULL },
+       { MANUFACTURER_MACRONIX, CFI_ID_ANY, fixup_amd_bootblock, NULL },
 #endif
        { CFI_MFR_AMD, 0x0050, fixup_use_secsi, NULL, },
        { CFI_MFR_AMD, 0x0053, fixup_use_secsi, NULL, },
index c418e92e1d92b79286f46a173bd1f93ff3591939..e63e6749429a29c282323ebc7c5d5949ef284616 100644 (file)
@@ -44,17 +44,14 @@ do { \
 
 #define xip_enable(base, map, cfi) \
 do { \
-       cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); \
-       cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); \
+       cfi_qry_mode_off(base, map, cfi);               \
        xip_allowed(base, map); \
 } while (0)
 
 #define xip_disable_qry(base, map, cfi) \
 do { \
        xip_disable(); \
-       cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); \
-       cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); \
-       cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL); \
+       cfi_qry_mode_on(base, map, cfi); \
 } while (0)
 
 #else
@@ -70,32 +67,6 @@ do { \
    in: interleave,type,mode
    ret: table index, <0 for error
  */
-static int __xipram qry_present(struct map_info *map, __u32 base,
-                               struct cfi_private *cfi)
-{
-       int osf = cfi->interleave * cfi->device_type;   // scale factor
-       map_word val[3];
-       map_word qry[3];
-
-       qry[0] = cfi_build_cmd('Q', map, cfi);
-       qry[1] = cfi_build_cmd('R', map, cfi);
-       qry[2] = cfi_build_cmd('Y', map, cfi);
-
-       val[0] = map_read(map, base + osf*0x10);
-       val[1] = map_read(map, base + osf*0x11);
-       val[2] = map_read(map, base + osf*0x12);
-
-       if (!map_word_equal(map, qry[0], val[0]))
-               return 0;
-
-       if (!map_word_equal(map, qry[1], val[1]))
-               return 0;
-
-       if (!map_word_equal(map, qry[2], val[2]))
-               return 0;
-
-       return 1;       // "QRY" found
-}
 
 static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
                                   unsigned long *chip_map, struct cfi_private *cfi)
@@ -116,11 +87,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
        }
 
        xip_disable();
-       cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
-       cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
-       cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
-
-       if (!qry_present(map,base,cfi)) {
+       if (!cfi_qry_mode_on(base, map, cfi)) {
                xip_enable(base, map, cfi);
                return 0;
        }
@@ -141,14 +108,13 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
                start = i << cfi->chipshift;
                /* This chip should be in read mode if it's one
                   we've already touched. */
-               if (qry_present(map, start, cfi)) {
+               if (cfi_qry_present(map, start, cfi)) {
                        /* Eep. This chip also had the QRY marker.
                         * Is it an alias for the new one? */
-                       cfi_send_gen_cmd(0xF0, 0, start, map, cfi, cfi->device_type, NULL);
-                       cfi_send_gen_cmd(0xFF, 0, start, map, cfi, cfi->device_type, NULL);
+                       cfi_qry_mode_off(start, map, cfi);
 
                        /* If the QRY marker goes away, it's an alias */
-                       if (!qry_present(map, start, cfi)) {
+                       if (!cfi_qry_present(map, start, cfi)) {
                                xip_allowed(base, map);
                                printk(KERN_DEBUG "%s: Found an alias at 0x%x for the chip at 0x%lx\n",
                                       map->name, base, start);
@@ -158,10 +124,9 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
                         * unfortunate. Stick the new chip in read mode
                         * too and if it's the same, assume it's an alias. */
                        /* FIXME: Use other modes to do a proper check */
-                       cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
-                       cfi_send_gen_cmd(0xFF, 0, start, map, cfi, cfi->device_type, NULL);
+                       cfi_qry_mode_off(base, map, cfi);
 
-                       if (qry_present(map, base, cfi)) {
+                       if (cfi_qry_present(map, base, cfi)) {
                                xip_allowed(base, map);
                                printk(KERN_DEBUG "%s: Found an alias at 0x%x for the chip at 0x%lx\n",
                                       map->name, base, start);
@@ -176,8 +141,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base,
        cfi->numchips++;
 
        /* Put it back into Read Mode */
-       cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
-       cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
+       cfi_qry_mode_off(base, map, cfi);
        xip_allowed(base, map);
 
        printk(KERN_INFO "%s: Found %d x%d devices at 0x%x in %d-bit bank\n",
@@ -237,9 +201,7 @@ static int __xipram cfi_chip_setup(struct map_info *map,
                          cfi_read_query(map, base + 0xf * ofs_factor);
 
        /* Put it back into Read Mode */
-       cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
-       /* ... even if it's an Intel chip */
-       cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
+       cfi_qry_mode_off(base, map, cfi);
        xip_allowed(base, map);
 
        /* Do any necessary byteswapping */
index 0ee457018016083743ca772a38d77325aa6911ae..34d40e25d312952b3cd2186465e17eff5a55d101 100644 (file)
 #include <linux/mtd/cfi.h>
 #include <linux/mtd/compatmac.h>
 
+int __xipram cfi_qry_present(struct map_info *map, __u32 base,
+                            struct cfi_private *cfi)
+{
+       int osf = cfi->interleave * cfi->device_type;   /* scale factor */
+       map_word val[3];
+       map_word qry[3];
+
+       qry[0] = cfi_build_cmd('Q', map, cfi);
+       qry[1] = cfi_build_cmd('R', map, cfi);
+       qry[2] = cfi_build_cmd('Y', map, cfi);
+
+       val[0] = map_read(map, base + osf*0x10);
+       val[1] = map_read(map, base + osf*0x11);
+       val[2] = map_read(map, base + osf*0x12);
+
+       if (!map_word_equal(map, qry[0], val[0]))
+               return 0;
+
+       if (!map_word_equal(map, qry[1], val[1]))
+               return 0;
+
+       if (!map_word_equal(map, qry[2], val[2]))
+               return 0;
+
+       return 1;       /* "QRY" found */
+}
+EXPORT_SYMBOL_GPL(cfi_qry_present);
+
+int __xipram cfi_qry_mode_on(uint32_t base, struct map_info *map,
+                            struct cfi_private *cfi)
+{
+       cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
+       cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
+       if (cfi_qry_present(map, base, cfi))
+               return 1;
+       /* QRY not found; probably we are dealing with some odd CFI chips */
+       /* Some revisions of some old Intel chips? */
+       cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
+       cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
+       cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
+       if (cfi_qry_present(map, base, cfi))
+               return 1;
+       /* ST M29DW chips */
+       cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
+       cfi_send_gen_cmd(0x98, 0x555, base, map, cfi, cfi->device_type, NULL);
+       if (cfi_qry_present(map, base, cfi))
+               return 1;
+       /* QRY not found */
+       return 0;
+}
+EXPORT_SYMBOL_GPL(cfi_qry_mode_on);
+
+void __xipram cfi_qry_mode_off(uint32_t base, struct map_info *map,
+                              struct cfi_private *cfi)
+{
+       cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL);
+       cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL);
+}
+EXPORT_SYMBOL_GPL(cfi_qry_mode_off);
+
 struct cfi_extquery *
 __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* name)
 {
@@ -48,8 +108,7 @@ __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* n
 #endif
 
        /* Switch it into Query Mode */
-       cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL);
-
+       cfi_qry_mode_on(base, map, cfi);
        /* Read in the Extended Query Table */
        for (i=0; i<size; i++) {
                ((unsigned char *)extp)[i] =
@@ -57,8 +116,7 @@ __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* n
        }
 
        /* Make sure it returns to read mode */
-       cfi_send_gen_cmd(0xf0, 0, base, map, cfi, cfi->device_type, NULL);
-       cfi_send_gen_cmd(0xff, 0, base, map, cfi, cfi->device_type, NULL);
+       cfi_qry_mode_off(base, map, cfi);
 
 #ifdef CONFIG_MTD_XIP
        (void) map_read(map, base);
index f061885b2812dc5d04bba96f488aa6fa18a9ae49..e2dc96441e05e0a6affa6c5fa2611fef0f545d9e 100644 (file)
@@ -111,7 +111,7 @@ static struct cfi_private *genprobe_ident_chips(struct map_info *map, struct chi
                max_chips = 1;
        }
 
-       mapsize = sizeof(long) * ( (max_chips + BITS_PER_LONG-1) / BITS_PER_LONG );
+       mapsize = sizeof(long) * DIV_ROUND_UP(max_chips, BITS_PER_LONG);
        chip_map = kzalloc(mapsize, GFP_KERNEL);
        if (!chip_map) {
                printk(KERN_WARNING "%s: kmalloc failed for CFI chip map\n", map->name);
index 71bc07f149b7910f964a5bc9b4c678a149cfbdf9..50a340388e742024945b6011490c760d0de4dcad 100644 (file)
@@ -7,6 +7,7 @@
  *
 * mtdparts=<mtddef>[;<mtddef>]
  * <mtddef>  := <mtd-id>:<partdef>[,<partdef>]
+ *              where <mtd-id> is the name from the "cat /proc/mtd" command
  * <partdef> := <size>[@offset][<name>][ro][lk]
  * <mtd-id>  := unique name used in mapping driver/device (mtd->name)
  * <size>    := standard linux memsize OR "-" to denote all remaining space
index 9c613f06623cd4d1191eafb7e6d4adfb75736037..6fde0a2e3567d589fb4757a12eb49929b1c6c463 100644 (file)
@@ -59,6 +59,27 @@ config MTD_DATAFLASH
          Sometimes DataFlash chips are packaged inside MMC-format
          cards; at this writing, the MMC stack won't handle those.
 
+config MTD_DATAFLASH_WRITE_VERIFY
+       bool "Verify DataFlash page writes"
+       depends on MTD_DATAFLASH
+       help
+         This adds an extra check when data is written to the flash.
+         It may help if you are verifying chip setup (timings etc) on
+         your board.  There is a rare possibility that even though the
+         device thinks the write was successful, a bit could have been
+         flipped accidentally due to device wear or something else.
+
+config MTD_DATAFLASH_OTP
+       bool "DataFlash OTP support (Security Register)"
+       depends on MTD_DATAFLASH
+       select HAVE_MTD_OTP
+       help
+         Newer DataFlash chips (revisions C and D) support 128 bytes of
+         one-time-programmable (OTP) data.  The first half may be written
+         (once) with up to 64 bytes of data, such as a serial number or
+         other key product data.  The second half is programmed with a
+         unique-to-each-chip bit pattern at the factory.
+
 config MTD_M25P80
        tristate "Support most SPI Flash chips (AT26DF, M25P, W25X, ...)"
        depends on SPI_MASTER && EXPERIMENTAL
index b35c3333e210b61878493d3fd052e1228b38d6db..76a76751da3660ce63123844d97eca891d2cbab2 100644 (file)
@@ -39,6 +39,7 @@
 #define        OPCODE_PP               0x02    /* Page program (up to 256 bytes) */
 #define        OPCODE_BE_4K            0x20    /* Erase 4KiB block */
 #define        OPCODE_BE_32K           0x52    /* Erase 32KiB block */
+#define        OPCODE_BE               0xc7    /* Erase whole flash chip */
 #define        OPCODE_SE               0xd8    /* Sector erase (usually 64KiB) */
 #define        OPCODE_RDID             0x9f    /* Read JEDEC ID */
 
@@ -161,6 +162,31 @@ static int wait_till_ready(struct m25p *flash)
        return 1;
 }
 
+/*
+ * Erase the whole flash memory
+ *
+ * Returns 0 if successful, non-zero otherwise.
+ */
+static int erase_block(struct m25p *flash)
+{
+       DEBUG(MTD_DEBUG_LEVEL3, "%s: %s %dKiB\n",
+                       flash->spi->dev.bus_id, __func__,
+                       flash->mtd.size / 1024);
+
+       /* Wait until finished previous write command. */
+       if (wait_till_ready(flash))
+               return 1;
+
+       /* Send write enable, then erase commands. */
+       write_enable(flash);
+
+       /* Set up command buffer. */
+       flash->command[0] = OPCODE_BE;
+
+       spi_write(flash->spi, flash->command, 1);
+
+       return 0;
+}
 
 /*
  * Erase one sector of flash memory at offset ``offset'' which is any
@@ -229,15 +255,21 @@ static int m25p80_erase(struct mtd_info *mtd, struct erase_info *instr)
         */
 
        /* now erase those sectors */
-       while (len) {
-               if (erase_sector(flash, addr)) {
-                       instr->state = MTD_ERASE_FAILED;
-                       mutex_unlock(&flash->lock);
-                       return -EIO;
-               }
+       if (len == flash->mtd.size && erase_block(flash)) {
+               instr->state = MTD_ERASE_FAILED;
+               mutex_unlock(&flash->lock);
+               return -EIO;
+       } else {
+               while (len) {
+                       if (erase_sector(flash, addr)) {
+                               instr->state = MTD_ERASE_FAILED;
+                               mutex_unlock(&flash->lock);
+                               return -EIO;
+                       }
 
-               addr += mtd->erasesize;
-               len -= mtd->erasesize;
+                       addr += mtd->erasesize;
+                       len -= mtd->erasesize;
+               }
        }
 
        mutex_unlock(&flash->lock);
@@ -437,6 +469,7 @@ struct flash_info {
         * then a two byte device id.
         */
        u32             jedec_id;
+       u16             ext_id;
 
        /* The size listed here is what works with OPCODE_SE, which isn't
         * necessarily called a "sector" by the vendor.
@@ -456,72 +489,75 @@ struct flash_info {
 static struct flash_info __devinitdata m25p_data [] = {
 
        /* Atmel -- some are (confusingly) marketed as "DataFlash" */
-       { "at25fs010",  0x1f6601, 32 * 1024, 4, SECT_4K, },
-       { "at25fs040",  0x1f6604, 64 * 1024, 8, SECT_4K, },
+       { "at25fs010",  0x1f6601, 0, 32 * 1024, 4, SECT_4K, },
+       { "at25fs040",  0x1f6604, 0, 64 * 1024, 8, SECT_4K, },
 
-       { "at25df041a", 0x1f4401, 64 * 1024, 8, SECT_4K, },
-       { "at25df641",  0x1f4800, 64 * 1024, 128, SECT_4K, },
+       { "at25df041a", 0x1f4401, 0, 64 * 1024, 8, SECT_4K, },
+       { "at25df641",  0x1f4800, 0, 64 * 1024, 128, SECT_4K, },
 
-       { "at26f004",   0x1f0400, 64 * 1024, 8, SECT_4K, },
-       { "at26df081a", 0x1f4501, 64 * 1024, 16, SECT_4K, },
-       { "at26df161a", 0x1f4601, 64 * 1024, 32, SECT_4K, },
-       { "at26df321",  0x1f4701, 64 * 1024, 64, SECT_4K, },
+       { "at26f004",   0x1f0400, 0, 64 * 1024, 8, SECT_4K, },
+       { "at26df081a", 0x1f4501, 0, 64 * 1024, 16, SECT_4K, },
+       { "at26df161a", 0x1f4601, 0, 64 * 1024, 32, SECT_4K, },
+       { "at26df321",  0x1f4701, 0, 64 * 1024, 64, SECT_4K, },
 
        /* Spansion -- single (large) sector size only, at least
         * for the chips listed here (without boot sectors).
         */
-       { "s25sl004a", 0x010212, 64 * 1024, 8, },
-       { "s25sl008a", 0x010213, 64 * 1024, 16, },
-       { "s25sl016a", 0x010214, 64 * 1024, 32, },
-       { "s25sl032a", 0x010215, 64 * 1024, 64, },
-       { "s25sl064a", 0x010216, 64 * 1024, 128, },
+       { "s25sl004a", 0x010212, 0, 64 * 1024, 8, },
+       { "s25sl008a", 0x010213, 0, 64 * 1024, 16, },
+       { "s25sl016a", 0x010214, 0, 64 * 1024, 32, },
+       { "s25sl032a", 0x010215, 0, 64 * 1024, 64, },
+       { "s25sl064a", 0x010216, 0, 64 * 1024, 128, },
+        { "s25sl12800", 0x012018, 0x0300, 256 * 1024, 64, },
+       { "s25sl12801", 0x012018, 0x0301, 64 * 1024, 256, },
 
        /* SST -- large erase sizes are "overlays", "sectors" are 4K */
-       { "sst25vf040b", 0xbf258d, 64 * 1024, 8, SECT_4K, },
-       { "sst25vf080b", 0xbf258e, 64 * 1024, 16, SECT_4K, },
-       { "sst25vf016b", 0xbf2541, 64 * 1024, 32, SECT_4K, },
-       { "sst25vf032b", 0xbf254a, 64 * 1024, 64, SECT_4K, },
+       { "sst25vf040b", 0xbf258d, 0, 64 * 1024, 8, SECT_4K, },
+       { "sst25vf080b", 0xbf258e, 0, 64 * 1024, 16, SECT_4K, },
+       { "sst25vf016b", 0xbf2541, 0, 64 * 1024, 32, SECT_4K, },
+       { "sst25vf032b", 0xbf254a, 0, 64 * 1024, 64, SECT_4K, },
 
        /* ST Microelectronics -- newer production may have feature updates */
-       { "m25p05",  0x202010,  32 * 1024, 2, },
-       { "m25p10",  0x202011,  32 * 1024, 4, },
-       { "m25p20",  0x202012,  64 * 1024, 4, },
-       { "m25p40",  0x202013,  64 * 1024, 8, },
-       { "m25p80",         0,  64 * 1024, 16, },
-       { "m25p16",  0x202015,  64 * 1024, 32, },
-       { "m25p32",  0x202016,  64 * 1024, 64, },
-       { "m25p64",  0x202017,  64 * 1024, 128, },
-       { "m25p128", 0x202018, 256 * 1024, 64, },
-
-       { "m45pe80", 0x204014,  64 * 1024, 16, },
-       { "m45pe16", 0x204015,  64 * 1024, 32, },
-
-       { "m25pe80", 0x208014,  64 * 1024, 16, },
-       { "m25pe16", 0x208015,  64 * 1024, 32, SECT_4K, },
+       { "m25p05",  0x202010,  0, 32 * 1024, 2, },
+       { "m25p10",  0x202011,  0, 32 * 1024, 4, },
+       { "m25p20",  0x202012,  0, 64 * 1024, 4, },
+       { "m25p40",  0x202013,  0, 64 * 1024, 8, },
+       { "m25p80",         0,  0, 64 * 1024, 16, },
+       { "m25p16",  0x202015,  0, 64 * 1024, 32, },
+       { "m25p32",  0x202016,  0, 64 * 1024, 64, },
+       { "m25p64",  0x202017,  0, 64 * 1024, 128, },
+       { "m25p128", 0x202018, 0, 256 * 1024, 64, },
+
+       { "m45pe80", 0x204014,  0, 64 * 1024, 16, },
+       { "m45pe16", 0x204015,  0, 64 * 1024, 32, },
+
+       { "m25pe80", 0x208014,  0, 64 * 1024, 16, },
+       { "m25pe16", 0x208015,  0, 64 * 1024, 32, SECT_4K, },
 
        /* Winbond -- w25x "blocks" are 64K, "sectors" are 4KiB */
-       { "w25x10", 0xef3011, 64 * 1024, 2, SECT_4K, },
-       { "w25x20", 0xef3012, 64 * 1024, 4, SECT_4K, },
-       { "w25x40", 0xef3013, 64 * 1024, 8, SECT_4K, },
-       { "w25x80", 0xef3014, 64 * 1024, 16, SECT_4K, },
-       { "w25x16", 0xef3015, 64 * 1024, 32, SECT_4K, },
-       { "w25x32", 0xef3016, 64 * 1024, 64, SECT_4K, },
-       { "w25x64", 0xef3017, 64 * 1024, 128, SECT_4K, },
+       { "w25x10", 0xef3011, 0, 64 * 1024, 2, SECT_4K, },
+       { "w25x20", 0xef3012, 0, 64 * 1024, 4, SECT_4K, },
+       { "w25x40", 0xef3013, 0, 64 * 1024, 8, SECT_4K, },
+       { "w25x80", 0xef3014, 0, 64 * 1024, 16, SECT_4K, },
+       { "w25x16", 0xef3015, 0, 64 * 1024, 32, SECT_4K, },
+       { "w25x32", 0xef3016, 0, 64 * 1024, 64, SECT_4K, },
+       { "w25x64", 0xef3017, 0, 64 * 1024, 128, SECT_4K, },
 };
 
 static struct flash_info *__devinit jedec_probe(struct spi_device *spi)
 {
        int                     tmp;
        u8                      code = OPCODE_RDID;
-       u8                      id[3];
+       u8                      id[5];
        u32                     jedec;
+       u16                     ext_jedec;
        struct flash_info       *info;
 
        /* JEDEC also defines an optional "extended device information"
         * string for after vendor-specific data, after the three bytes
         * we use here.  Supporting some chips might require using it.
         */
-       tmp = spi_write_then_read(spi, &code, 1, id, 3);
+       tmp = spi_write_then_read(spi, &code, 1, id, 5);
        if (tmp < 0) {
                DEBUG(MTD_DEBUG_LEVEL0, "%s: error %d reading JEDEC ID\n",
                        spi->dev.bus_id, tmp);
@@ -533,10 +569,15 @@ static struct flash_info *__devinit jedec_probe(struct spi_device *spi)
        jedec = jedec << 8;
        jedec |= id[2];
 
+       ext_jedec = id[3] << 8 | id[4];
+
       for (tmp = 0, info = m25p_data;
                       tmp < ARRAY_SIZE(m25p_data);
                       tmp++, info++) {
-               if (info->jedec_id == jedec)
+               if (info->jedec_id == jedec) {
+                       if (ext_jedec != 0 && info->ext_id != ext_jedec)
+                               continue;
                        return info;
+               }
        }
        dev_err(&spi->dev, "unrecognized JEDEC id %06x\n", jedec);
index 8bd0dea6885f4ad91288d8f5313f08cb636fe10e..6dd9aff8bb2d37181fb711c98f9d839f1fd52a15 100644 (file)
  * doesn't (yet) use these for any kind of i/o overlap or prefetching.
  *
  * Sometimes DataFlash is packaged in MMC-format cards, although the
- * MMC stack can't use SPI (yet), or distinguish between MMC and DataFlash
+ * MMC stack can't (yet?) distinguish between MMC and DataFlash
  * protocols during enumeration.
  */
 
-#define CONFIG_DATAFLASH_WRITE_VERIFY
-
 /* reads can bypass the buffers */
 #define OP_READ_CONTINUOUS     0xE8
 #define OP_READ_PAGE           0xD2
@@ -80,7 +78,8 @@
  */
 #define OP_READ_ID             0x9F
 #define OP_READ_SECURITY       0x77
-#define OP_WRITE_SECURITY      0x9A    /* OTP bits */
+#define OP_WRITE_SECURITY_REVC 0x9A
+#define OP_WRITE_SECURITY      0x9B    /* revision D */
 
 
 struct dataflash {
@@ -402,7 +401,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
                (void) dataflash_waitready(priv->spi);
 
 
-#ifdef CONFIG_DATAFLASH_WRITE_VERIFY
+#ifdef CONFIG_MTD_DATAFLASH_VERIFY_WRITE
 
                /* (3) Compare to Buffer1 */
                addr = pageaddr << priv->page_offset;
@@ -431,7 +430,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
                } else
                        status = 0;
 
-#endif /* CONFIG_DATAFLASH_WRITE_VERIFY */
+#endif /* CONFIG_MTD_DATAFLASH_VERIFY_WRITE */
 
                remaining = remaining - writelen;
                pageaddr++;
@@ -451,16 +450,192 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 
 /* ......................................................................... */
 
+#ifdef CONFIG_MTD_DATAFLASH_OTP
+
+static int dataflash_get_otp_info(struct mtd_info *mtd,
+               struct otp_info *info, size_t len)
+{
+       /* Report both blocks as identical:  bytes 0..64, locked.
+        * Unless the user block changed from all-ones, we can't
+        * tell whether it's still writable; so we assume it isn't.
+        */
+       info->start = 0;
+       info->length = 64;
+       info->locked = 1;
+       return sizeof(*info);
+}
+
+static ssize_t otp_read(struct spi_device *spi, unsigned base,
+               uint8_t *buf, loff_t off, size_t len)
+{
+       struct spi_message      m;
+       size_t                  l;
+       uint8_t                 *scratch;
+       struct spi_transfer     t;
+       int                     status;
+
+       if (off > 64)
+               return -EINVAL;
+
+       if ((off + len) > 64)
+               len = 64 - off;
+       if (len == 0)
+               return len;
+
+       spi_message_init(&m);
+
+       l = 4 + base + off + len;
+       scratch = kzalloc(l, GFP_KERNEL);
+       if (!scratch)
+               return -ENOMEM;
+
+       /* OUT: OP_READ_SECURITY, 3 don't-care bytes, zeroes
+        * IN:  ignore 4 bytes, data bytes 0..N (max 127)
+        */
+       scratch[0] = OP_READ_SECURITY;
+
+       memset(&t, 0, sizeof t);
+       t.tx_buf = scratch;
+       t.rx_buf = scratch;
+       t.len = l;
+       spi_message_add_tail(&t, &m);
+
+       dataflash_waitready(spi);
+
+       status = spi_sync(spi, &m);
+       if (status >= 0) {
+               memcpy(buf, scratch + 4 + base + off, len);
+               status = len;
+       }
+
+       kfree(scratch);
+       return status;
+}
+
+static int dataflash_read_fact_otp(struct mtd_info *mtd,
+               loff_t from, size_t len, size_t *retlen, u_char *buf)
+{
+       struct dataflash        *priv = (struct dataflash *)mtd->priv;
+       int                     status;
+
+       /* 64 bytes, from 0..63 ... start at 64 on-chip */
+       mutex_lock(&priv->lock);
+       status = otp_read(priv->spi, 64, buf, from, len);
+       mutex_unlock(&priv->lock);
+
+       if (status < 0)
+               return status;
+       *retlen = status;
+       return 0;
+}
+
+static int dataflash_read_user_otp(struct mtd_info *mtd,
+               loff_t from, size_t len, size_t *retlen, u_char *buf)
+{
+       struct dataflash        *priv = (struct dataflash *)mtd->priv;
+       int                     status;
+
+       /* 64 bytes, from 0..63 ... start at 0 on-chip */
+       mutex_lock(&priv->lock);
+       status = otp_read(priv->spi, 0, buf, from, len);
+       mutex_unlock(&priv->lock);
+
+       if (status < 0)
+               return status;
+       *retlen = status;
+       return 0;
+}
+
+static int dataflash_write_user_otp(struct mtd_info *mtd,
+               loff_t from, size_t len, size_t *retlen, u_char *buf)
+{
+       struct spi_message      m;
+       const size_t            l = 4 + 64;
+       uint8_t                 *scratch;
+       struct spi_transfer     t;
+       struct dataflash        *priv = (struct dataflash *)mtd->priv;
+       int                     status;
+
+       if (len > 64)
+               return -EINVAL;
+
+       /* Strictly speaking, we *could* truncate the write ... but
+        * let's not do that for the only write that's ever possible.
+        */
+       if ((from + len) > 64)
+               return -EINVAL;
+
+       /* OUT: OP_WRITE_SECURITY, 3 zeroes, 64 data-or-zero bytes
+        * IN:  ignore all
+        */
+       scratch = kzalloc(l, GFP_KERNEL);
+       if (!scratch)
+               return -ENOMEM;
+       scratch[0] = OP_WRITE_SECURITY;
+       memcpy(scratch + 4 + from, buf, len);
+
+       spi_message_init(&m);
+
+       memset(&t, 0, sizeof t);
+       t.tx_buf = scratch;
+       t.len = l;
+       spi_message_add_tail(&t, &m);
+
+       /* Write the OTP bits, if they've not yet been written.
+        * This modifies SRAM buffer1.
+        */
+       mutex_lock(&priv->lock);
+       dataflash_waitready(priv->spi);
+       status = spi_sync(priv->spi, &m);
+       mutex_unlock(&priv->lock);
+
+       kfree(scratch);
+
+       if (status >= 0) {
+               status = 0;
+               *retlen = len;
+       }
+       return status;
+}
+
+static char *otp_setup(struct mtd_info *device, char revision)
+{
+       device->get_fact_prot_info = dataflash_get_otp_info;
+       device->read_fact_prot_reg = dataflash_read_fact_otp;
+       device->get_user_prot_info = dataflash_get_otp_info;
+       device->read_user_prot_reg = dataflash_read_user_otp;
+
+       /* rev c parts (at45db321c and at45db1281 only!) use a
+        * different write procedure; not (yet?) implemented.
+        */
+       if (revision > 'c')
+               device->write_user_prot_reg = dataflash_write_user_otp;
+
+       return ", OTP";
+}
+
+#else
+
+static char *otp_setup(struct mtd_info *device, char revision)
+{
+       return " (OTP)";
+}
+
+#endif
+
+/* ......................................................................... */
+
 /*
  * Register DataFlash device with MTD subsystem.
  */
 static int __devinit
-add_dataflash(struct spi_device *spi, char *name,
-               int nr_pages, int pagesize, int pageoffset)
+add_dataflash_otp(struct spi_device *spi, char *name,
+               int nr_pages, int pagesize, int pageoffset, char revision)
 {
        struct dataflash                *priv;
        struct mtd_info                 *device;
        struct flash_platform_data      *pdata = spi->dev.platform_data;
+       char                            *otp_tag = "";
 
        priv = kzalloc(sizeof *priv, GFP_KERNEL);
        if (!priv)
@@ -489,8 +664,12 @@ add_dataflash(struct spi_device *spi, char *name,
        device->write = dataflash_write;
        device->priv = priv;
 
-       dev_info(&spi->dev, "%s (%d KBytes) pagesize %d bytes\n",
-                       name, DIV_ROUND_UP(device->size, 1024), pagesize);
+       if (revision >= 'c')
+               otp_tag = otp_setup(device, revision);
+
+       dev_info(&spi->dev, "%s (%d KBytes) pagesize %d bytes%s\n",
+                       name, DIV_ROUND_UP(device->size, 1024),
+                       pagesize, otp_tag);
        dev_set_drvdata(&spi->dev, priv);
 
        if (mtd_has_partitions()) {
@@ -519,6 +698,14 @@ add_dataflash(struct spi_device *spi, char *name,
        return add_mtd_device(device) == 1 ? -ENODEV : 0;
 }
 
+static inline int __devinit
+add_dataflash(struct spi_device *spi, char *name,
+               int nr_pages, int pagesize, int pageoffset)
+{
+       return add_dataflash_otp(spi, name, nr_pages, pagesize,
+                       pageoffset, 0);
+}
+
 struct flash_info {
        char            *name;
 
@@ -664,13 +851,16 @@ static int __devinit dataflash_probe(struct spi_device *spi)
         * Try to detect dataflash by JEDEC ID.
         * If it succeeds we know we have either a C or D part.
         * D will support power of 2 pagesize option.
+        * Both support the security register, though with different
+        * write procedures.
         */
        info = jedec_probe(spi);
        if (IS_ERR(info))
                return PTR_ERR(info);
        if (info != NULL)
-               return add_dataflash(spi, info->name, info->nr_pages,
-                                info->pagesize, info->pageoffset);
+               return add_dataflash_otp(spi, info->name, info->nr_pages,
+                               info->pagesize, info->pageoffset,
+                               (info->flags & SUP_POW2PS) ? 'd' : 'c');
 
        /*
         * Older chips support only legacy commands, identifing
index c4f9d3378b24c658f246d8777d47735050cfdffe..50ce13887f63b6b81d2525a5070fbfc511643b1c 100644 (file)
@@ -388,6 +388,10 @@ static u16 INFTL_foldchain(struct INFTLrecord *inftl, unsigned thisVUC, unsigned
                if (thisEUN == targetEUN)
                        break;
 
+               /* Unlink the last block from the chain. */
+               inftl->PUtable[prevEUN] = BLOCK_NIL;
+
+               /* Now try to erase it. */
                if (INFTL_formatblock(inftl, thisEUN) < 0) {
                        /*
                         * Could not erase : mark block as reserved.
@@ -396,7 +400,6 @@ static u16 INFTL_foldchain(struct INFTLrecord *inftl, unsigned thisVUC, unsigned
                } else {
                        /* Correctly erased : mark it as free */
                        inftl->PUtable[thisEUN] = BLOCK_FREE;
-                       inftl->PUtable[prevEUN] = BLOCK_NIL;
                        inftl->numfreeEUNs++;
                }
        }
index df8e00bba07b2726f71fbf02b6b1d27ff0cf396a..5ea1693621642b84fbcbe482202395d493b0a91b 100644 (file)
@@ -332,30 +332,6 @@ config MTD_CFI_FLAGADM
          Mapping for the Flaga digital module. If you don't have one, ignore
          this setting.
 
-config MTD_WALNUT
-       tristate "Flash device mapped on IBM 405GP Walnut"
-       depends on MTD_JEDECPROBE && WALNUT && !PPC_MERGE
-       help
-         This enables access routines for the flash chips on the IBM 405GP
-         Walnut board. If you have one of these boards and would like to
-         use the flash chips on it, say 'Y'.
-
-config MTD_EBONY
-       tristate "Flash devices mapped on IBM 440GP Ebony"
-       depends on MTD_JEDECPROBE && EBONY && !PPC_MERGE
-       help
-         This enables access routines for the flash chips on the IBM 440GP
-         Ebony board. If you have one of these boards and would like to
-         use the flash chips on it, say 'Y'.
-
-config MTD_OCOTEA
-       tristate "Flash devices mapped on IBM 440GX Ocotea"
-       depends on MTD_CFI && OCOTEA && !PPC_MERGE
-       help
-         This enables access routines for the flash chips on the IBM 440GX
-         Ocotea board. If you have one of these boards and would like to
-         use the flash chips on it, say 'Y'.
-
 config MTD_REDWOOD
        tristate "CFI Flash devices mapped on IBM Redwood"
        depends on MTD_CFI && ( REDWOOD_4 || REDWOOD_5 || REDWOOD_6 )
@@ -458,13 +434,6 @@ config MTD_CEIVA
          PhotoMax Digital Picture Frame.
          If you have such a device, say 'Y'.
 
-config MTD_NOR_TOTO
-       tristate "NOR Flash device on TOTO board"
-       depends on ARCH_OMAP && OMAP_TOTO
-       help
-         This enables access to the NOR flash on the Texas Instruments
-         TOTO board.
-
 config MTD_H720X
        tristate "Hynix evaluation board mappings"
        depends on MTD_CFI && ( ARCH_H7201 || ARCH_H7202 )
@@ -522,7 +491,7 @@ config MTD_BFIN_ASYNC
 
 config MTD_UCLINUX
        tristate "Generic uClinux RAM/ROM filesystem support"
-       depends on MTD_PARTITIONS && !MMU
+       depends on MTD_PARTITIONS && MTD_RAM && !MMU
        help
          Map driver to support image based filesystems for uClinux.
 
index 6cda6df973e5d11c187533e48534ac5ff60c6038..6d9ba35caf11a26bbeb4caefec9eae1dddcd9940 100644 (file)
@@ -50,12 +50,8 @@ obj-$(CONFIG_MTD_REDWOOD)    += redwood.o
 obj-$(CONFIG_MTD_UCLINUX)      += uclinux.o
 obj-$(CONFIG_MTD_NETtel)       += nettel.o
 obj-$(CONFIG_MTD_SCB2_FLASH)   += scb2_flash.o
-obj-$(CONFIG_MTD_EBONY)                += ebony.o
-obj-$(CONFIG_MTD_OCOTEA)       += ocotea.o
-obj-$(CONFIG_MTD_WALNUT)        += walnut.o
 obj-$(CONFIG_MTD_H720X)                += h720x-flash.o
 obj-$(CONFIG_MTD_SBC8240)      += sbc8240.o
-obj-$(CONFIG_MTD_NOR_TOTO)     += omap-toto-flash.o
 obj-$(CONFIG_MTD_IXP4XX)       += ixp4xx.o
 obj-$(CONFIG_MTD_IXP2000)      += ixp2000.o
 obj-$(CONFIG_MTD_WRSBC8260)    += wr_sbc82xx_flash.o
diff --git a/drivers/mtd/maps/ebony.c b/drivers/mtd/maps/ebony.c
deleted file mode 100644 (file)
index d92b7c7..0000000
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Mapping for Ebony user flash
- *
- * Matt Porter <mporter@kernel.crashing.org>
- *
- * Copyright 2002-2004 MontaVista Software Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-#include <asm/io.h>
-#include <asm/ibm44x.h>
-#include <platforms/4xx/ebony.h>
-
-static struct mtd_info *flash;
-
-static struct map_info ebony_small_map = {
-       .name =         "Ebony small flash",
-       .size =         EBONY_SMALL_FLASH_SIZE,
-       .bankwidth =    1,
-};
-
-static struct map_info ebony_large_map = {
-       .name =         "Ebony large flash",
-       .size =         EBONY_LARGE_FLASH_SIZE,
-       .bankwidth =    1,
-};
-
-static struct mtd_partition ebony_small_partitions[] = {
-       {
-               .name =   "OpenBIOS",
-               .offset = 0x0,
-               .size =   0x80000,
-       }
-};
-
-static struct mtd_partition ebony_large_partitions[] = {
-       {
-               .name =   "fs",
-               .offset = 0,
-               .size =   0x380000,
-       },
-       {
-               .name =   "firmware",
-               .offset = 0x380000,
-               .size =   0x80000,
-       }
-};
-
-int __init init_ebony(void)
-{
-       u8 fpga0_reg;
-       u8 __iomem *fpga0_adr;
-       unsigned long long small_flash_base, large_flash_base;
-
-       fpga0_adr = ioremap64(EBONY_FPGA_ADDR, 16);
-       if (!fpga0_adr)
-               return -ENOMEM;
-
-       fpga0_reg = readb(fpga0_adr);
-       iounmap(fpga0_adr);
-
-       if (EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
-                       !EBONY_FLASH_SEL(fpga0_reg))
-               small_flash_base = EBONY_SMALL_FLASH_HIGH2;
-       else if (EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
-                       EBONY_FLASH_SEL(fpga0_reg))
-               small_flash_base = EBONY_SMALL_FLASH_HIGH1;
-       else if (!EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
-                       !EBONY_FLASH_SEL(fpga0_reg))
-               small_flash_base = EBONY_SMALL_FLASH_LOW2;
-       else
-               small_flash_base = EBONY_SMALL_FLASH_LOW1;
-
-       if (EBONY_BOOT_SMALL_FLASH(fpga0_reg) &&
-                       !EBONY_ONBRD_FLASH_EN(fpga0_reg))
-               large_flash_base = EBONY_LARGE_FLASH_LOW;
-       else
-               large_flash_base = EBONY_LARGE_FLASH_HIGH;
-
-       ebony_small_map.phys = small_flash_base;
-       ebony_small_map.virt = ioremap64(small_flash_base,
-                                        ebony_small_map.size);
-
-       if (!ebony_small_map.virt) {
-               printk("Failed to ioremap flash\n");
-               return -EIO;
-       }
-
-       simple_map_init(&ebony_small_map);
-
-       flash = do_map_probe("jedec_probe", &ebony_small_map);
-       if (flash) {
-               flash->owner = THIS_MODULE;
-               add_mtd_partitions(flash, ebony_small_partitions,
-                                       ARRAY_SIZE(ebony_small_partitions));
-       } else {
-               printk("map probe failed for flash\n");
-               iounmap(ebony_small_map.virt);
-               return -ENXIO;
-       }
-
-       ebony_large_map.phys = large_flash_base;
-       ebony_large_map.virt = ioremap64(large_flash_base,
-                                        ebony_large_map.size);
-
-       if (!ebony_large_map.virt) {
-               printk("Failed to ioremap flash\n");
-               iounmap(ebony_small_map.virt);
-               return -EIO;
-       }
-
-       simple_map_init(&ebony_large_map);
-
-       flash = do_map_probe("jedec_probe", &ebony_large_map);
-       if (flash) {
-               flash->owner = THIS_MODULE;
-               add_mtd_partitions(flash, ebony_large_partitions,
-                                       ARRAY_SIZE(ebony_large_partitions));
-       } else {
-               printk("map probe failed for flash\n");
-               iounmap(ebony_small_map.virt);
-               iounmap(ebony_large_map.virt);
-               return -ENXIO;
-       }
-
-       return 0;
-}
-
-static void __exit cleanup_ebony(void)
-{
-       if (flash) {
-               del_mtd_partitions(flash);
-               map_destroy(flash);
-       }
-
-       if (ebony_small_map.virt) {
-               iounmap(ebony_small_map.virt);
-               ebony_small_map.virt = NULL;
-       }
-
-       if (ebony_large_map.virt) {
-               iounmap(ebony_large_map.virt);
-               ebony_large_map.virt = NULL;
-       }
-}
-
-module_init(init_ebony);
-module_exit(cleanup_ebony);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Matt Porter <mporter@kernel.crashing.org>");
-MODULE_DESCRIPTION("MTD map and partitions for IBM 440GP Ebony boards");
diff --git a/drivers/mtd/maps/ocotea.c b/drivers/mtd/maps/ocotea.c
deleted file mode 100644 (file)
index 5522eac..0000000
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Mapping for Ocotea user flash
- *
- * Matt Porter <mporter@kernel.crashing.org>
- *
- * Copyright 2002-2004 MontaVista Software Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-#include <asm/io.h>
-#include <asm/ibm44x.h>
-#include <platforms/4xx/ocotea.h>
-
-static struct mtd_info *flash;
-
-static struct map_info ocotea_small_map = {
-       .name =         "Ocotea small flash",
-       .size =         OCOTEA_SMALL_FLASH_SIZE,
-       .buswidth =     1,
-};
-
-static struct map_info ocotea_large_map = {
-       .name =         "Ocotea large flash",
-       .size =         OCOTEA_LARGE_FLASH_SIZE,
-       .buswidth =     1,
-};
-
-static struct mtd_partition ocotea_small_partitions[] = {
-       {
-               .name =   "pibs",
-               .offset = 0x0,
-               .size =   0x100000,
-       }
-};
-
-static struct mtd_partition ocotea_large_partitions[] = {
-       {
-               .name =   "fs",
-               .offset = 0,
-               .size =   0x300000,
-       },
-       {
-               .name =   "firmware",
-               .offset = 0x300000,
-               .size =   0x100000,
-       }
-};
-
-int __init init_ocotea(void)
-{
-       u8 fpga0_reg;
-       u8 *fpga0_adr;
-       unsigned long long small_flash_base, large_flash_base;
-
-       fpga0_adr = ioremap64(OCOTEA_FPGA_ADDR, 16);
-       if (!fpga0_adr)
-               return -ENOMEM;
-
-       fpga0_reg = readb((unsigned long)fpga0_adr);
-       iounmap(fpga0_adr);
-
-       if (OCOTEA_BOOT_LARGE_FLASH(fpga0_reg)) {
-               small_flash_base = OCOTEA_SMALL_FLASH_HIGH;
-               large_flash_base = OCOTEA_LARGE_FLASH_LOW;
-       }
-       else {
-               small_flash_base = OCOTEA_SMALL_FLASH_LOW;
-               large_flash_base = OCOTEA_LARGE_FLASH_HIGH;
-       }
-
-       ocotea_small_map.phys = small_flash_base;
-       ocotea_small_map.virt = ioremap64(small_flash_base,
-                                        ocotea_small_map.size);
-
-       if (!ocotea_small_map.virt) {
-               printk("Failed to ioremap flash\n");
-               return -EIO;
-       }
-
-       simple_map_init(&ocotea_small_map);
-
-       flash = do_map_probe("map_rom", &ocotea_small_map);
-       if (flash) {
-               flash->owner = THIS_MODULE;
-               add_mtd_partitions(flash, ocotea_small_partitions,
-                                       ARRAY_SIZE(ocotea_small_partitions));
-       } else {
-               printk("map probe failed for flash\n");
-               iounmap(ocotea_small_map.virt);
-               return -ENXIO;
-       }
-
-       ocotea_large_map.phys = large_flash_base;
-       ocotea_large_map.virt = ioremap64(large_flash_base,
-                                        ocotea_large_map.size);
-
-       if (!ocotea_large_map.virt) {
-               printk("Failed to ioremap flash\n");
-               iounmap(ocotea_small_map.virt);
-               return -EIO;
-       }
-
-       simple_map_init(&ocotea_large_map);
-
-       flash = do_map_probe("cfi_probe", &ocotea_large_map);
-       if (flash) {
-               flash->owner = THIS_MODULE;
-               add_mtd_partitions(flash, ocotea_large_partitions,
-                                       ARRAY_SIZE(ocotea_large_partitions));
-       } else {
-               printk("map probe failed for flash\n");
-               iounmap(ocotea_small_map.virt);
-               iounmap(ocotea_large_map.virt);
-               return -ENXIO;
-       }
-
-       return 0;
-}
-
-static void __exit cleanup_ocotea(void)
-{
-       if (flash) {
-               del_mtd_partitions(flash);
-               map_destroy(flash);
-       }
-
-       if (ocotea_small_map.virt) {
-               iounmap((void *)ocotea_small_map.virt);
-               ocotea_small_map.virt = 0;
-       }
-
-       if (ocotea_large_map.virt) {
-               iounmap((void *)ocotea_large_map.virt);
-               ocotea_large_map.virt = 0;
-       }
-}
-
-module_init(init_ocotea);
-module_exit(cleanup_ocotea);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Matt Porter <mporter@kernel.crashing.org>");
-MODULE_DESCRIPTION("MTD map and partitions for IBM 440GX Ocotea boards");
diff --git a/drivers/mtd/maps/omap-toto-flash.c b/drivers/mtd/maps/omap-toto-flash.c
deleted file mode 100644 (file)
index 0a60ebb..0000000
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * NOR Flash memory access on TI Toto board
- *
- * jzhang@ti.com (C) 2003 Texas Instruments.
- *
- *  (C) 2002 MontVista Software, Inc.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-
-#include <asm/hardware.h>
-#include <asm/io.h>
-
-
-#ifndef CONFIG_ARCH_OMAP
-#error This is for OMAP architecture only
-#endif
-
-//these lines need be moved to a hardware header file
-#define OMAP_TOTO_FLASH_BASE 0xd8000000
-#define OMAP_TOTO_FLASH_SIZE 0x80000
-
-static struct map_info omap_toto_map_flash = {
-       .name =         "OMAP Toto flash",
-       .bankwidth =    2,
-       .virt =         (void __iomem *)OMAP_TOTO_FLASH_BASE,
-};
-
-
-static struct mtd_partition toto_flash_partitions[] = {
-       {
-               .name =         "BootLoader",
-               .size =         0x00040000,     /* hopefully u-boot will stay 128k + 128*/
-               .offset =       0,
-               .mask_flags =   MTD_WRITEABLE,  /* force read-only */
-       }, {
-               .name =         "ReservedSpace",
-               .size =         0x00030000,
-               .offset =       MTDPART_OFS_APPEND,
-               //mask_flags:   MTD_WRITEABLE,  /* force read-only */
-       }, {
-               .name =         "EnvArea",      /* bottom 64KiB for env vars */
-               .size =         MTDPART_SIZ_FULL,
-               .offset =       MTDPART_OFS_APPEND,
-       }
-};
-
-static struct mtd_partition *parsed_parts;
-
-static struct mtd_info *flash_mtd;
-
-static int __init init_flash (void)
-{
-
-       struct mtd_partition *parts;
-       int nb_parts = 0;
-       int parsed_nr_parts = 0;
-       const char *part_type;
-
-       /*
-        * Static partition definition selection
-        */
-       part_type = "static";
-
-       parts = toto_flash_partitions;
-       nb_parts = ARRAY_SIZE(toto_flash_partitions);
-       omap_toto_map_flash.size = OMAP_TOTO_FLASH_SIZE;
-       omap_toto_map_flash.phys = virt_to_phys(OMAP_TOTO_FLASH_BASE);
-
-       simple_map_init(&omap_toto_map_flash);
-       /*
-        * Now let's probe for the actual flash.  Do it here since
-        * specific machine settings might have been set above.
-        */
-       printk(KERN_NOTICE "OMAP toto flash: probing %d-bit flash bus\n",
-               omap_toto_map_flash.bankwidth*8);
-       flash_mtd = do_map_probe("jedec_probe", &omap_toto_map_flash);
-       if (!flash_mtd)
-               return -ENXIO;
-
-       if (parsed_nr_parts > 0) {
-               parts = parsed_parts;
-               nb_parts = parsed_nr_parts;
-       }
-
-       if (nb_parts == 0) {
-               printk(KERN_NOTICE "OMAP toto flash: no partition info available,"
-                       "registering whole flash at once\n");
-               if (add_mtd_device(flash_mtd)){
-            return -ENXIO;
-        }
-       } else {
-               printk(KERN_NOTICE "Using %s partition definition\n",
-                       part_type);
-               return add_mtd_partitions(flash_mtd, parts, nb_parts);
-       }
-       return 0;
-}
-
-int __init omap_toto_mtd_init(void)
-{
-       int status;
-
-       if (status = init_flash()) {
-               printk(KERN_ERR "OMAP Toto Flash: unable to init map for toto flash\n");
-       }
-    return status;
-}
-
-static void  __exit omap_toto_mtd_cleanup(void)
-{
-       if (flash_mtd) {
-               del_mtd_partitions(flash_mtd);
-               map_destroy(flash_mtd);
-               kfree(parsed_parts);
-       }
-}
-
-module_init(omap_toto_mtd_init);
-module_exit(omap_toto_mtd_cleanup);
-
-MODULE_AUTHOR("Jian Zhang");
-MODULE_DESCRIPTION("OMAP Toto board map driver");
-MODULE_LICENSE("GPL");
index 5c6a25c90380aa70ab2afb4b3f34860adf76d43f..48f4cf5cb9d1d8f18a31c9f80af75b703d933b21 100644 (file)
@@ -203,15 +203,8 @@ intel_dc21285_init(struct pci_dev *dev, struct map_pci_info *map)
                 * not enabled, should we be allocating a new resource for it
                 * or simply enabling it?
                 */
-               if (!(pci_resource_flags(dev, PCI_ROM_RESOURCE) &
-                                   IORESOURCE_ROM_ENABLE)) {
-                       u32 val;
-                       pci_resource_flags(dev, PCI_ROM_RESOURCE) |= IORESOURCE_ROM_ENABLE;
-                       pci_read_config_dword(dev, PCI_ROM_ADDRESS, &val);
-                       val |= PCI_ROM_ADDRESS_ENABLE;
-                       pci_write_config_dword(dev, PCI_ROM_ADDRESS, val);
-                       printk("%s: enabling expansion ROM\n", pci_name(dev));
-               }
+               pci_enable_rom(dev);
+               printk("%s: enabling expansion ROM\n", pci_name(dev));
        }
 
        if (!len || !base)
@@ -232,18 +225,13 @@ intel_dc21285_init(struct pci_dev *dev, struct map_pci_info *map)
 static void
 intel_dc21285_exit(struct pci_dev *dev, struct map_pci_info *map)
 {
-       u32 val;
-
        if (map->base)
                iounmap(map->base);
 
        /*
         * We need to undo the PCI BAR2/PCI ROM BAR address alteration.
         */
-       pci_resource_flags(dev, PCI_ROM_RESOURCE) &= ~IORESOURCE_ROM_ENABLE;
-       pci_read_config_dword(dev, PCI_ROM_ADDRESS, &val);
-       val &= ~PCI_ROM_ADDRESS_ENABLE;
-       pci_write_config_dword(dev, PCI_ROM_ADDRESS, val);
+       pci_disable_rom(dev);
 }
 
 static unsigned long
index 49acd41718934f2a671d887c94e6603b596f32f7..5fcfec034a9466819f83fb5ae00670e1c7fa976a 100644 (file)
@@ -230,8 +230,7 @@ static int __devinit of_flash_probe(struct of_device *dev,
 
 #ifdef CONFIG_MTD_OF_PARTS
        if (err == 0) {
-               err = of_mtd_parse_partitions(&dev->dev, info->mtd,
-                                             dp, &info->parts);
+               err = of_mtd_parse_partitions(&dev->dev, dp, &info->parts);
                if (err < 0)
                        return err;
        }
diff --git a/drivers/mtd/maps/walnut.c b/drivers/mtd/maps/walnut.c
deleted file mode 100644 (file)
index e243476..0000000
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Mapping for Walnut flash
- * (used ebony.c as a "framework")
- *
- * Heikki Lindholm <holindho@infradead.org>
- *
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-#include <asm/io.h>
-#include <asm/ibm4xx.h>
-#include <platforms/4xx/walnut.h>
-
-/* these should be in platforms/4xx/walnut.h ? */
-#define WALNUT_FLASH_ONBD_N(x)         (x & 0x02)
-#define WALNUT_FLASH_SRAM_SEL(x)       (x & 0x01)
-#define WALNUT_FLASH_LOW               0xFFF00000
-#define WALNUT_FLASH_HIGH              0xFFF80000
-#define WALNUT_FLASH_SIZE              0x80000
-
-static struct mtd_info *flash;
-
-static struct map_info walnut_map = {
-       .name =         "Walnut flash",
-       .size =         WALNUT_FLASH_SIZE,
-       .bankwidth =    1,
-};
-
-/* Actually, OpenBIOS is the last 128 KiB of the flash - better
- * partitioning could be made */
-static struct mtd_partition walnut_partitions[] = {
-       {
-               .name =   "OpenBIOS",
-               .offset = 0x0,
-               .size =   WALNUT_FLASH_SIZE,
-               /*.mask_flags = MTD_WRITEABLE, */ /* force read-only */
-       }
-};
-
-int __init init_walnut(void)
-{
-       u8 fpga_brds1;
-       void *fpga_brds1_adr;
-       void *fpga_status_adr;
-       unsigned long flash_base;
-
-       /* this should already be mapped (platform/4xx/walnut.c) */
-       fpga_status_adr = ioremap(WALNUT_FPGA_BASE, 8);
-       if (!fpga_status_adr)
-               return -ENOMEM;
-
-       fpga_brds1_adr = fpga_status_adr+5;
-       fpga_brds1 = readb(fpga_brds1_adr);
-       /* iounmap(fpga_status_adr); */
-
-       if (WALNUT_FLASH_ONBD_N(fpga_brds1)) {
-               printk("The on-board flash is disabled (U79 sw 5)!");
-               iounmap(fpga_status_adr);
-               return -EIO;
-       }
-       if (WALNUT_FLASH_SRAM_SEL(fpga_brds1))
-               flash_base = WALNUT_FLASH_LOW;
-       else
-               flash_base = WALNUT_FLASH_HIGH;
-
-       walnut_map.phys = flash_base;
-       walnut_map.virt =
-               (void __iomem *)ioremap(flash_base, walnut_map.size);
-
-       if (!walnut_map.virt) {
-               printk("Failed to ioremap flash.\n");
-               iounmap(fpga_status_adr);
-               return -EIO;
-       }
-
-       simple_map_init(&walnut_map);
-
-       flash = do_map_probe("jedec_probe", &walnut_map);
-       if (flash) {
-               flash->owner = THIS_MODULE;
-               add_mtd_partitions(flash, walnut_partitions,
-                                       ARRAY_SIZE(walnut_partitions));
-       } else {
-               printk("map probe failed for flash\n");
-               iounmap(fpga_status_adr);
-               return -ENXIO;
-       }
-
-       iounmap(fpga_status_adr);
-       return 0;
-}
-
-static void __exit cleanup_walnut(void)
-{
-       if (flash) {
-               del_mtd_partitions(flash);
-               map_destroy(flash);
-       }
-
-       if (walnut_map.virt) {
-               iounmap((void *)walnut_map.virt);
-               walnut_map.virt = 0;
-       }
-}
-
-module_init(init_walnut);
-module_exit(cleanup_walnut);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Heikki Lindholm <holindho@infradead.org>");
-MODULE_DESCRIPTION("MTD map and partitions for IBM 405GP Walnut boards");
index 1c74762dec89c25f73c3d7ab7fa4fbdaf2e08688..963840e9b5bf3d25177baa281b3eb70769d69950 100644 (file)
@@ -348,7 +348,7 @@ static void mtdchar_erase_callback (struct erase_info *instr)
        wake_up((wait_queue_head_t *)instr->priv);
 }
 
-#if defined(CONFIG_MTD_OTP) || defined(CONFIG_MTD_ONENAND_OTP)
+#ifdef CONFIG_HAVE_MTD_OTP
 static int otp_select_filemode(struct mtd_file_info *mfi, int mode)
 {
        struct mtd_info *mtd = mfi->mtd;
@@ -665,7 +665,7 @@ static int mtd_ioctl(struct inode *inode, struct file *file,
                break;
        }
 
-#if defined(CONFIG_MTD_OTP) || defined(CONFIG_MTD_ONENAND_OTP)
+#ifdef CONFIG_HAVE_MTD_OTP
        case OTPSELECT:
        {
                int mode;
index 2972a5edb73d507e60b9b633b5e85e14a9070074..789842d0e6f21352023729c4037327205dccc067 100644 (file)
@@ -444,7 +444,7 @@ static int concat_erase(struct mtd_info *mtd, struct erase_info *instr)
                        return -EINVAL;
        }
 
-       instr->fail_addr = 0xffffffff;
+       instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
 
        /* make a local copy of instr to avoid modifying the caller's struct */
        erase = kmalloc(sizeof (struct erase_info), GFP_KERNEL);
@@ -493,7 +493,7 @@ static int concat_erase(struct mtd_info *mtd, struct erase_info *instr)
                        /* sanity check: should never happen since
                         * block alignment has been checked above */
                        BUG_ON(err == -EINVAL);
-                       if (erase->fail_addr != 0xffffffff)
+                       if (erase->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
                                instr->fail_addr = erase->fail_addr + offset;
                        break;
                }
index 5a680e1e61f14dbc2ba684482792f2f40aabbee9..aebb3b27edbd7554135b7e4899850eb19f929b28 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/interrupt.h>
 #include <linux/mtd/mtd.h>
 
+#define MTDOOPS_KERNMSG_MAGIC 0x5d005d00
 #define OOPS_PAGE_SIZE 4096
 
 static struct mtdoops_context {
@@ -99,7 +100,7 @@ static void mtdoops_inc_counter(struct mtdoops_context *cxt)
        int ret;
 
        cxt->nextpage++;
-       if (cxt->nextpage > cxt->oops_pages)
+       if (cxt->nextpage >= cxt->oops_pages)
                cxt->nextpage = 0;
        cxt->nextcount++;
        if (cxt->nextcount == 0xffffffff)
@@ -141,7 +142,7 @@ static void mtdoops_workfunc_erase(struct work_struct *work)
        mod = (cxt->nextpage * OOPS_PAGE_SIZE) % mtd->erasesize;
        if (mod != 0) {
                cxt->nextpage = cxt->nextpage + ((mtd->erasesize - mod) / OOPS_PAGE_SIZE);
-               if (cxt->nextpage > cxt->oops_pages)
+               if (cxt->nextpage >= cxt->oops_pages)
                        cxt->nextpage = 0;
        }
 
@@ -158,7 +159,7 @@ badblock:
                                cxt->nextpage * OOPS_PAGE_SIZE);
                i++;
                cxt->nextpage = cxt->nextpage + (mtd->erasesize / OOPS_PAGE_SIZE);
-               if (cxt->nextpage > cxt->oops_pages)
+               if (cxt->nextpage >= cxt->oops_pages)
                        cxt->nextpage = 0;
                if (i == (cxt->oops_pages / (mtd->erasesize / OOPS_PAGE_SIZE))) {
                        printk(KERN_ERR "mtdoops: All blocks bad!\n");
@@ -224,40 +225,40 @@ static void find_next_position(struct mtdoops_context *cxt)
 {
        struct mtd_info *mtd = cxt->mtd;
        int ret, page, maxpos = 0;
-       u32 count, maxcount = 0xffffffff;
+       u32 count[2], maxcount = 0xffffffff;
        size_t retlen;
 
        for (page = 0; page < cxt->oops_pages; page++) {
-               ret = mtd->read(mtd, page * OOPS_PAGE_SIZE, 4, &retlen, (u_char *) &count);
-               if ((retlen != 4) || ((ret < 0) && (ret != -EUCLEAN))) {
-                       printk(KERN_ERR "mtdoops: Read failure at %d (%td of 4 read)"
+               ret = mtd->read(mtd, page * OOPS_PAGE_SIZE, 8, &retlen, (u_char *) &count[0]);
+               if ((retlen != 8) || ((ret < 0) && (ret != -EUCLEAN))) {
+                       printk(KERN_ERR "mtdoops: Read failure at %d (%td of 8 read)"
                                ", err %d.\n", page * OOPS_PAGE_SIZE, retlen, ret);
                        continue;
                }
 
-               if (count == 0xffffffff)
+               if (count[1] != MTDOOPS_KERNMSG_MAGIC)
+                       continue;
+               if (count[0] == 0xffffffff)
                        continue;
                if (maxcount == 0xffffffff) {
-                       maxcount = count;
+                       maxcount = count[0];
                        maxpos = page;
-               } else if ((count < 0x40000000) && (maxcount > 0xc0000000)) {
-                       maxcount = count;
+               } else if ((count[0] < 0x40000000) && (maxcount > 0xc0000000)) {
+                       maxcount = count[0];
                        maxpos = page;
-               } else if ((count > maxcount) && (count < 0xc0000000)) {
-                       maxcount = count;
+               } else if ((count[0] > maxcount) && (count[0] < 0xc0000000)) {
+                       maxcount = count[0];
                        maxpos = page;
-               } else if ((count > maxcount) && (count > 0xc0000000)
+               } else if ((count[0] > maxcount) && (count[0] > 0xc0000000)
                                        && (maxcount > 0x80000000)) {
-                       maxcount = count;
+                       maxcount = count[0];
                        maxpos = page;
                }
        }
        if (maxcount == 0xffffffff) {
                cxt->nextpage = 0;
                cxt->nextcount = 1;
-               cxt->ready = 1;
-               printk(KERN_DEBUG "mtdoops: Ready %d, %d (first init)\n",
-                               cxt->nextpage, cxt->nextcount);
+               schedule_work(&cxt->work_erase);
                return;
        }
 
@@ -358,8 +359,9 @@ mtdoops_console_write(struct console *co, const char *s, unsigned int count)
 
        if (cxt->writecount == 0) {
                u32 *stamp = cxt->oops_buf;
-               *stamp = cxt->nextcount;
-               cxt->writecount = 4;
+               *stamp++ = cxt->nextcount;
+               *stamp = MTDOOPS_KERNMSG_MAGIC;
+               cxt->writecount = 8;
        }
 
        if ((count + cxt->writecount) > OOPS_PAGE_SIZE)
index 9a06dc93ee0d36a14428b618d810fb2ece02140d..3728913fa5fa553ebc358f0bd6792f8c74ec0699 100644 (file)
@@ -214,7 +214,7 @@ static int part_erase(struct mtd_info *mtd, struct erase_info *instr)
        instr->addr += part->offset;
        ret = part->master->erase(part->master, instr);
        if (ret) {
-               if (instr->fail_addr != 0xffffffff)
+               if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
                        instr->fail_addr -= part->offset;
                instr->addr -= part->offset;
        }
@@ -226,7 +226,7 @@ void mtd_erase_callback(struct erase_info *instr)
        if (instr->mtd->erase == part_erase) {
                struct mtd_part *part = PART(instr->mtd);
 
-               if (instr->fail_addr != 0xffffffff)
+               if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
                        instr->fail_addr -= part->offset;
                instr->addr -= part->offset;
        }
index 41f361c49b32b73344b62f0838eb35647328dceb..1c2e9450d663ca9e7c2c3fd8a8a5a8b98b26755c 100644 (file)
@@ -56,6 +56,12 @@ config MTD_NAND_H1900
        help
          This enables the driver for the iPAQ h1900 flash.
 
+config MTD_NAND_GPIO
+       tristate "GPIO NAND Flash driver"
+       depends on GENERIC_GPIO && ARM
+       help
+         This enables a GPIO based NAND flash driver.
+
 config MTD_NAND_SPIA
        tristate "NAND Flash device on SPIA board"
        depends on ARCH_P720T
@@ -68,12 +74,6 @@ config MTD_NAND_AMS_DELTA
        help
          Support for NAND flash on Amstrad E3 (Delta).
 
-config MTD_NAND_TOTO
-       tristate "NAND Flash device on TOTO board"
-       depends on ARCH_OMAP && BROKEN
-       help
-         Support for NAND flash on Texas Instruments Toto platform.
-
 config MTD_NAND_TS7250
        tristate "NAND Flash device on TS-7250 board"
        depends on MACH_TS72XX
@@ -163,13 +163,6 @@ config MTD_NAND_S3C2410_HWECC
          incorrect ECC generation, and if using these, the default of
          software ECC is preferable.
 
-config MTD_NAND_NDFC
-       tristate "NDFC NanD Flash Controller"
-       depends on 4xx && !PPC_MERGE
-       select MTD_NAND_ECC_SMC
-       help
-        NDFC Nand Flash Controllers are integrated in IBM/AMCC's 4xx SoCs
-
 config MTD_NAND_S3C2410_CLKSTOP
        bool "S3C2410 NAND IDLE clock stop"
        depends on MTD_NAND_S3C2410
@@ -340,6 +333,13 @@ config MTD_NAND_PXA3xx
          This enables the driver for the NAND flash device found on
          PXA3xx processors
 
+config MTD_NAND_PXA3xx_BUILTIN
+       bool "Use builtin definitions for some NAND chips (deprecated)"
+       depends on MTD_NAND_PXA3xx
+       help
+         This enables builtin definitions for some NAND chips. This
+         is deprecated in favor of platform specific data.
+
 config MTD_NAND_CM_X270
        tristate "Support for NAND Flash on CM-X270 modules"
        depends on MTD_NAND && MACH_ARMCORE
@@ -400,10 +400,24 @@ config MTD_NAND_FSL_ELBC
 
 config MTD_NAND_FSL_UPM
        tristate "Support for NAND on Freescale UPM"
-       depends on MTD_NAND && OF_GPIO && (PPC_83xx || PPC_85xx)
+       depends on MTD_NAND && (PPC_83xx || PPC_85xx)
        select FSL_LBC
        help
          Enables support for NAND Flash chips wired onto Freescale PowerPC
          processor localbus with User-Programmable Machine support.
 
+config MTD_NAND_MXC
+       tristate "MXC NAND support"
+       depends on ARCH_MX2
+       help
+         This enables the driver for the NAND flash controller on the
+         MXC processors.
+
+config MTD_NAND_SH_FLCTL
+       tristate "Support for NAND on Renesas SuperH FLCTL"
+       depends on MTD_NAND && SUPERH && CPU_SUBTYPE_SH7723
+       help
+         Several Renesas SuperH CPUs have an FLCTL. This option enables
+         support for NAND Flash using the FLCTL. This driver supports SH7723.
+
 endif # MTD_NAND
index b786c5da82da227fe388bf468c6dc370caee49ea..b661586afbfc38f5b2d616b4c5615207c9d5862e 100644 (file)
@@ -8,7 +8,6 @@ obj-$(CONFIG_MTD_NAND_IDS)              += nand_ids.o
 obj-$(CONFIG_MTD_NAND_CAFE)            += cafe_nand.o
 obj-$(CONFIG_MTD_NAND_SPIA)            += spia.o
 obj-$(CONFIG_MTD_NAND_AMS_DELTA)       += ams-delta.o
-obj-$(CONFIG_MTD_NAND_TOTO)            += toto.o
 obj-$(CONFIG_MTD_NAND_AUTCPU12)                += autcpu12.o
 obj-$(CONFIG_MTD_NAND_EDB7312)         += edb7312.o
 obj-$(CONFIG_MTD_NAND_AU1550)          += au1550nd.o
@@ -24,6 +23,7 @@ obj-$(CONFIG_MTD_NAND_NANDSIM)                += nandsim.o
 obj-$(CONFIG_MTD_NAND_CS553X)          += cs553x_nand.o
 obj-$(CONFIG_MTD_NAND_NDFC)            += ndfc.o
 obj-$(CONFIG_MTD_NAND_ATMEL)           += atmel_nand.o
+obj-$(CONFIG_MTD_NAND_GPIO)            += gpio.o
 obj-$(CONFIG_MTD_NAND_CM_X270)         += cmx270_nand.o
 obj-$(CONFIG_MTD_NAND_BASLER_EXCITE)   += excite_nandflash.o
 obj-$(CONFIG_MTD_NAND_PXA3xx)          += pxa3xx_nand.o
@@ -34,5 +34,7 @@ obj-$(CONFIG_MTD_NAND_PASEMI)         += pasemi_nand.o
 obj-$(CONFIG_MTD_NAND_ORION)           += orion_nand.o
 obj-$(CONFIG_MTD_NAND_FSL_ELBC)                += fsl_elbc_nand.o
 obj-$(CONFIG_MTD_NAND_FSL_UPM)         += fsl_upm.o
+obj-$(CONFIG_MTD_NAND_SH_FLCTL)                += sh_flctl.o
+obj-$(CONFIG_MTD_NAND_MXC)             += mxc_nand.o
 
 nand-objs := nand_base.o nand_bbt.o
index 3387e0d5076b06bad1af8506d73f673db99a1d2a..c98c1570a40b1ef5f3f3931ab4e0512be0341406 100644 (file)
@@ -173,48 +173,6 @@ static void atmel_write_buf16(struct mtd_info *mtd, const u8 *buf, int len)
        __raw_writesw(nand_chip->IO_ADDR_W, buf, len / 2);
 }
 
-/*
- * write oob for small pages
- */
-static int atmel_nand_write_oob_512(struct mtd_info *mtd,
-               struct nand_chip *chip, int page)
-{
-       int chunk = chip->ecc.bytes + chip->ecc.prepad + chip->ecc.postpad;
-       int eccsize = chip->ecc.size, length = mtd->oobsize;
-       int len, pos, status = 0;
-       const uint8_t *bufpoi = chip->oob_poi;
-
-       pos = eccsize + chunk;
-
-       chip->cmdfunc(mtd, NAND_CMD_SEQIN, pos, page);
-       len = min_t(int, length, chunk);
-       chip->write_buf(mtd, bufpoi, len);
-       bufpoi += len;
-       length -= len;
-       if (length > 0)
-               chip->write_buf(mtd, bufpoi, length);
-
-       chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
-       status = chip->waitfunc(mtd, chip);
-
-       return status & NAND_STATUS_FAIL ? -EIO : 0;
-
-}
-
-/*
- * read oob for small pages
- */
-static int atmel_nand_read_oob_512(struct mtd_info *mtd,
-               struct nand_chip *chip, int page, int sndcmd)
-{
-       if (sndcmd) {
-               chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page);
-               sndcmd = 0;
-       }
-       chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
-       return sndcmd;
-}
-
 /*
  * Calculate HW ECC
  *
@@ -235,14 +193,14 @@ static int atmel_nand_calculate(struct mtd_info *mtd,
        /* get the first 2 ECC bytes */
        ecc_value = ecc_readl(host->ecc, PR);
 
-       ecc_code[eccpos[0]] = ecc_value & 0xFF;
-       ecc_code[eccpos[1]] = (ecc_value >> 8) & 0xFF;
+       ecc_code[0] = ecc_value & 0xFF;
+       ecc_code[1] = (ecc_value >> 8) & 0xFF;
 
        /* get the last 2 ECC bytes */
        ecc_value = ecc_readl(host->ecc, NPR) & ATMEL_ECC_NPARITY;
 
-       ecc_code[eccpos[2]] = ecc_value & 0xFF;
-       ecc_code[eccpos[3]] = (ecc_value >> 8) & 0xFF;
+       ecc_code[2] = ecc_value & 0xFF;
+       ecc_code[3] = (ecc_value >> 8) & 0xFF;
 
        return 0;
 }
@@ -476,14 +434,12 @@ static int __init atmel_nand_probe(struct platform_device *pdev)
                        res = -EIO;
                        goto err_ecc_ioremap;
                }
-               nand_chip->ecc.mode = NAND_ECC_HW_SYNDROME;
+               nand_chip->ecc.mode = NAND_ECC_HW;
                nand_chip->ecc.calculate = atmel_nand_calculate;
                nand_chip->ecc.correct = atmel_nand_correct;
                nand_chip->ecc.hwctl = atmel_nand_hwctl;
                nand_chip->ecc.read_page = atmel_nand_read_page;
                nand_chip->ecc.bytes = 4;
-               nand_chip->ecc.prepad = 0;
-               nand_chip->ecc.postpad = 0;
        }
 
        nand_chip->chip_delay = 20;             /* 20us command delay time */
@@ -514,7 +470,7 @@ static int __init atmel_nand_probe(struct platform_device *pdev)
                goto err_scan_ident;
        }
 
-       if (nand_chip->ecc.mode == NAND_ECC_HW_SYNDROME) {
+       if (nand_chip->ecc.mode == NAND_ECC_HW) {
                /* ECC is calculated for the whole page (1 step) */
                nand_chip->ecc.size = mtd->writesize;
 
@@ -522,8 +478,6 @@ static int __init atmel_nand_probe(struct platform_device *pdev)
                switch (mtd->writesize) {
                case 512:
                        nand_chip->ecc.layout = &atmel_oobinfo_small;
-                       nand_chip->ecc.read_oob = atmel_nand_read_oob_512;
-                       nand_chip->ecc.write_oob = atmel_nand_write_oob_512;
                        ecc_writel(host->ecc, MR, ATMEL_ECC_PAGESIZE_528);
                        break;
                case 1024:
index 3370a800fd3612fd43f6bb54d4cf8a36cbdf2f14..9f1b451005ca14f8ce73aaa3c64e087cf2e7e607 100644 (file)
@@ -289,8 +289,10 @@ static int __init cs553x_init(void)
        int i;
        uint64_t val;
 
+#ifdef CONFIG_MTD_PARTITIONS
        int mtd_parts_nb = 0;
        struct mtd_partition *mtd_parts = NULL;
+#endif
 
        /* If the CPU isn't a Geode GX or LX, abort */
        if (!is_geode())
index 98ad3cefcaf47d5678c81b9a4481ff4c6bec5b4e..4aa5bd6158daf8060c8dcb7f7e06dc9286c64203 100644 (file)
@@ -918,8 +918,7 @@ static int __devinit fsl_elbc_chip_probe(struct fsl_elbc_ctrl *ctrl,
 
 #ifdef CONFIG_MTD_OF_PARTS
        if (ret == 0) {
-               ret = of_mtd_parse_partitions(priv->dev, &priv->mtd,
-                                             node, &parts);
+               ret = of_mtd_parse_partitions(priv->dev, node, &parts);
                if (ret < 0)
                        goto err;
        }
index 1ebfd87f00b401de487f65836aac60e810bf5214..024e3fffd4bb49b6176af4701c07e344120a3fc2 100644 (file)
@@ -13,6 +13,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/delay.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/nand_ecc.h>
 #include <linux/mtd/partitions.h>
@@ -36,8 +37,6 @@ struct fsl_upm_nand {
        uint8_t upm_cmd_offset;
        void __iomem *io_base;
        int rnb_gpio;
-       const uint32_t *wait_pattern;
-       const uint32_t *wait_write;
        int chip_delay;
 };
 
@@ -61,10 +60,11 @@ static void fun_wait_rnb(struct fsl_upm_nand *fun)
        if (fun->rnb_gpio >= 0) {
                while (--cnt && !fun_chip_ready(&fun->mtd))
                        cpu_relax();
+               if (!cnt)
+                       dev_err(fun->dev, "tired waiting for RNB\n");
+       } else {
+               ndelay(100);
        }
-
-       if (!cnt)
-               dev_err(fun->dev, "tired waiting for RNB\n");
 }
 
 static void fun_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
@@ -89,8 +89,7 @@ static void fun_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
 
        fsl_upm_run_pattern(&fun->upm, fun->io_base, cmd);
 
-       if (fun->wait_pattern)
-               fun_wait_rnb(fun);
+       fun_wait_rnb(fun);
 }
 
 static uint8_t fun_read_byte(struct mtd_info *mtd)
@@ -116,14 +115,16 @@ static void fun_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
 
        for (i = 0; i < len; i++) {
                out_8(fun->chip.IO_ADDR_W, buf[i]);
-               if (fun->wait_write)
-                       fun_wait_rnb(fun);
+               fun_wait_rnb(fun);
        }
 }
 
-static int __devinit fun_chip_init(struct fsl_upm_nand *fun)
+static int __devinit fun_chip_init(struct fsl_upm_nand *fun,
+                                  const struct device_node *upm_np,
+                                  const struct resource *io_res)
 {
        int ret;
+       struct device_node *flash_np;
 #ifdef CONFIG_MTD_PARTITIONS
        static const char *part_types[] = { "cmdlinepart", NULL, };
 #endif
@@ -143,18 +144,37 @@ static int __devinit fun_chip_init(struct fsl_upm_nand *fun)
        fun->mtd.priv = &fun->chip;
        fun->mtd.owner = THIS_MODULE;
 
+       flash_np = of_get_next_child(upm_np, NULL);
+       if (!flash_np)
+               return -ENODEV;
+
+       fun->mtd.name = kasprintf(GFP_KERNEL, "%x.%s", io_res->start,
+                                 flash_np->name);
+       if (!fun->mtd.name) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
        ret = nand_scan(&fun->mtd, 1);
        if (ret)
-               return ret;
-
-       fun->mtd.name = fun->dev->bus_id;
+               goto err;
 
 #ifdef CONFIG_MTD_PARTITIONS
        ret = parse_mtd_partitions(&fun->mtd, part_types, &fun->parts, 0);
+
+#ifdef CONFIG_MTD_OF_PARTS
+       if (ret == 0)
+               ret = of_mtd_parse_partitions(fun->dev, &fun->mtd,
+                                             flash_np, &fun->parts);
+#endif
        if (ret > 0)
-               return add_mtd_partitions(&fun->mtd, fun->parts, ret);
+               ret = add_mtd_partitions(&fun->mtd, fun->parts, ret);
+       else
 #endif
-       return add_mtd_device(&fun->mtd);
+               ret = add_mtd_device(&fun->mtd);
+err:
+       of_node_put(flash_np);
+       return ret;
 }
 
 static int __devinit fun_probe(struct of_device *ofdev,
@@ -211,6 +231,12 @@ static int __devinit fun_probe(struct of_device *ofdev,
                goto err2;
        }
 
+       prop = of_get_property(ofdev->node, "chip-delay", NULL);
+       if (prop)
+               fun->chip_delay = *prop;
+       else
+               fun->chip_delay = 50;
+
        fun->io_base = devm_ioremap_nocache(&ofdev->dev, io_res.start,
                                          io_res.end - io_res.start + 1);
        if (!fun->io_base) {
@@ -220,17 +246,8 @@ static int __devinit fun_probe(struct of_device *ofdev,
 
        fun->dev = &ofdev->dev;
        fun->last_ctrl = NAND_CLE;
-       fun->wait_pattern = of_get_property(ofdev->node, "fsl,wait-pattern",
-                                           NULL);
-       fun->wait_write = of_get_property(ofdev->node, "fsl,wait-write", NULL);
-
-       prop = of_get_property(ofdev->node, "chip-delay", NULL);
-       if (prop)
-               fun->chip_delay = *prop;
-       else
-               fun->chip_delay = 50;
 
-       ret = fun_chip_init(fun);
+       ret = fun_chip_init(fun, ofdev->node, &io_res);
        if (ret)
                goto err2;
 
@@ -251,6 +268,7 @@ static int __devexit fun_remove(struct of_device *ofdev)
        struct fsl_upm_nand *fun = dev_get_drvdata(&ofdev->dev);
 
        nand_release(&fun->mtd);
+       kfree(fun->mtd.name);
 
        if (fun->rnb_gpio >= 0)
                gpio_free(fun->rnb_gpio);
diff --git a/drivers/mtd/nand/gpio.c b/drivers/mtd/nand/gpio.c
new file mode 100644 (file)
index 0000000..8f902e7
--- /dev/null
@@ -0,0 +1,375 @@
+/*
+ * drivers/mtd/nand/gpio.c
+ *
+ * Updated, and converted to generic GPIO based driver by Russell King.
+ *
+ * Written by Ben Dooks <ben@simtec.co.uk>
+ *   Based on 2.4 version by Mark Whittaker
+ *
+ * © 2004 Simtec Electronics
+ *
+ * Device driver for NAND connected via GPIO
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/io.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/nand-gpio.h>
+
+struct gpiomtd {
+       void __iomem            *io_sync;
+       struct mtd_info         mtd_info;
+       struct nand_chip        nand_chip;
+       struct gpio_nand_platdata plat;
+};
+
+#define gpio_nand_getpriv(x) container_of(x, struct gpiomtd, mtd_info)
+
+
+#ifdef CONFIG_ARM
+/* gpio_nand_dosync()
+ *
+ * Make sure the GPIO state changes occur in-order with writes to NAND
+ * memory region.
+ * Needed on PXA due to bus-reordering within the SoC itself (see section on
+ * I/O ordering in PXA manual (section 2.3, p35)
+ */
+static void gpio_nand_dosync(struct gpiomtd *gpiomtd)
+{
+       unsigned long tmp;
+
+       if (gpiomtd->io_sync) {
+               /*
+                * Linux memory barriers don't cater for what's required here.
+                * What's required is what's here - a read from a separate
+                * region with a dependency on that read.
+                */
+               tmp = readl(gpiomtd->io_sync);
+               asm volatile("mov %1, %0\n" : "=r" (tmp) : "r" (tmp));
+       }
+}
+#else
+static inline void gpio_nand_dosync(struct gpiomtd *gpiomtd) {}
+#endif
+
+static void gpio_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+{
+       struct gpiomtd *gpiomtd = gpio_nand_getpriv(mtd);
+
+       gpio_nand_dosync(gpiomtd);
+
+       if (ctrl & NAND_CTRL_CHANGE) {
+               gpio_set_value(gpiomtd->plat.gpio_nce, !(ctrl & NAND_NCE));
+               gpio_set_value(gpiomtd->plat.gpio_cle, !!(ctrl & NAND_CLE));
+               gpio_set_value(gpiomtd->plat.gpio_ale, !!(ctrl & NAND_ALE));
+               gpio_nand_dosync(gpiomtd);
+       }
+       if (cmd == NAND_CMD_NONE)
+               return;
+
+       writeb(cmd, gpiomtd->nand_chip.IO_ADDR_W);
+       gpio_nand_dosync(gpiomtd);
+}
+
+static void gpio_nand_writebuf(struct mtd_info *mtd, const u_char *buf, int len)
+{
+       struct nand_chip *this = mtd->priv;
+
+       writesb(this->IO_ADDR_W, buf, len);
+}
+
+static void gpio_nand_readbuf(struct mtd_info *mtd, u_char *buf, int len)
+{
+       struct nand_chip *this = mtd->priv;
+
+       readsb(this->IO_ADDR_R, buf, len);
+}
+
+static int gpio_nand_verifybuf(struct mtd_info *mtd, const u_char *buf, int len)
+{
+       struct nand_chip *this = mtd->priv;
+       unsigned char read, *p = (unsigned char *) buf;
+       int i, err = 0;
+
+       for (i = 0; i < len; i++) {
+               read = readb(this->IO_ADDR_R);
+               if (read != p[i]) {
+                       pr_debug("%s: err at %d (read %04x vs %04x)\n",
+                              __func__, i, read, p[i]);
+                       err = -EFAULT;
+               }
+       }
+       return err;
+}
+
+static void gpio_nand_writebuf16(struct mtd_info *mtd, const u_char *buf,
+                                int len)
+{
+       struct nand_chip *this = mtd->priv;
+
+       if (IS_ALIGNED((unsigned long)buf, 2)) {
+               writesw(this->IO_ADDR_W, buf, len>>1);
+       } else {
+               int i;
+               unsigned short *ptr = (unsigned short *)buf;
+
+               for (i = 0; i < len; i += 2, ptr++)
+                       writew(*ptr, this->IO_ADDR_W);
+       }
+}
+
+static void gpio_nand_readbuf16(struct mtd_info *mtd, u_char *buf, int len)
+{
+       struct nand_chip *this = mtd->priv;
+
+       if (IS_ALIGNED((unsigned long)buf, 2)) {
+               readsw(this->IO_ADDR_R, buf, len>>1);
+       } else {
+               int i;
+               unsigned short *ptr = (unsigned short *)buf;
+
+               for (i = 0; i < len; i += 2, ptr++)
+                       *ptr = readw(this->IO_ADDR_R);
+       }
+}
+
+static int gpio_nand_verifybuf16(struct mtd_info *mtd, const u_char *buf,
+                                int len)
+{
+       struct nand_chip *this = mtd->priv;
+       unsigned short read, *p = (unsigned short *) buf;
+       int i, err = 0;
+       len >>= 1;
+
+       for (i = 0; i < len; i++) {
+               read = readw(this->IO_ADDR_R);
+               if (read != p[i]) {
+                       pr_debug("%s: err at %d (read %04x vs %04x)\n",
+                              __func__, i, read, p[i]);
+                       err = -EFAULT;
+               }
+       }
+       return err;
+}
+
+
+static int gpio_nand_devready(struct mtd_info *mtd)
+{
+       struct gpiomtd *gpiomtd = gpio_nand_getpriv(mtd);
+       return gpio_get_value(gpiomtd->plat.gpio_rdy);
+}
+
+static int __devexit gpio_nand_remove(struct platform_device *dev)
+{
+       struct gpiomtd *gpiomtd = platform_get_drvdata(dev);
+       struct resource *res;
+
+       nand_release(&gpiomtd->mtd_info);
+
+       res = platform_get_resource(dev, IORESOURCE_MEM, 1);
+       iounmap(gpiomtd->io_sync);
+       if (res)
+               release_mem_region(res->start, res->end - res->start + 1);
+
+       res = platform_get_resource(dev, IORESOURCE_MEM, 0);
+       iounmap(gpiomtd->nand_chip.IO_ADDR_R);
+       release_mem_region(res->start, res->end - res->start + 1);
+
+       if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
+               gpio_set_value(gpiomtd->plat.gpio_nwp, 0);
+       gpio_set_value(gpiomtd->plat.gpio_nce, 1);
+
+       gpio_free(gpiomtd->plat.gpio_cle);
+       gpio_free(gpiomtd->plat.gpio_ale);
+       gpio_free(gpiomtd->plat.gpio_nce);
+       if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
+               gpio_free(gpiomtd->plat.gpio_nwp);
+       gpio_free(gpiomtd->plat.gpio_rdy);
+
+       kfree(gpiomtd);
+
+       return 0;
+}
+
+static void __iomem *request_and_remap(struct resource *res, size_t size,
+                                       const char *name, int *err)
+{
+       void __iomem *ptr;
+
+       if (!request_mem_region(res->start, res->end - res->start + 1, name)) {
+               *err = -EBUSY;
+               return NULL;
+       }
+
+       ptr = ioremap(res->start, size);
+       if (!ptr) {
+               release_mem_region(res->start, res->end - res->start + 1);
+               *err = -ENOMEM;
+       }
+       return ptr;
+}
+
+/* Probe a gpio-nand platform device: map the NAND data window (and the
+ * optional write-sync window), claim the five control GPIOs, wire up the
+ * nand_chip callbacks according to the configured bus width, scan for a
+ * chip and register partitions.  Error paths unwind in strict reverse
+ * order via the goto ladder at the bottom. */
+static int __devinit gpio_nand_probe(struct platform_device *dev)
+{
+       struct gpiomtd *gpiomtd;
+       struct nand_chip *this;
+       struct resource *res0, *res1;
+       int ret;
+
+       if (!dev->dev.platform_data)
+               return -EINVAL;
+
+       /* Resource 0 (the NAND data window) is mandatory. */
+       res0 = platform_get_resource(dev, IORESOURCE_MEM, 0);
+       if (!res0)
+               return -EINVAL;
+
+       gpiomtd = kzalloc(sizeof(*gpiomtd), GFP_KERNEL);
+       if (gpiomtd == NULL) {
+               dev_err(&dev->dev, "failed to create NAND MTD\n");
+               return -ENOMEM;
+       }
+
+       this = &gpiomtd->nand_chip;
+       /* 2 bytes covers both 8- and 16-bit data accesses. */
+       this->IO_ADDR_R = request_and_remap(res0, 2, "NAND", &ret);
+       if (!this->IO_ADDR_R) {
+               dev_err(&dev->dev, "unable to map NAND\n");
+               goto err_map;
+       }
+
+       /* Resource 1 (bus write-sync location) is optional. */
+       res1 = platform_get_resource(dev, IORESOURCE_MEM, 1);
+       if (res1) {
+               gpiomtd->io_sync = request_and_remap(res1, 4, "NAND sync", &ret);
+               if (!gpiomtd->io_sync) {
+                       dev_err(&dev->dev, "unable to map sync NAND\n");
+                       goto err_sync;
+               }
+       }
+
+       /* Keep a private copy so platform data may be __initdata. */
+       memcpy(&gpiomtd->plat, dev->dev.platform_data, sizeof(gpiomtd->plat));
+
+       /* nCE is active low: start with the chip deselected. */
+       ret = gpio_request(gpiomtd->plat.gpio_nce, "NAND NCE");
+       if (ret)
+               goto err_nce;
+       gpio_direction_output(gpiomtd->plat.gpio_nce, 1);
+       /* Write-protect line is optional; drive it high (writes allowed)
+        * when present. */
+       if (gpio_is_valid(gpiomtd->plat.gpio_nwp)) {
+               ret = gpio_request(gpiomtd->plat.gpio_nwp, "NAND NWP");
+               if (ret)
+                       goto err_nwp;
+               gpio_direction_output(gpiomtd->plat.gpio_nwp, 1);
+       }
+       ret = gpio_request(gpiomtd->plat.gpio_ale, "NAND ALE");
+       if (ret)
+               goto err_ale;
+       gpio_direction_output(gpiomtd->plat.gpio_ale, 0);
+       ret = gpio_request(gpiomtd->plat.gpio_cle, "NAND CLE");
+       if (ret)
+               goto err_cle;
+       gpio_direction_output(gpiomtd->plat.gpio_cle, 0);
+       ret = gpio_request(gpiomtd->plat.gpio_rdy, "NAND RDY");
+       if (ret)
+               goto err_rdy;
+       gpio_direction_input(gpiomtd->plat.gpio_rdy);
+
+
+       this->IO_ADDR_W  = this->IO_ADDR_R;
+       this->ecc.mode   = NAND_ECC_SOFT;
+       this->options    = gpiomtd->plat.options;
+       this->chip_delay = gpiomtd->plat.chip_delay;
+
+       /* install our routines */
+       this->cmd_ctrl   = gpio_nand_cmd_ctrl;
+       this->dev_ready  = gpio_nand_devready;
+
+       /* Pick 8- or 16-bit buffer accessors based on platform options. */
+       if (this->options & NAND_BUSWIDTH_16) {
+               this->read_buf   = gpio_nand_readbuf16;
+               this->write_buf  = gpio_nand_writebuf16;
+               this->verify_buf = gpio_nand_verifybuf16;
+       } else {
+               this->read_buf   = gpio_nand_readbuf;
+               this->write_buf  = gpio_nand_writebuf;
+               this->verify_buf = gpio_nand_verifybuf;
+       }
+
+       /* set the mtd private data for the nand driver */
+       gpiomtd->mtd_info.priv = this;
+       gpiomtd->mtd_info.owner = THIS_MODULE;
+
+       if (nand_scan(&gpiomtd->mtd_info, 1)) {
+               dev_err(&dev->dev, "no nand chips found?\n");
+               ret = -ENXIO;
+               goto err_wp;
+       }
+
+       /* Let the board adjust its partition table now that the actual
+        * chip size is known. */
+       if (gpiomtd->plat.adjust_parts)
+               gpiomtd->plat.adjust_parts(&gpiomtd->plat,
+                                          gpiomtd->mtd_info.size);
+
+       add_mtd_partitions(&gpiomtd->mtd_info, gpiomtd->plat.parts,
+                          gpiomtd->plat.num_parts);
+       platform_set_drvdata(dev, gpiomtd);
+
+       return 0;
+
+/* Unwind: each label releases exactly what was acquired after the
+ * matching goto site, in reverse acquisition order. */
+err_wp:
+       if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
+               gpio_set_value(gpiomtd->plat.gpio_nwp, 0);
+       gpio_free(gpiomtd->plat.gpio_rdy);
+err_rdy:
+       gpio_free(gpiomtd->plat.gpio_cle);
+err_cle:
+       gpio_free(gpiomtd->plat.gpio_ale);
+err_ale:
+       if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
+               gpio_free(gpiomtd->plat.gpio_nwp);
+err_nwp:
+       gpio_free(gpiomtd->plat.gpio_nce);
+err_nce:
+       iounmap(gpiomtd->io_sync);
+       if (res1)
+               release_mem_region(res1->start, res1->end - res1->start + 1);
+err_sync:
+       iounmap(gpiomtd->nand_chip.IO_ADDR_R);
+       release_mem_region(res0->start, res0->end - res0->start + 1);
+err_map:
+       kfree(gpiomtd);
+       return ret;
+}
+
+/* Platform driver glue; matches platform devices named "gpio-nand". */
+static struct platform_driver gpio_nand_driver = {
+       .probe          = gpio_nand_probe,
+       /* gpio_nand_remove() is __devexit, so the reference must be
+        * wrapped in __devexit_p() — otherwise the build breaks (or the
+        * pointer dangles) when hotplug support is compiled out and the
+        * function is discarded. */
+       .remove         = __devexit_p(gpio_nand_remove),
+       .driver         = {
+               .name   = "gpio-nand",
+       },
+};
+
+/* Module entry point: print the banner and register the driver.
+ * The original banner contained a mis-encoded copyright glyph
+ * ("Â©", UTF-8 double-encoding); use plain ASCII instead. */
+static int __init gpio_nand_init(void)
+{
+       printk(KERN_INFO "GPIO NAND driver, (c) 2004 Simtec Electronics\n");
+
+       return platform_driver_register(&gpio_nand_driver);
+}
+
+/* Module exit point: unregister the platform driver, which triggers
+ * gpio_nand_remove() for any bound devices. */
+static void __exit gpio_nand_exit(void)
+{
+       platform_driver_unregister(&gpio_nand_driver);
+}
+
+module_init(gpio_nand_init);
+module_exit(gpio_nand_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>");
+MODULE_DESCRIPTION("GPIO NAND Driver");
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
new file mode 100644 (file)
index 0000000..21fd4f1
--- /dev/null
@@ -0,0 +1,1077 @@
+/*
+ * Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2008 Sascha Hauer, kernel@pengutronix.de
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+
+#include <asm/mach/flash.h>
+#include <mach/mxc_nand.h>
+
+#define DRIVER_NAME "mxc_nand"
+
+/* Addresses for NFC registers (offsets from the controller base;
+ * the RAM buffers live below 0xE00, the registers above it) */
+#define NFC_BUF_SIZE           0xE00
+#define NFC_BUF_ADDR           0xE04
+#define NFC_FLASH_ADDR         0xE06
+#define NFC_FLASH_CMD          0xE08
+#define NFC_CONFIG             0xE0A
+#define NFC_ECC_STATUS_RESULT  0xE0C
+#define NFC_RSLTMAIN_AREA      0xE0E
+#define NFC_RSLTSPARE_AREA     0xE10
+#define NFC_WRPROT             0xE12
+#define NFC_UNLOCKSTART_BLKADDR        0xE14
+#define NFC_UNLOCKEND_BLKADDR  0xE16
+#define NFC_NF_WRPRST          0xE18
+#define NFC_CONFIG1            0xE1A
+#define NFC_CONFIG2            0xE1C
+
+/* Addresses for NFC RAM BUFFER Main area 0 */
+#define MAIN_AREA0             0x000
+#define MAIN_AREA1             0x200
+#define MAIN_AREA2             0x400
+#define MAIN_AREA3             0x600
+
+/* Addresses for NFC SPARE BUFFER Spare area 0 */
+#define SPARE_AREA0            0x800
+#define SPARE_AREA1            0x810
+#define SPARE_AREA2            0x820
+#define SPARE_AREA3            0x830
+
+/* Set INT to 0, FCMD to 1, rest to 0 in NFC_CONFIG2 Register
+ * for Command operation */
+#define NFC_CMD            0x1
+
+/* Set INT to 0, FADD to 1, rest to 0 in NFC_CONFIG2 Register
+ * for Address operation */
+#define NFC_ADDR           0x2
+
+/* Set INT to 0, FDI to 1, rest to 0 in NFC_CONFIG2 Register
+ * for Input operation */
+#define NFC_INPUT          0x4
+
+/* Set INT to 0, FDO to 001, rest to 0 in NFC_CONFIG2 Register
+ * for Data Output operation */
+#define NFC_OUTPUT         0x8
+
+/* Set INT to 0, FDO to 010, rest to 0 in NFC_CONFIG2 Register
+ * for Read ID operation */
+#define NFC_ID             0x10
+
+/* Set INT to 0, FDO to 100, rest to 0 in NFC_CONFIG2 Register
+ * for Read Status operation */
+#define NFC_STATUS         0x20
+
+/* INT status bit in NFC_CONFIG2: set by the controller when an
+ * operation completes, cleared by software */
+#define NFC_INT            0x8000
+
+/* NFC_CONFIG1 bit fields */
+#define NFC_SP_EN           (1 << 2)
+#define NFC_ECC_EN          (1 << 3)
+#define NFC_INT_MSK         (1 << 4)
+#define NFC_BIG             (1 << 5)
+#define NFC_RST             (1 << 6)
+#define NFC_CE              (1 << 7)
+#define NFC_ONE_CYCLE       (1 << 8)
+
+/* Per-controller driver state. */
+struct mxc_nand_host {
+       struct mtd_info         mtd;            /* embedded MTD device */
+       struct nand_chip        nand;           /* embedded NAND chip */
+       struct mtd_partition    *parts;         /* parsed partition table */
+       struct device           *dev;           /* backing platform device */
+
+       void __iomem            *regs;          /* mapped NFC registers/buffers */
+       int                     spare_only;     /* current op targets OOB only */
+       int                     status_request; /* next read_byte returns status */
+       int                     pagesize_2k;    /* large-page (2KiB) device */
+       uint16_t                col_addr;       /* saved column for buf accessors */
+       struct clk              *clk;           /* NFC gate clock */
+       int                     clk_act;        /* clock currently enabled */
+       int                     irq;            /* NFC interrupt number */
+
+       wait_queue_head_t       irq_waitq;      /* wait_op_done() sleepers */
+};
+
+/* Define delays in microsec for NAND device operations */
+#define TROP_US_DELAY   2000
+/* Macros to get byte and bit positions of ECC
+ * NOTE(review): COLPOS takes x>>3 (byte index) but BITPOS masks with
+ * 0xf rather than 0x7 — verify against the NFC_RSLT* register layout
+ * before relying on these. */
+#define COLPOS(x)  ((x) >> 3)
+#define BITPOS(x) ((x) & 0xf)
+
+/* Define single bit Error positions in Main & Spare area */
+#define MAIN_SINGLEBIT_ERROR 0x4
+#define SPARE_SINGLEBIT_ERROR 0x1
+
+/* OOB placement block for use with hardware ecc generation
+ * (8-bit bus: 5 ECC bytes at offsets 6-10, free bytes around them) */
+static struct nand_ecclayout nand_hw_eccoob_8 = {
+       .eccbytes = 5,
+       .eccpos = {6, 7, 8, 9, 10},
+       .oobfree = {{0, 5}, {11, 5}, }
+};
+
+/* OOB layout for 16-bit bus devices: same ECC bytes, different free
+ * regions */
+static struct nand_ecclayout nand_hw_eccoob_16 = {
+       .eccbytes = 5,
+       .eccpos = {6, 7, 8, 9, 10},
+       .oobfree = {{0, 6}, {12, 4}, }
+};
+
+#ifdef CONFIG_MTD_PARTITIONS
+/* Partition parsers tried in order when MTD partitioning is enabled. */
+static const char *part_probes[] = { "RedBoot", "cmdlinepart", NULL };
+#endif
+
+/* NFC interrupt handler: mask further NFC interrupts (set NFC_INT_MSK
+ * in CONFIG1) and wake any sleeper in wait_op_done().  The INT status
+ * bit in CONFIG2 is cleared by the waiter, not here. */
+static irqreturn_t mxc_nfc_irq(int irq, void *dev_id)
+{
+       struct mxc_nand_host *host = dev_id;
+
+       uint16_t tmp;
+
+       tmp = readw(host->regs + NFC_CONFIG1);
+       tmp |= NFC_INT_MSK; /* Disable interrupt */
+       writew(tmp, host->regs + NFC_CONFIG1);
+
+       wake_up(&host->irq_waitq);
+
+       return IRQ_HANDLED;
+}
+
+/* This function polls the NANDFC to wait for the basic operation to
+ * complete by checking the INT bit of config2 register.
+ */
+/* This function waits for the NANDFC basic operation to complete by
+ * checking the INT bit of the config2 register — either sleeping on the
+ * NFC interrupt (@useirq) or busy-polling up to @max_retries microseconds.
+ * @param is only echoed in the timeout diagnostic. */
+static void wait_op_done(struct mxc_nand_host *host, int max_retries,
+                               uint16_t param, int useirq)
+{
+       uint32_t tmp;
+
+       if (useirq) {
+               if ((readw(host->regs + NFC_CONFIG2) & NFC_INT) == 0) {
+
+                       tmp = readw(host->regs + NFC_CONFIG1);
+                       tmp  &= ~NFC_INT_MSK;   /* Enable interrupt */
+                       writew(tmp, host->regs + NFC_CONFIG1);
+
+                       wait_event(host->irq_waitq,
+                               readw(host->regs + NFC_CONFIG2) & NFC_INT);
+
+                       /* Acknowledge completion by clearing INT. */
+                       tmp = readw(host->regs + NFC_CONFIG2);
+                       tmp  &= ~NFC_INT;
+                       writew(tmp, host->regs + NFC_CONFIG2);
+               }
+       } else {
+               int done = 0;
+
+               while (max_retries-- > 0) {
+                       if (readw(host->regs + NFC_CONFIG2) & NFC_INT) {
+                               tmp = readw(host->regs + NFC_CONFIG2);
+                               tmp  &= ~NFC_INT;
+                               writew(tmp, host->regs + NFC_CONFIG2);
+                               done = 1;
+                               break;
+                       }
+                       udelay(1);
+               }
+               /* Use an explicit success flag: the old "max_retries <= 0"
+                * test also fired when the operation completed on the very
+                * last retry (max_retries post-decremented to 0). */
+               if (!done)
+                       DEBUG(MTD_DEBUG_LEVEL0, "%s(%d): INT not set\n",
+                             __func__, param);
+       }
+}
+
+/* This function issues the specified command to the NAND device and
+ * waits for completion. */
+static void send_cmd(struct mxc_nand_host *host, uint16_t cmd, int useirq)
+{
+       DEBUG(MTD_DEBUG_LEVEL3, "send_cmd(host, 0x%x, %d)\n", cmd, useirq);
+
+       writew(cmd, host->regs + NFC_FLASH_CMD);
+       writew(NFC_CMD, host->regs + NFC_CONFIG2);
+
+       /* Wait for operation to complete */
+       wait_op_done(host, TROP_US_DELAY, cmd, useirq);
+}
+
+/* This function sends an address (or partial address) to the
+ * NAND device. The address is used to select the source/destination for
+ * a NAND command. */
+/* Send one address cycle (or partial address) to the NAND device; the
+ * address selects the source/destination for a NAND command.
+ * @islast is forwarded as the useirq flag for the completion wait. */
+static void send_addr(struct mxc_nand_host *host, uint16_t addr, int islast)
+{
+       DEBUG(MTD_DEBUG_LEVEL3, "send_addr(host, 0x%x %d)\n", addr, islast);
+
+       writew(addr, host->regs + NFC_FLASH_ADDR);
+       writew(NFC_ADDR, host->regs + NFC_CONFIG2);
+
+       /* Wait for operation to complete */
+       wait_op_done(host, TROP_US_DELAY, addr, islast);
+}
+
+/* This function requests the NANDFC to initate the transfer
+ * of data currently in the NANDFC RAM buffer to the NAND device. */
+/* This function requests the NANDFC to initate the transfer
+ * of data currently in the NANDFC RAM buffer @buf_id to the NAND device.
+ * @spare_only: program only the spare (OOB) area — honoured via the
+ * SP_EN bit, which is only touched for small-page devices. */
+static void send_prog_page(struct mxc_nand_host *host, uint8_t buf_id,
+                       int spare_only)
+{
+       DEBUG(MTD_DEBUG_LEVEL3, "send_prog_page (%d)\n", spare_only);
+
+       /* NANDFC buffer 0 is used for page read/write */
+       writew(buf_id, host->regs + NFC_BUF_ADDR);
+
+       /* Configure spare or page+spare access */
+       if (!host->pagesize_2k) {
+               uint16_t config1 = readw(host->regs + NFC_CONFIG1);
+               if (spare_only)
+                       config1 |= NFC_SP_EN;
+               else
+                       config1 &= ~(NFC_SP_EN);
+               writew(config1, host->regs + NFC_CONFIG1);
+       }
+
+       writew(NFC_INPUT, host->regs + NFC_CONFIG2);
+
+       /* Wait for operation to complete */
+       wait_op_done(host, TROP_US_DELAY, spare_only, true);
+}
+
+/* Requests NANDFC to initated the transfer of data from the
+ * NAND device into in the NANDFC ram buffer. */
+/* Requests NANDFC to initated the transfer of data from the
+ * NAND device into NANDFC RAM buffer @buf_id.
+ * @spare_only: fetch only the spare (OOB) area; SP_EN is only touched
+ * for small-page devices, mirroring send_prog_page(). */
+static void send_read_page(struct mxc_nand_host *host, uint8_t buf_id,
+               int spare_only)
+{
+       DEBUG(MTD_DEBUG_LEVEL3, "send_read_page (%d)\n", spare_only);
+
+       /* NANDFC buffer 0 is used for page read/write */
+       writew(buf_id, host->regs + NFC_BUF_ADDR);
+
+       /* Configure spare or page+spare access */
+       if (!host->pagesize_2k) {
+               uint32_t config1 = readw(host->regs + NFC_CONFIG1);
+               if (spare_only)
+                       config1 |= NFC_SP_EN;
+               else
+                       config1 &= ~NFC_SP_EN;
+               writew(config1, host->regs + NFC_CONFIG1);
+       }
+
+       writew(NFC_OUTPUT, host->regs + NFC_CONFIG2);
+
+       /* Wait for operation to complete */
+       wait_op_done(host, TROP_US_DELAY, spare_only, true);
+}
+
+/* Request the NANDFC to perform a read of the NAND device ID. */
+static void send_read_id(struct mxc_nand_host *host)
+{
+       struct nand_chip *this = &host->nand;
+       uint16_t tmp;
+
+       /* NANDFC buffer 0 is used for device ID output */
+       writew(0x0, host->regs + NFC_BUF_ADDR);
+
+       /* Read ID into main buffer */
+       tmp = readw(host->regs + NFC_CONFIG1);
+       tmp &= ~NFC_SP_EN;
+       writew(tmp, host->regs + NFC_CONFIG1);
+
+       writew(NFC_ID, host->regs + NFC_CONFIG2);
+
+       /* Wait for operation to complete */
+       wait_op_done(host, TROP_US_DELAY, 0, true);
+
+       if (this->options & NAND_BUSWIDTH_16) {
+               void __iomem *main_buf = host->regs + MAIN_AREA0;
+               /* compress the ID info */
+               writeb(readb(main_buf + 2), main_buf + 1);
+               writeb(readb(main_buf + 4), main_buf + 2);
+               writeb(readb(main_buf + 6), main_buf + 3);
+               writeb(readb(main_buf + 8), main_buf + 4);
+               writeb(readb(main_buf + 10), main_buf + 5);
+       }
+}
+
+/* This function requests the NANDFC to perform a read of the
+ * NAND device status and returns the current status. */
+static uint16_t get_dev_status(struct mxc_nand_host *host)
+{
+       void __iomem *main_buf = host->regs + MAIN_AREA1;
+       uint32_t store;
+       uint16_t ret, tmp;
+       /* Issue status request to NAND device */
+
+       /* store the main area1 first word, later do recovery */
+       store = readl(main_buf);
+       /* NANDFC buffer 1 is used for device status to prevent
+        * corruption of read/write buffer on status requests. */
+       writew(1, host->regs + NFC_BUF_ADDR);
+
+       /* Read status into main buffer */
+       tmp = readw(host->regs + NFC_CONFIG1);
+       tmp &= ~NFC_SP_EN;
+       writew(tmp, host->regs + NFC_CONFIG1);
+
+       writew(NFC_STATUS, host->regs + NFC_CONFIG2);
+
+       /* Wait for operation to complete */
+       wait_op_done(host, TROP_US_DELAY, 0, true);
+
+       /* Status is placed in first word of main buffer */
+       /* get status, then recovery area 1 data */
+       ret = readw(main_buf);
+       writel(store, main_buf);
+
+       return ret;
+}
+
+/* This functions is used by upper layer to checks if device is ready */
+static int mxc_nand_dev_ready(struct mtd_info *mtd)
+{
+       /*
+        * NFC handles R/B internally. Therefore, this function
+        * always returns status as ready.
+        */
+       return 1;
+}
+
+/* ecc.hwctl callback — intentionally a no-op. */
+static void mxc_nand_enable_hwecc(struct mtd_info *mtd, int mode)
+{
+       /*
+        * If HW ECC is enabled, we turn it on during init. There is
+        * no need to enable again here.
+        */
+}
+
+/* ecc.correct callback: returns 0 when the hardware corrected (or saw)
+ * nothing fatal, -1 on an uncorrectable error.  The low two status bits
+ * cover the main area, the next bits the spare area; value 2 in either
+ * field signals a 2-bit error. */
+static int mxc_nand_correct_data(struct mtd_info *mtd, u_char *dat,
+                                u_char *read_ecc, u_char *calc_ecc)
+{
+       struct nand_chip *nand_chip = mtd->priv;
+       struct mxc_nand_host *host = nand_chip->priv;
+
+       /*
+        * 1-Bit errors are automatically corrected in HW.  No need for
+        * additional correction.  2-Bit errors cannot be corrected by
+        * HW ECC, so we need to return failure
+        */
+       uint16_t ecc_status = readw(host->regs + NFC_ECC_STATUS_RESULT);
+
+       if (((ecc_status & 0x3) == 2) || ((ecc_status >> 2) == 2)) {
+               DEBUG(MTD_DEBUG_LEVEL0,
+                     "MXC_NAND: HWECC uncorrectable 2-bit ECC error\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+/* ecc.calculate callback — a no-op: the NFC computes ECC in hardware,
+ * so there is nothing to calculate in software. */
+static int mxc_nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
+                                 u_char *ecc_code)
+{
+       return 0;
+}
+
+/* read_byte callback: return one byte from the NFC RAM buffer at the
+ * saved column address (or the device status after NAND_CMD_STATUS),
+ * then advance the column.  The buffers are 16-bit wide, so a word is
+ * read and the requested half is extracted. */
+static u_char mxc_nand_read_byte(struct mtd_info *mtd)
+{
+       struct nand_chip *nand_chip = mtd->priv;
+       struct mxc_nand_host *host = nand_chip->priv;
+       uint8_t ret = 0;
+       uint16_t col, rd_word;
+       uint16_t __iomem *main_buf = host->regs + MAIN_AREA0;
+       uint16_t __iomem *spare_buf = host->regs + SPARE_AREA0;
+
+       /* Check for status request */
+       if (host->status_request)
+               return get_dev_status(host) & 0xFF;
+
+       /* Get column for 16-bit access */
+       col = host->col_addr >> 1;
+
+       /* If we are accessing the spare region */
+       if (host->spare_only)
+               rd_word = readw(&spare_buf[col]);
+       else
+               rd_word = readw(&main_buf[col]);
+
+       /* Pick upper/lower byte of word from RAM buffer */
+       if (host->col_addr & 0x1)
+               ret = (rd_word >> 8) & 0xFF;
+       else
+               ret = rd_word & 0xFF;
+
+       /* Update saved column address */
+       host->col_addr++;
+
+       return ret;
+}
+
+/* read_word callback: return a 16-bit value from the NFC RAM buffer at
+ * the saved column address and advance it by two.  An odd column is
+ * handled by reading two adjacent buffer words and splicing the halves. */
+static uint16_t mxc_nand_read_word(struct mtd_info *mtd)
+{
+       struct nand_chip *nand_chip = mtd->priv;
+       struct mxc_nand_host *host = nand_chip->priv;
+       uint16_t col, rd_word, ret;
+       uint16_t __iomem *p;
+
+       DEBUG(MTD_DEBUG_LEVEL3,
+             "mxc_nand_read_word(col = %d)\n", host->col_addr);
+
+       col = host->col_addr;
+       /* Adjust saved column address */
+       if (col < mtd->writesize && host->spare_only)
+               col += mtd->writesize;
+
+       /* Columns past the page map into the spare buffer area. */
+       if (col < mtd->writesize)
+               p = (host->regs + MAIN_AREA0) + (col >> 1);
+       else
+               p = (host->regs + SPARE_AREA0) + ((col - mtd->writesize) >> 1);
+
+       if (col & 1) {
+               /* Unaligned: low byte from the high half of *p, high byte
+                * from the low half of the next word. */
+               rd_word = readw(p);
+               ret = (rd_word >> 8) & 0xff;
+               rd_word = readw(&p[1]);
+               ret |= (rd_word << 8) & 0xff00;
+
+       } else
+               ret = readw(p);
+
+       /* Update saved column address */
+       host->col_addr = col + 2;
+
+       return ret;
+}
+
+/* Write data of length len to buffer buf. The data to be
+ * written on NAND Flash is first copied to RAMbuffer. After the Data Input
+ * Operation by the NFC, the data is written to NAND Flash */
+/* Write data of length len to buffer buf. The data to be
+ * written on NAND Flash is first copied to RAMbuffer. After the Data Input
+ * Operation by the NFC, the data is written to NAND Flash.
+ * Copies are done through 32-bit accesses: a read-modify-write word
+ * path for unaligned/short stretches and memcpy for aligned bulk. */
+static void mxc_nand_write_buf(struct mtd_info *mtd,
+                               const u_char *buf, int len)
+{
+       struct nand_chip *nand_chip = mtd->priv;
+       struct mxc_nand_host *host = nand_chip->priv;
+       int n, col, i = 0;
+
+       DEBUG(MTD_DEBUG_LEVEL3,
+             "mxc_nand_write_buf(col = %d, len = %d)\n", host->col_addr,
+             len);
+
+       col = host->col_addr;
+
+       /* Adjust saved column address */
+       if (col < mtd->writesize && host->spare_only)
+               col += mtd->writesize;
+
+       /* Clamp the copy to what remains of page + OOB. */
+       n = mtd->writesize + mtd->oobsize - col;
+       n = min(len, n);
+
+       DEBUG(MTD_DEBUG_LEVEL3,
+             "%s:%d: col = %d, n = %d\n", __func__, __LINE__, col, n);
+
+       while (n) {
+               void __iomem *p;
+
+               if (col < mtd->writesize)
+                       p = host->regs + MAIN_AREA0 + (col & ~3);
+               else
+                       p = host->regs + SPARE_AREA0 -
+                                               mtd->writesize + (col & ~3);
+
+               DEBUG(MTD_DEBUG_LEVEL3, "%s:%d: p = %p\n", __func__,
+                     __LINE__, p);
+
+               /* NOTE(review): the (int) pointer cast truncates on 64-bit
+                * targets; only the low alignment bits are used, but
+                * uintptr_t would be the portable spelling. */
+               if (((col | (int)&buf[i]) & 3) || n < 16) {
+                       uint32_t data = 0;
+
+                       /* Preserve untouched bytes of a partial word. */
+                       if (col & 3 || n < 4)
+                               data = readl(p);
+
+                       /* Deliberate fallthrough: starting at the current
+                        * byte lane, fill the remaining lanes of the word. */
+                       switch (col & 3) {
+                       case 0:
+                               if (n) {
+                                       data = (data & 0xffffff00) |
+                                           (buf[i++] << 0);
+                                       n--;
+                                       col++;
+                               }
+                               /* fallthrough */
+                       case 1:
+                               if (n) {
+                                       data = (data & 0xffff00ff) |
+                                           (buf[i++] << 8);
+                                       n--;
+                                       col++;
+                               }
+                               /* fallthrough */
+                       case 2:
+                               if (n) {
+                                       data = (data & 0xff00ffff) |
+                                           (buf[i++] << 16);
+                                       n--;
+                                       col++;
+                               }
+                               /* fallthrough */
+                       case 3:
+                               if (n) {
+                                       data = (data & 0x00ffffff) |
+                                           (buf[i++] << 24);
+                                       n--;
+                                       col++;
+                               }
+                       }
+
+                       writel(data, p);
+               } else {
+                       /* Aligned bulk copy, bounded by the main/spare
+                        * boundary and rounded down to whole words. */
+                       int m = mtd->writesize - col;
+
+                       if (col >= mtd->writesize)
+                               m += mtd->oobsize;
+
+                       m = min(n, m) & ~3;
+
+                       DEBUG(MTD_DEBUG_LEVEL3,
+                             "%s:%d: n = %d, m = %d, i = %d, col = %d\n",
+                             __func__,  __LINE__, n, m, i, col);
+
+                       memcpy(p, &buf[i], m);
+                       col += m;
+                       i += m;
+                       n -= m;
+               }
+       }
+       /* Update saved column address */
+       host->col_addr = col;
+}
+
+/* Read the data buffer from the NAND Flash. To read the data from NAND
+ * Flash first the data output cycle is initiated by the NFC, which copies
+ * the data to RAMbuffer. This data of length len is then copied to buffer buf.
+ */
+/* Read the data buffer from the NAND Flash. To read the data from NAND
+ * Flash first the data output cycle is initiated by the NFC, which copies
+ * the data to RAMbuffer. This data of length len is then copied to buffer buf.
+ * Mirrors mxc_nand_write_buf(): word-at-a-time for unaligned/short
+ * stretches, memcpy for aligned bulk.
+ */
+static void mxc_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len)
+{
+       struct nand_chip *nand_chip = mtd->priv;
+       struct mxc_nand_host *host = nand_chip->priv;
+       int n, col, i = 0;
+
+       DEBUG(MTD_DEBUG_LEVEL3,
+             "mxc_nand_read_buf(col = %d, len = %d)\n", host->col_addr, len);
+
+       col = host->col_addr;
+
+       /* Adjust saved column address */
+       if (col < mtd->writesize && host->spare_only)
+               col += mtd->writesize;
+
+       /* Clamp the copy to what remains of page + OOB. */
+       n = mtd->writesize + mtd->oobsize - col;
+       n = min(len, n);
+
+       while (n) {
+               void __iomem *p;
+
+               if (col < mtd->writesize)
+                       p = host->regs + MAIN_AREA0 + (col & ~3);
+               else
+                       p = host->regs + SPARE_AREA0 -
+                                       mtd->writesize + (col & ~3);
+
+               /* NOTE(review): (int) pointer cast truncates on 64-bit
+                * targets; uintptr_t would be the portable spelling. */
+               if (((col | (int)&buf[i]) & 3) || n < 16) {
+                       uint32_t data;
+
+                       data = readl(p);
+                       /* Deliberate fallthrough: extract the byte lanes
+                        * starting at the current column. */
+                       switch (col & 3) {
+                       case 0:
+                               if (n) {
+                                       buf[i++] = (uint8_t) (data);
+                                       n--;
+                                       col++;
+                               }
+                               /* fallthrough */
+                       case 1:
+                               if (n) {
+                                       buf[i++] = (uint8_t) (data >> 8);
+                                       n--;
+                                       col++;
+                               }
+                               /* fallthrough */
+                       case 2:
+                               if (n) {
+                                       buf[i++] = (uint8_t) (data >> 16);
+                                       n--;
+                                       col++;
+                               }
+                               /* fallthrough */
+                       case 3:
+                               if (n) {
+                                       buf[i++] = (uint8_t) (data >> 24);
+                                       n--;
+                                       col++;
+                               }
+                       }
+               } else {
+                       /* Aligned bulk copy, bounded by the main/spare
+                        * boundary and rounded down to whole words. */
+                       int m = mtd->writesize - col;
+
+                       if (col >= mtd->writesize)
+                               m += mtd->oobsize;
+
+                       m = min(n, m) & ~3;
+                       memcpy(&buf[i], p, m);
+                       col += m;
+                       i += m;
+                       n -= m;
+               }
+       }
+       /* Update saved column address */
+       host->col_addr = col;
+
+}
+
+/* Used by the upper layer to verify the data in NAND Flash
+ * with the data in the buf. */
+static int mxc_nand_verify_buf(struct mtd_info *mtd,
+                               const u_char *buf, int len)
+{
+       return -EFAULT;
+}
+
+/* This function is used by upper layer for select and
+ * deselect of the NAND chip */
+static void mxc_nand_select_chip(struct mtd_info *mtd, int chip)
+{
+       struct nand_chip *nand_chip = mtd->priv;
+       struct mxc_nand_host *host = nand_chip->priv;
+
+#ifdef CONFIG_MTD_NAND_MXC_FORCE_CE
+       if (chip > 0) {
+               DEBUG(MTD_DEBUG_LEVEL0,
+                     "ERROR:  Illegal chip select (chip = %d)\n", chip);
+               return;
+       }
+
+       if (chip == -1) {
+               writew(readw(host->regs + NFC_CONFIG1) & ~NFC_CE,
+                               host->regs + NFC_CONFIG1);
+               return;
+       }
+
+       writew(readw(host->regs + NFC_CONFIG1) | NFC_CE,
+                       host->regs + NFC_CONFIG1);
+#endif
+
+       switch (chip) {
+       case -1:
+               /* Disable the NFC clock */
+               if (host->clk_act) {
+                       clk_disable(host->clk);
+                       host->clk_act = 0;
+               }
+               break;
+       case 0:
+               /* Enable the NFC clock */
+               if (!host->clk_act) {
+                       clk_enable(host->clk);
+                       host->clk_act = 1;
+               }
+               break;
+
+       default:
+               break;
+       }
+}
+
+/* Used by the upper layer to write command to NAND Flash for
+ * different operations to be carried out on NAND Flash */
+static void mxc_nand_command(struct mtd_info *mtd, unsigned command,
+                               int column, int page_addr)
+{
+       struct nand_chip *nand_chip = mtd->priv;
+       struct mxc_nand_host *host = nand_chip->priv;
+       int useirq = true;
+
+       DEBUG(MTD_DEBUG_LEVEL3,
+             "mxc_nand_command (cmd = 0x%x, col = 0x%x, page = 0x%x)\n",
+             command, column, page_addr);
+
+       /* Reset command state information */
+       host->status_request = false;
+
+       /* Command pre-processing step */
+       switch (command) {
+
+       case NAND_CMD_STATUS:
+               host->col_addr = 0;
+               host->status_request = true;
+               break;
+
+       case NAND_CMD_READ0:
+               host->col_addr = column;
+               host->spare_only = false;
+               useirq = false;
+               break;
+
+       case NAND_CMD_READOOB:
+               host->col_addr = column;
+               host->spare_only = true;
+               useirq = false;
+               if (host->pagesize_2k)
+                       command = NAND_CMD_READ0; /* only READ0 is valid */
+               break;
+
+       case NAND_CMD_SEQIN:
+               if (column >= mtd->writesize) {
+                       /*
+                        * FIXME: before send SEQIN command for write OOB,
+                        * We must read one page out.
+                        * For K9F1GXX has no READ1 command to set current HW
+                        * pointer to spare area, we must write the whole page
+                        * including OOB together.
+                        */
+                       if (host->pagesize_2k)
+                               /* call ourself to read a page */
+                               mxc_nand_command(mtd, NAND_CMD_READ0, 0,
+                                               page_addr);
+
+                       host->col_addr = column - mtd->writesize;
+                       host->spare_only = true;
+
+                       /* Set program pointer to spare region */
+                       if (!host->pagesize_2k)
+                               send_cmd(host, NAND_CMD_READOOB, false);
+               } else {
+                       host->spare_only = false;
+                       host->col_addr = column;
+
+                       /* Set program pointer to page start */
+                       if (!host->pagesize_2k)
+                               send_cmd(host, NAND_CMD_READ0, false);
+               }
+               useirq = false;
+               break;
+
+       case NAND_CMD_PAGEPROG:
+               send_prog_page(host, 0, host->spare_only);
+
+               if (host->pagesize_2k) {
+                       /* data in 4 areas datas */
+                       send_prog_page(host, 1, host->spare_only);
+                       send_prog_page(host, 2, host->spare_only);
+                       send_prog_page(host, 3, host->spare_only);
+               }
+
+               break;
+
+       case NAND_CMD_ERASE1:
+               useirq = false;
+               break;
+       }
+
+       /* Write out the command to the device. */
+       send_cmd(host, command, useirq);
+
+       /* Write out column address, if necessary */
+       if (column != -1) {
+               /*
+                * MXC NANDFC can only perform full page+spare or
+                * spare-only read/write.  When the upper layers
+                * perform a read/write buf operation, we will
+                * use the saved column address to index into
+                * the full page.
+                */
+               send_addr(host, 0, page_addr == -1);
+               if (host->pagesize_2k)
+                       /* another col addr cycle for 2k page */
+                       send_addr(host, 0, false);
+       }
+
+       /* Write out page address, if necessary */
+       if (page_addr != -1) {
+               /* paddr_0 - p_addr_7 */
+               send_addr(host, (page_addr & 0xff), false);
+
+               if (host->pagesize_2k) {
+                       send_addr(host, (page_addr >> 8) & 0xFF, false);
+                       if (mtd->size >= 0x40000000)
+                               send_addr(host, (page_addr >> 16) & 0xff, true);
+               } else {
+                       /* One more address cycle for higher density devices */
+                       if (mtd->size >= 0x4000000) {
+                               /* paddr_8 - paddr_15 */
+                               send_addr(host, (page_addr >> 8) & 0xff, false);
+                               send_addr(host, (page_addr >> 16) & 0xff, true);
+                       } else
+                               /* paddr_8 - paddr_15 */
+                               send_addr(host, (page_addr >> 8) & 0xff, true);
+               }
+       }
+
+       /* Command post-processing step */
+       switch (command) {
+
+       case NAND_CMD_RESET:
+               break;
+
+       case NAND_CMD_READOOB:
+       case NAND_CMD_READ0:
+               if (host->pagesize_2k) {
+                       /* send read confirm command */
+                       send_cmd(host, NAND_CMD_READSTART, true);
+                       /* read for each AREA */
+                       send_read_page(host, 0, host->spare_only);
+                       send_read_page(host, 1, host->spare_only);
+                       send_read_page(host, 2, host->spare_only);
+                       send_read_page(host, 3, host->spare_only);
+               } else
+                       send_read_page(host, 0, host->spare_only);
+               break;
+
+       case NAND_CMD_READID:
+               send_read_id(host);
+               break;
+
+       case NAND_CMD_PAGEPROG:
+               break;
+
+       case NAND_CMD_STATUS:
+               break;
+
+       case NAND_CMD_ERASE2:
+               break;
+       }
+}
+
+static int __init mxcnd_probe(struct platform_device *pdev)
+{
+       struct nand_chip *this;
+       struct mtd_info *mtd;
+       struct mxc_nand_platform_data *pdata = pdev->dev.platform_data;
+       struct mxc_nand_host *host;
+       struct resource *res;
+       uint16_t tmp;
+       int err = 0, nr_parts = 0;
+
+       /* Allocate memory for MTD device structure and private data */
+       host = kzalloc(sizeof(struct mxc_nand_host), GFP_KERNEL);
+       if (!host)
+               return -ENOMEM;
+
+       host->dev = &pdev->dev;
+       /* structures must be linked */
+       this = &host->nand;
+       mtd = &host->mtd;
+       mtd->priv = this;
+       mtd->owner = THIS_MODULE;
+
+       /* 50 us command delay time */
+       this->chip_delay = 5;
+
+       this->priv = host;
+       this->dev_ready = mxc_nand_dev_ready;
+       this->cmdfunc = mxc_nand_command;
+       this->select_chip = mxc_nand_select_chip;
+       this->read_byte = mxc_nand_read_byte;
+       this->read_word = mxc_nand_read_word;
+       this->write_buf = mxc_nand_write_buf;
+       this->read_buf = mxc_nand_read_buf;
+       this->verify_buf = mxc_nand_verify_buf;
+
+       host->clk = clk_get(&pdev->dev, "nfc_clk");
+       if (IS_ERR(host->clk))
+               goto eclk;
+
+       clk_enable(host->clk);
+       host->clk_act = 1;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res) {
+               err = -ENODEV;
+               goto eres;
+       }
+
+       host->regs = ioremap(res->start, res->end - res->start + 1);
+       if (!host->regs) {
+               err = -EIO;
+               goto eres;
+       }
+
+       tmp = readw(host->regs + NFC_CONFIG1);
+       tmp |= NFC_INT_MSK;
+       writew(tmp, host->regs + NFC_CONFIG1);
+
+       init_waitqueue_head(&host->irq_waitq);
+
+       host->irq = platform_get_irq(pdev, 0);
+
+       err = request_irq(host->irq, mxc_nfc_irq, 0, "mxc_nd", host);
+       if (err)
+               goto eirq;
+
+       if (pdata->hw_ecc) {
+               this->ecc.calculate = mxc_nand_calculate_ecc;
+               this->ecc.hwctl = mxc_nand_enable_hwecc;
+               this->ecc.correct = mxc_nand_correct_data;
+               this->ecc.mode = NAND_ECC_HW;
+               this->ecc.size = 512;
+               this->ecc.bytes = 3;
+               this->ecc.layout = &nand_hw_eccoob_8;
+               tmp = readw(host->regs + NFC_CONFIG1);
+               tmp |= NFC_ECC_EN;
+               writew(tmp, host->regs + NFC_CONFIG1);
+       } else {
+               this->ecc.size = 512;
+               this->ecc.bytes = 3;
+               this->ecc.layout = &nand_hw_eccoob_8;
+               this->ecc.mode = NAND_ECC_SOFT;
+               tmp = readw(host->regs + NFC_CONFIG1);
+               tmp &= ~NFC_ECC_EN;
+               writew(tmp, host->regs + NFC_CONFIG1);
+       }
+
+       /* Reset NAND */
+       this->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
+
+       /* preset operation */
+       /* Unlock the internal RAM Buffer */
+       writew(0x2, host->regs + NFC_CONFIG);
+
+       /* Blocks to be unlocked */
+       writew(0x0, host->regs + NFC_UNLOCKSTART_BLKADDR);
+       writew(0x4000, host->regs + NFC_UNLOCKEND_BLKADDR);
+
+       /* Unlock Block Command for given address range */
+       writew(0x4, host->regs + NFC_WRPROT);
+
+       /* NAND bus width determines access functions used by upper layer */
+       if (pdata->width == 2) {
+               this->options |= NAND_BUSWIDTH_16;
+               this->ecc.layout = &nand_hw_eccoob_16;
+       }
+
+       host->pagesize_2k = 0;
+
+       /* Scan to find existence of the device */
+       if (nand_scan(mtd, 1)) {
+               DEBUG(MTD_DEBUG_LEVEL0,
+                     "MXC_ND: Unable to find any NAND device.\n");
+               err = -ENXIO;
+               goto escan;
+       }
+
+       /* Register the partitions */
+#ifdef CONFIG_MTD_PARTITIONS
+       nr_parts =
+           parse_mtd_partitions(mtd, part_probes, &host->parts, 0);
+       if (nr_parts > 0)
+               add_mtd_partitions(mtd, host->parts, nr_parts);
+       else
+#endif
+       {
+               pr_info("Registering %s as whole device\n", mtd->name);
+               add_mtd_device(mtd);
+       }
+
+       platform_set_drvdata(pdev, host);
+
+       return 0;
+
+escan:
+       free_irq(host->irq, NULL);
+eirq:
+       iounmap(host->regs);
+eres:
+       clk_put(host->clk);
+eclk:
+       kfree(host);
+
+       return err;
+}
+
+static int __devexit mxcnd_remove(struct platform_device *pdev)
+{
+       struct mxc_nand_host *host = platform_get_drvdata(pdev);
+
+       clk_put(host->clk);
+
+       platform_set_drvdata(pdev, NULL);
+
+       nand_release(&host->mtd);
+       free_irq(host->irq, NULL);
+       iounmap(host->regs);
+       kfree(host);
+
+       return 0;
+}
+
+#ifdef CONFIG_PM
+static int mxcnd_suspend(struct platform_device *pdev, pm_message_t state)
+{
+       struct mtd_info *info = platform_get_drvdata(pdev);
+       int ret = 0;
+
+       DEBUG(MTD_DEBUG_LEVEL0, "MXC_ND : NAND suspend\n");
+       if (info)
+               ret = info->suspend(info);
+
+       /* Disable the NFC clock */
+       clk_disable(nfc_clk);   /* FIXME */
+
+       return ret;
+}
+
+static int mxcnd_resume(struct platform_device *pdev)
+{
+       struct mtd_info *info = platform_get_drvdata(pdev);
+       int ret = 0;
+
+       DEBUG(MTD_DEBUG_LEVEL0, "MXC_ND : NAND resume\n");
+       /* Enable the NFC clock */
+       clk_enable(nfc_clk);    /* FIXME */
+
+       if (info)
+               info->resume(info);
+
+       return ret;
+}
+
+#else
+# define mxcnd_suspend   NULL
+# define mxcnd_resume    NULL
+#endif                         /* CONFIG_PM */
+
+static struct platform_driver mxcnd_driver = {
+       .driver = {
+                  .name = DRIVER_NAME,
+                  },
+       .remove = __exit_p(mxcnd_remove),
+       .suspend = mxcnd_suspend,
+       .resume = mxcnd_resume,
+};
+
+static int __init mxc_nd_init(void)
+{
+       /* Register the device driver structure. */
+       pr_info("MXC MTD nand Driver\n");
+       if (platform_driver_probe(&mxcnd_driver, mxcnd_probe) != 0) {
+               printk(KERN_ERR "Driver register failed for mxcnd_driver\n");
+               return -ENODEV;
+       }
+       return 0;
+}
+
+static void __exit mxc_nd_cleanup(void)
+{
+       /* Unregister the device structure */
+       platform_driver_unregister(&mxcnd_driver);
+}
+
+module_init(mxc_nd_init);
+module_exit(mxc_nd_cleanup);
+
+MODULE_AUTHOR("Freescale Semiconductor, Inc.");
+MODULE_DESCRIPTION("MXC NAND MTD driver");
+MODULE_LICENSE("GPL");
index d1129bae6c27d20731ef0ead6b86a863e590e7f4..0a9c9cd33f96b0f15f7f1d6f87260baaf9fb05df 100644 (file)
@@ -801,9 +801,9 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
  * nand_read_subpage - [REPLACABLE] software ecc based sub-page read function
  * @mtd:       mtd info structure
  * @chip:      nand chip info structure
- * @dataofs    offset of requested data within the page
- * @readlen    data length
- * @buf:       buffer to store read data
+ * @data_offs: offset of requested data within the page
+ * @readlen:   data length
+ * @bufpoi:    buffer to store read data
  */
 static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, uint32_t data_offs, uint32_t readlen, uint8_t *bufpoi)
 {
@@ -2042,7 +2042,7 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
                return -EINVAL;
        }
 
-       instr->fail_addr = 0xffffffff;
+       instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
 
        /* Grab the lock and see if the device is available */
        nand_get_device(chip, mtd, FL_ERASING);
@@ -2318,6 +2318,12 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
        /* Select the device */
        chip->select_chip(mtd, 0);
 
+       /*
+        * Reset the chip, required by some chips (e.g. Micron MT29FxGxxxxx)
+        * after power-up
+        */
+       chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
+
        /* Send the command for reading device ID */
        chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1);
 
@@ -2488,6 +2494,8 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips)
        /* Check for a chip array */
        for (i = 1; i < maxchips; i++) {
                chip->select_chip(mtd, i);
+               /* See comment in nand_get_flash_type for reset */
+               chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
                /* Send the command for reading device ID */
                chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1);
                /* Read manufacturer and device IDs */
index 918a806a8471f7aaa1b68b29e5e401a15737d4a4..868147acce2cec19677605e2bdcf67d8b5e9a14c 100644 (file)
@@ -1,13 +1,18 @@
 /*
- * This file contains an ECC algorithm from Toshiba that detects and
- * corrects 1 bit errors in a 256 byte block of data.
+ * This file contains an ECC algorithm that detects and corrects 1 bit
+ * errors in a 256 byte block of data.
  *
  * drivers/mtd/nand/nand_ecc.c
  *
- * Copyright (C) 2000-2004 Steven J. Hill (sjhill@realitydiluted.com)
- *                         Toshiba America Electronics Components, Inc.
+ * Copyright Â© 2008 Koninklijke Philips Electronics NV.
+ *                  Author: Frans Meulenbroeks
  *
- * Copyright (C) 2006 Thomas Gleixner <tglx@linutronix.de>
+ * Completely replaces the previous ECC implementation which was written by:
+ *   Steven J. Hill (sjhill@realitydiluted.com)
+ *   Thomas Gleixner (tglx@linutronix.de)
+ *
+ * Information on how this algorithm works and how it was developed
+ * can be found in Documentation/mtd/nand_ecc.txt
  *
  * This file is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
  * with this file; if not, write to the Free Software Foundation, Inc.,
  * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
  *
- * As a special exception, if other files instantiate templates or use
- * macros or inline functions from these files, or you compile these
- * files and link them with other works to produce a work based on these
- * files, these files do not by themselves cause the resulting work to be
- * covered by the GNU General Public License. However the source code for
- * these files must still be made available in accordance with section (3)
- * of the GNU General Public License.
- *
- * This exception does not invalidate any other reasons why a work based on
- * this file might be covered by the GNU General Public License.
  */
 
+/*
+ * The STANDALONE macro is useful when running the code outside the kernel
+ * e.g. when running the code in a testbed or a benchmark program.
+ * When STANDALONE is used, the module related macros are commented out
+ * as well as the linux include files.
+ * Instead a private definition of mtd_info is given to satisfy the compiler
+ * (the code does not use mtd_info, so the code does not care)
+ */
+#ifndef STANDALONE
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
 #include <linux/mtd/nand_ecc.h>
+#include <asm/byteorder.h>
+#else
+#include <stdint.h>
+struct mtd_info;
+#define EXPORT_SYMBOL(x)  /* x */
+
+#define MODULE_LICENSE(x)      /* x */
+#define MODULE_AUTHOR(x)       /* x */
+#define MODULE_DESCRIPTION(x)  /* x */
+
+#define printk printf
+#define KERN_ERR               ""
+#endif
+
+/*
+ * invparity is a 256 byte table that contains the odd parity
+ * for each byte. So if the number of bits in a byte is even,
+ * the array element is 1, and when the number of bits is odd
+ * the array element is 0.
+ */
+static const char invparity[256] = {
+       1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+       0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+       0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+       1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+       0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+       1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+       1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+       0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+       0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+       1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+       1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+       0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+       1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+       0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+       0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+       1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
+};
+
+/*
+ * bitsperbyte contains the number of bits per byte
+ * this is only used for testing and repairing parity
+ * (a precalculated value slightly improves performance)
+ */
+static const char bitsperbyte[256] = {
+       0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+       1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+       1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+       1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+       3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+       1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+       3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+       3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+       3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+       4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
+};
 
 /*
- * Pre-calculated 256-way 1 byte column parity
+ * addressbits is a lookup table to filter out the bits from the xor-ed
+ * ecc data that identify the faulty location.
+ * this is only used for repairing parity
+ * see the comments in nand_correct_data for more details
  */
-static const u_char nand_ecc_precalc_table[] = {
-       0x00, 0x55, 0x56, 0x03, 0x59, 0x0c, 0x0f, 0x5a, 0x5a, 0x0f, 0x0c, 0x59, 0x03, 0x56, 0x55, 0x00,
-       0x65, 0x30, 0x33, 0x66, 0x3c, 0x69, 0x6a, 0x3f, 0x3f, 0x6a, 0x69, 0x3c, 0x66, 0x33, 0x30, 0x65,
-       0x66, 0x33, 0x30, 0x65, 0x3f, 0x6a, 0x69, 0x3c, 0x3c, 0x69, 0x6a, 0x3f, 0x65, 0x30, 0x33, 0x66,
-       0x03, 0x56, 0x55, 0x00, 0x5a, 0x0f, 0x0c, 0x59, 0x59, 0x0c, 0x0f, 0x5a, 0x00, 0x55, 0x56, 0x03,
-       0x69, 0x3c, 0x3f, 0x6a, 0x30, 0x65, 0x66, 0x33, 0x33, 0x66, 0x65, 0x30, 0x6a, 0x3f, 0x3c, 0x69,
-       0x0c, 0x59, 0x5a, 0x0f, 0x55, 0x00, 0x03, 0x56, 0x56, 0x03, 0x00, 0x55, 0x0f, 0x5a, 0x59, 0x0c,
-       0x0f, 0x5a, 0x59, 0x0c, 0x56, 0x03, 0x00, 0x55, 0x55, 0x00, 0x03, 0x56, 0x0c, 0x59, 0x5a, 0x0f,
-       0x6a, 0x3f, 0x3c, 0x69, 0x33, 0x66, 0x65, 0x30, 0x30, 0x65, 0x66, 0x33, 0x69, 0x3c, 0x3f, 0x6a,
-       0x6a, 0x3f, 0x3c, 0x69, 0x33, 0x66, 0x65, 0x30, 0x30, 0x65, 0x66, 0x33, 0x69, 0x3c, 0x3f, 0x6a,
-       0x0f, 0x5a, 0x59, 0x0c, 0x56, 0x03, 0x00, 0x55, 0x55, 0x00, 0x03, 0x56, 0x0c, 0x59, 0x5a, 0x0f,
-       0x0c, 0x59, 0x5a, 0x0f, 0x55, 0x00, 0x03, 0x56, 0x56, 0x03, 0x00, 0x55, 0x0f, 0x5a, 0x59, 0x0c,
-       0x69, 0x3c, 0x3f, 0x6a, 0x30, 0x65, 0x66, 0x33, 0x33, 0x66, 0x65, 0x30, 0x6a, 0x3f, 0x3c, 0x69,
-       0x03, 0x56, 0x55, 0x00, 0x5a, 0x0f, 0x0c, 0x59, 0x59, 0x0c, 0x0f, 0x5a, 0x00, 0x55, 0x56, 0x03,
-       0x66, 0x33, 0x30, 0x65, 0x3f, 0x6a, 0x69, 0x3c, 0x3c, 0x69, 0x6a, 0x3f, 0x65, 0x30, 0x33, 0x66,
-       0x65, 0x30, 0x33, 0x66, 0x3c, 0x69, 0x6a, 0x3f, 0x3f, 0x6a, 0x69, 0x3c, 0x66, 0x33, 0x30, 0x65,
-       0x00, 0x55, 0x56, 0x03, 0x59, 0x0c, 0x0f, 0x5a, 0x5a, 0x0f, 0x0c, 0x59, 0x03, 0x56, 0x55, 0x00
+static const char addressbits[256] = {
+       0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+       0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+       0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+       0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+       0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+       0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+       0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+       0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+       0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+       0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+       0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
+       0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
+       0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+       0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+       0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
+       0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
+       0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+       0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+       0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+       0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+       0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+       0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
+       0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+       0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
+       0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+       0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+       0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
+       0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
+       0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+       0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
+       0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
+       0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f
 };
 
 /**
- * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256-byte block
+ * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
+ *                      block
  * @mtd:       MTD block structure
- * @dat:       raw data
- * @ecc_code:  buffer for ECC
+ * @buf:       input buffer with raw data
+ * @code:      output buffer with ECC
  */
-int nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
-                      u_char *ecc_code)
+int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
+                      unsigned char *code)
 {
-       uint8_t idx, reg1, reg2, reg3, tmp1, tmp2;
        int i;
+       const uint32_t *bp = (uint32_t *)buf;
+       /* 256 or 512 bytes/ecc  */
+       const uint32_t eccsize_mult =
+                       (((struct nand_chip *)mtd->priv)->ecc.size) >> 8;
+       uint32_t cur;           /* current value in buffer */
+       /* rp0..rp15..rp17 are the various accumulated parities (per byte) */
+       uint32_t rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+       uint32_t rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15, rp16;
+       uint32_t uninitialized_var(rp17);       /* to make compiler happy */
+       uint32_t par;           /* the cumulative parity for all data */
+       uint32_t tmppar;        /* the cumulative parity for this iteration;
+                                  for rp12, rp14 and rp16 at the end of the
+                                  loop */
+
+       par = 0;
+       rp4 = 0;
+       rp6 = 0;
+       rp8 = 0;
+       rp10 = 0;
+       rp12 = 0;
+       rp14 = 0;
+       rp16 = 0;
+
+       /*
+        * The loop is unrolled a number of times;
+        * This avoids if statements to decide on which rp value to update
+        * Also we process the data by longwords.
+        * Note: passing unaligned data might give a performance penalty.
+        * It is assumed that the buffers are aligned.
+        * tmppar is the cumulative sum of this iteration.
+        * needed for calculating rp12, rp14, rp16 and par
+        * also used as a performance improvement for rp6, rp8 and rp10
+        */
+       for (i = 0; i < eccsize_mult << 2; i++) {
+               cur = *bp++;
+               tmppar = cur;
+               rp4 ^= cur;
+               cur = *bp++;
+               tmppar ^= cur;
+               rp6 ^= tmppar;
+               cur = *bp++;
+               tmppar ^= cur;
+               rp4 ^= cur;
+               cur = *bp++;
+               tmppar ^= cur;
+               rp8 ^= tmppar;
 
-       /* Initialize variables */
-       reg1 = reg2 = reg3 = 0;
+               cur = *bp++;
+               tmppar ^= cur;
+               rp4 ^= cur;
+               rp6 ^= cur;
+               cur = *bp++;
+               tmppar ^= cur;
+               rp6 ^= cur;
+               cur = *bp++;
+               tmppar ^= cur;
+               rp4 ^= cur;
+               cur = *bp++;
+               tmppar ^= cur;
+               rp10 ^= tmppar;
 
-       /* Build up column parity */
-       for(i = 0; i < 256; i++) {
-               /* Get CP0 - CP5 from table */
-               idx = nand_ecc_precalc_table[*dat++];
-               reg1 ^= (idx & 0x3f);
+               cur = *bp++;
+               tmppar ^= cur;
+               rp4 ^= cur;
+               rp6 ^= cur;
+               rp8 ^= cur;
+               cur = *bp++;
+               tmppar ^= cur;
+               rp6 ^= cur;
+               rp8 ^= cur;
+               cur = *bp++;
+               tmppar ^= cur;
+               rp4 ^= cur;
+               rp8 ^= cur;
+               cur = *bp++;
+               tmppar ^= cur;
+               rp8 ^= cur;
 
-               /* All bit XOR = 1 ? */
-               if (idx & 0x40) {
-                       reg3 ^= (uint8_t) i;
-                       reg2 ^= ~((uint8_t) i);
-               }
+               cur = *bp++;
+               tmppar ^= cur;
+               rp4 ^= cur;
+               rp6 ^= cur;
+               cur = *bp++;
+               tmppar ^= cur;
+               rp6 ^= cur;
+               cur = *bp++;
+               tmppar ^= cur;
+               rp4 ^= cur;
+               cur = *bp++;
+               tmppar ^= cur;
+
+               par ^= tmppar;
+               if ((i & 0x1) == 0)
+                       rp12 ^= tmppar;
+               if ((i & 0x2) == 0)
+                       rp14 ^= tmppar;
+               if (eccsize_mult == 2 && (i & 0x4) == 0)
+                       rp16 ^= tmppar;
        }
 
-       /* Create non-inverted ECC code from line parity */
-       tmp1  = (reg3 & 0x80) >> 0; /* B7 -> B7 */
-       tmp1 |= (reg2 & 0x80) >> 1; /* B7 -> B6 */
-       tmp1 |= (reg3 & 0x40) >> 1; /* B6 -> B5 */
-       tmp1 |= (reg2 & 0x40) >> 2; /* B6 -> B4 */
-       tmp1 |= (reg3 & 0x20) >> 2; /* B5 -> B3 */
-       tmp1 |= (reg2 & 0x20) >> 3; /* B5 -> B2 */
-       tmp1 |= (reg3 & 0x10) >> 3; /* B4 -> B1 */
-       tmp1 |= (reg2 & 0x10) >> 4; /* B4 -> B0 */
-
-       tmp2  = (reg3 & 0x08) << 4; /* B3 -> B7 */
-       tmp2 |= (reg2 & 0x08) << 3; /* B3 -> B6 */
-       tmp2 |= (reg3 & 0x04) << 3; /* B2 -> B5 */
-       tmp2 |= (reg2 & 0x04) << 2; /* B2 -> B4 */
-       tmp2 |= (reg3 & 0x02) << 2; /* B1 -> B3 */
-       tmp2 |= (reg2 & 0x02) << 1; /* B1 -> B2 */
-       tmp2 |= (reg3 & 0x01) << 1; /* B0 -> B1 */
-       tmp2 |= (reg2 & 0x01) << 0; /* B7 -> B0 */
-
-       /* Calculate final ECC code */
-#ifdef CONFIG_MTD_NAND_ECC_SMC
-       ecc_code[0] = ~tmp2;
-       ecc_code[1] = ~tmp1;
+       /*
+        * handle the fact that we use longword operations
+        * we'll bring rp4..rp14..rp16 back to single byte entities by
+        * shifting and xoring first fold the upper and lower 16 bits,
+        * then the upper and lower 8 bits.
+        */
+       rp4 ^= (rp4 >> 16);
+       rp4 ^= (rp4 >> 8);
+       rp4 &= 0xff;
+       rp6 ^= (rp6 >> 16);
+       rp6 ^= (rp6 >> 8);
+       rp6 &= 0xff;
+       rp8 ^= (rp8 >> 16);
+       rp8 ^= (rp8 >> 8);
+       rp8 &= 0xff;
+       rp10 ^= (rp10 >> 16);
+       rp10 ^= (rp10 >> 8);
+       rp10 &= 0xff;
+       rp12 ^= (rp12 >> 16);
+       rp12 ^= (rp12 >> 8);
+       rp12 &= 0xff;
+       rp14 ^= (rp14 >> 16);
+       rp14 ^= (rp14 >> 8);
+       rp14 &= 0xff;
+       if (eccsize_mult == 2) {
+               rp16 ^= (rp16 >> 16);
+               rp16 ^= (rp16 >> 8);
+               rp16 &= 0xff;
+       }
+
+       /*
+        * we also need to calculate the row parity for rp0..rp3
+        * This is present in par, because par is now
+        * rp3 rp3 rp2 rp2 in little endian and
+        * rp2 rp2 rp3 rp3 in big endian
+        * as well as
+        * rp1 rp0 rp1 rp0 in little endian and
+        * rp0 rp1 rp0 rp1 in big endian
+        * First calculate rp2 and rp3
+        */
+#ifdef __BIG_ENDIAN
+       rp2 = (par >> 16);
+       rp2 ^= (rp2 >> 8);
+       rp2 &= 0xff;
+       rp3 = par & 0xffff;
+       rp3 ^= (rp3 >> 8);
+       rp3 &= 0xff;
 #else
-       ecc_code[0] = ~tmp1;
-       ecc_code[1] = ~tmp2;
+       rp3 = (par >> 16);
+       rp3 ^= (rp3 >> 8);
+       rp3 &= 0xff;
+       rp2 = par & 0xffff;
+       rp2 ^= (rp2 >> 8);
+       rp2 &= 0xff;
 #endif
-       ecc_code[2] = ((~reg1) << 2) | 0x03;
 
-       return 0;
-}
-EXPORT_SYMBOL(nand_calculate_ecc);
+       /* reduce par to 16 bits then calculate rp1 and rp0 */
+       par ^= (par >> 16);
+#ifdef __BIG_ENDIAN
+       rp0 = (par >> 8) & 0xff;
+       rp1 = (par & 0xff);
+#else
+       rp1 = (par >> 8) & 0xff;
+       rp0 = (par & 0xff);
+#endif
 
-static inline int countbits(uint32_t byte)
-{
-       int res = 0;
+       /* finally reduce par to 8 bits */
+       par ^= (par >> 8);
+       par &= 0xff;
 
-       for (;byte; byte >>= 1)
-               res += byte & 0x01;
-       return res;
+       /*
+        * and calculate rp5..rp15..rp17
+        * note that par = rp4 ^ rp5 and due to the commutative property
+        * of the ^ operator we can say:
+        * rp5 = (par ^ rp4);
+        * The & 0xff seems superfluous, but benchmarking learned that
+        * leaving it out gives slightly worse results. No idea why, probably
+        * it has to do with the way the pipeline in pentium is organized.
+        */
+       rp5 = (par ^ rp4) & 0xff;
+       rp7 = (par ^ rp6) & 0xff;
+       rp9 = (par ^ rp8) & 0xff;
+       rp11 = (par ^ rp10) & 0xff;
+       rp13 = (par ^ rp12) & 0xff;
+       rp15 = (par ^ rp14) & 0xff;
+       if (eccsize_mult == 2)
+               rp17 = (par ^ rp16) & 0xff;
+
+       /*
+        * Finally calculate the ecc bits.
+        * Again here it might seem that there are performance optimisations
+        * possible, but benchmarks showed that on the system this is developed
+        * the code below is the fastest
+        */
+#ifdef CONFIG_MTD_NAND_ECC_SMC
+       code[0] =
+           (invparity[rp7] << 7) |
+           (invparity[rp6] << 6) |
+           (invparity[rp5] << 5) |
+           (invparity[rp4] << 4) |
+           (invparity[rp3] << 3) |
+           (invparity[rp2] << 2) |
+           (invparity[rp1] << 1) |
+           (invparity[rp0]);
+       code[1] =
+           (invparity[rp15] << 7) |
+           (invparity[rp14] << 6) |
+           (invparity[rp13] << 5) |
+           (invparity[rp12] << 4) |
+           (invparity[rp11] << 3) |
+           (invparity[rp10] << 2) |
+           (invparity[rp9] << 1)  |
+           (invparity[rp8]);
+#else
+       code[1] =
+           (invparity[rp7] << 7) |
+           (invparity[rp6] << 6) |
+           (invparity[rp5] << 5) |
+           (invparity[rp4] << 4) |
+           (invparity[rp3] << 3) |
+           (invparity[rp2] << 2) |
+           (invparity[rp1] << 1) |
+           (invparity[rp0]);
+       code[0] =
+           (invparity[rp15] << 7) |
+           (invparity[rp14] << 6) |
+           (invparity[rp13] << 5) |
+           (invparity[rp12] << 4) |
+           (invparity[rp11] << 3) |
+           (invparity[rp10] << 2) |
+           (invparity[rp9] << 1)  |
+           (invparity[rp8]);
+#endif
+       if (eccsize_mult == 1)
+               code[2] =
+                   (invparity[par & 0xf0] << 7) |
+                   (invparity[par & 0x0f] << 6) |
+                   (invparity[par & 0xcc] << 5) |
+                   (invparity[par & 0x33] << 4) |
+                   (invparity[par & 0xaa] << 3) |
+                   (invparity[par & 0x55] << 2) |
+                   3;
+       else
+               code[2] =
+                   (invparity[par & 0xf0] << 7) |
+                   (invparity[par & 0x0f] << 6) |
+                   (invparity[par & 0xcc] << 5) |
+                   (invparity[par & 0x33] << 4) |
+                   (invparity[par & 0xaa] << 3) |
+                   (invparity[par & 0x55] << 2) |
+                   (invparity[rp17] << 1) |
+                   (invparity[rp16] << 0);
+       return 0;
 }
+EXPORT_SYMBOL(nand_calculate_ecc);
 
 /**
  * nand_correct_data - [NAND Interface] Detect and correct bit error(s)
  * @mtd:       MTD block structure
- * @dat:       raw data read from the chip
+ * @buf:       raw data read from the chip
  * @read_ecc:  ECC from the chip
  * @calc_ecc:  the ECC calculated from raw data
  *
- * Detect and correct a 1 bit error for 256 byte block
+ * Detect and correct a 1 bit error for 256/512 byte block
  */
-int nand_correct_data(struct mtd_info *mtd, u_char *dat,
-                     u_char *read_ecc, u_char *calc_ecc)
+int nand_correct_data(struct mtd_info *mtd, unsigned char *buf,
+                     unsigned char *read_ecc, unsigned char *calc_ecc)
 {
-       uint8_t s0, s1, s2;
+       unsigned char b0, b1, b2;
+       unsigned char byte_addr, bit_addr;
+       /* 256 or 512 bytes/ecc  */
+       const uint32_t eccsize_mult =
+                       (((struct nand_chip *)mtd->priv)->ecc.size) >> 8;
 
+       /*
+        * b0 to b2 indicate which bit is faulty (if any)
+        * we might need the xor result  more than once,
+        * so keep them in a local var
+       */
 #ifdef CONFIG_MTD_NAND_ECC_SMC
-       s0 = calc_ecc[0] ^ read_ecc[0];
-       s1 = calc_ecc[1] ^ read_ecc[1];
-       s2 = calc_ecc[2] ^ read_ecc[2];
+       b0 = read_ecc[0] ^ calc_ecc[0];
+       b1 = read_ecc[1] ^ calc_ecc[1];
 #else
-       s1 = calc_ecc[0] ^ read_ecc[0];
-       s0 = calc_ecc[1] ^ read_ecc[1];
-       s2 = calc_ecc[2] ^ read_ecc[2];
+       b0 = read_ecc[1] ^ calc_ecc[1];
+       b1 = read_ecc[0] ^ calc_ecc[0];
 #endif
-       if ((s0 | s1 | s2) == 0)
-               return 0;
-
-       /* Check for a single bit error */
-       if( ((s0 ^ (s0 >> 1)) & 0x55) == 0x55 &&
-           ((s1 ^ (s1 >> 1)) & 0x55) == 0x55 &&
-           ((s2 ^ (s2 >> 1)) & 0x54) == 0x54) {
+       b2 = read_ecc[2] ^ calc_ecc[2];
 
-               uint32_t byteoffs, bitnum;
+       /* check if there are any bitfaults */
 
-               byteoffs = (s1 << 0) & 0x80;
-               byteoffs |= (s1 << 1) & 0x40;
-               byteoffs |= (s1 << 2) & 0x20;
-               byteoffs |= (s1 << 3) & 0x10;
+       /* repeated if statements are slightly more efficient than switch ... */
+       /* ordered in order of likelihood */
 
-               byteoffs |= (s0 >> 4) & 0x08;
-               byteoffs |= (s0 >> 3) & 0x04;
-               byteoffs |= (s0 >> 2) & 0x02;
-               byteoffs |= (s0 >> 1) & 0x01;
-
-               bitnum = (s2 >> 5) & 0x04;
-               bitnum |= (s2 >> 4) & 0x02;
-               bitnum |= (s2 >> 3) & 0x01;
-
-               dat[byteoffs] ^= (1 << bitnum);
+       if ((b0 | b1 | b2) == 0)
+               return 0;       /* no error */
 
+       if ((((b0 ^ (b0 >> 1)) & 0x55) == 0x55) &&
+           (((b1 ^ (b1 >> 1)) & 0x55) == 0x55) &&
+           ((eccsize_mult == 1 && ((b2 ^ (b2 >> 1)) & 0x54) == 0x54) ||
+            (eccsize_mult == 2 && ((b2 ^ (b2 >> 1)) & 0x55) == 0x55))) {
+       /* single bit error */
+               /*
+                * rp17/rp15/13/11/9/7/5/3/1 indicate which byte is the faulty
+                * byte, cp 5/3/1 indicate the faulty bit.
+                * A lookup table (called addressbits) is used to filter
+                * the bits from the byte they are in.
+                * A marginal optimisation is possible by having three
+                * different lookup tables.
+                * One as we have now (for b0), one for b2
+                * (that would avoid the >> 1), and one for b1 (with all values
+                * << 4). However it was felt that introducing two more tables
+                * hardly justify the gain.
+                *
+                * The b2 shift is there to get rid of the lowest two bits.
+                * We could also do addressbits[b2] >> 1 but for the
+                * performance it does not make any difference
+                */
+               if (eccsize_mult == 1)
+                       byte_addr = (addressbits[b1] << 4) + addressbits[b0];
+               else
+                       byte_addr = (addressbits[b2 & 0x3] << 8) +
+                                   (addressbits[b1] << 4) + addressbits[b0];
+               bit_addr = addressbits[b2 >> 2];
+               /* flip the bit */
+               buf[byte_addr] ^= (1 << bit_addr);
                return 1;
-       }
 
-       if(countbits(s0 | ((uint32_t)s1 << 8) | ((uint32_t)s2 <<16)) == 1)
-               return 1;
+       }
+       /* count nr of bits; use table lookup, faster than calculating it */
+       if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1)
+               return 1;       /* error in ecc data; no action needed */
 
-       return -EBADMSG;
+       printk(KERN_ERR "uncorrectable error : ");
+       return -1;
 }
 EXPORT_SYMBOL(nand_correct_data);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Steven J. Hill <sjhill@realitydiluted.com>");
+MODULE_AUTHOR("Frans Meulenbroeks <fransmeulenbroeks@gmail.com>");
 MODULE_DESCRIPTION("Generic NAND ECC support");
index 556e8131ecdcd68c9a685e5f5bea5f036d839222..ae7c57781a68e76885ca286b9a68a16d218358be 100644 (file)
@@ -38,7 +38,6 @@
 #include <linux/delay.h>
 #include <linux/list.h>
 #include <linux/random.h>
-#include <asm/div64.h>
 
 /* Default simulator parameters values */
 #if !defined(CONFIG_NANDSIM_FIRST_ID_BYTE)  || \
index a64ad15b8fdd10affbde4c41c07c3c7a324af00b..c0fa9c9edf089c1a039ded475fb13740cfa52d1f 100644 (file)
@@ -115,55 +115,11 @@ enum {
        STATE_PIO_WRITING,
 };
 
-struct pxa3xx_nand_timing {
-       unsigned int    tCH;  /* Enable signal hold time */
-       unsigned int    tCS;  /* Enable signal setup time */
-       unsigned int    tWH;  /* ND_nWE high duration */
-       unsigned int    tWP;  /* ND_nWE pulse time */
-       unsigned int    tRH;  /* ND_nRE high duration */
-       unsigned int    tRP;  /* ND_nRE pulse width */
-       unsigned int    tR;   /* ND_nWE high to ND_nRE low for read */
-       unsigned int    tWHR; /* ND_nWE high to ND_nRE low for status read */
-       unsigned int    tAR;  /* ND_ALE low to ND_nRE low delay */
-};
-
-struct pxa3xx_nand_cmdset {
-       uint16_t        read1;
-       uint16_t        read2;
-       uint16_t        program;
-       uint16_t        read_status;
-       uint16_t        read_id;
-       uint16_t        erase;
-       uint16_t        reset;
-       uint16_t        lock;
-       uint16_t        unlock;
-       uint16_t        lock_status;
-};
-
-struct pxa3xx_nand_flash {
-       struct pxa3xx_nand_timing *timing; /* NAND Flash timing */
-       struct pxa3xx_nand_cmdset *cmdset;
-
-       uint32_t page_per_block;/* Pages per block (PG_PER_BLK) */
-       uint32_t page_size;     /* Page size in bytes (PAGE_SZ) */
-       uint32_t flash_width;   /* Width of Flash memory (DWIDTH_M) */
-       uint32_t dfc_width;     /* Width of flash controller(DWIDTH_C) */
-       uint32_t num_blocks;    /* Number of physical blocks in Flash */
-       uint32_t chip_id;
-
-       /* NOTE: these are automatically calculated, do not define */
-       size_t          oob_size;
-       size_t          read_id_bytes;
-
-       unsigned int    col_addr_cycles;
-       unsigned int    row_addr_cycles;
-};
-
 struct pxa3xx_nand_info {
        struct nand_chip        nand_chip;
 
        struct platform_device   *pdev;
-       struct pxa3xx_nand_flash *flash_info;
+       const struct pxa3xx_nand_flash *flash_info;
 
        struct clk              *clk;
        void __iomem            *mmio_base;
@@ -202,12 +158,20 @@ struct pxa3xx_nand_info {
        uint32_t                ndcb0;
        uint32_t                ndcb1;
        uint32_t                ndcb2;
+
+       /* calculated from pxa3xx_nand_flash data */
+       size_t          oob_size;
+       size_t          read_id_bytes;
+
+       unsigned int    col_addr_cycles;
+       unsigned int    row_addr_cycles;
 };
 
 static int use_dma = 1;
 module_param(use_dma, bool, 0444);
 MODULE_PARM_DESC(use_dma, "enable DMA for data transfering to/from NAND HW");
 
+#ifdef CONFIG_MTD_NAND_PXA3xx_BUILTIN
 static struct pxa3xx_nand_cmdset smallpage_cmdset = {
        .read1          = 0x0000,
        .read2          = 0x0050,
@@ -291,11 +255,35 @@ static struct pxa3xx_nand_flash micron1GbX16 = {
        .chip_id        = 0xb12c,
 };
 
+static struct pxa3xx_nand_timing stm2GbX16_timing = {
+       .tCH = 10,
+       .tCS = 35,
+       .tWH = 15,
+       .tWP = 25,
+       .tRH = 15,
+       .tRP = 25,
+       .tR = 25000,
+       .tWHR = 60,
+       .tAR = 10,
+};
+
+static struct pxa3xx_nand_flash stm2GbX16 = {
+       .timing = &stm2GbX16_timing,
+       .page_per_block = 64,
+       .page_size = 2048,
+       .flash_width = 16,
+       .dfc_width = 16,
+       .num_blocks = 2048,
+       .chip_id = 0xba20,
+};
+
 static struct pxa3xx_nand_flash *builtin_flash_types[] = {
        &samsung512MbX16,
        &micron1GbX8,
        &micron1GbX16,
+       &stm2GbX16,
 };
+#endif /* CONFIG_MTD_NAND_PXA3xx_BUILTIN */
 
 #define NDTR0_tCH(c)   (min((c), 7) << 19)
 #define NDTR0_tCS(c)   (min((c), 7) << 16)
@@ -312,7 +300,7 @@ static struct pxa3xx_nand_flash *builtin_flash_types[] = {
 #define ns2cycle(ns, clk)      (int)(((ns) * (clk / 1000000) / 1000) + 1)
 
 static void pxa3xx_nand_set_timing(struct pxa3xx_nand_info *info,
-                                  struct pxa3xx_nand_timing *t)
+                                  const struct pxa3xx_nand_timing *t)
 {
        unsigned long nand_clk = clk_get_rate(info->clk);
        uint32_t ndtr0, ndtr1;
@@ -354,8 +342,8 @@ static int wait_for_event(struct pxa3xx_nand_info *info, uint32_t event)
 static int prepare_read_prog_cmd(struct pxa3xx_nand_info *info,
                        uint16_t cmd, int column, int page_addr)
 {
-       struct pxa3xx_nand_flash *f = info->flash_info;
-       struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
+       const struct pxa3xx_nand_flash *f = info->flash_info;
+       const struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
 
        /* calculate data size */
        switch (f->page_size) {
@@ -373,14 +361,14 @@ static int prepare_read_prog_cmd(struct pxa3xx_nand_info *info,
        info->ndcb0 = cmd | ((cmd & 0xff00) ? NDCB0_DBC : 0);
        info->ndcb1 = 0;
        info->ndcb2 = 0;
-       info->ndcb0 |= NDCB0_ADDR_CYC(f->row_addr_cycles + f->col_addr_cycles);
+       info->ndcb0 |= NDCB0_ADDR_CYC(info->row_addr_cycles + info->col_addr_cycles);
 
-       if (f->col_addr_cycles == 2) {
+       if (info->col_addr_cycles == 2) {
                /* large block, 2 cycles for column address
                 * row address starts from 3rd cycle
                 */
                info->ndcb1 |= (page_addr << 16) | (column & 0xffff);
-               if (f->row_addr_cycles == 3)
+               if (info->row_addr_cycles == 3)
                        info->ndcb2 = (page_addr >> 16) & 0xff;
        } else
                /* small block, 1 cycles for column address
@@ -406,7 +394,7 @@ static int prepare_erase_cmd(struct pxa3xx_nand_info *info,
 
 static int prepare_other_cmd(struct pxa3xx_nand_info *info, uint16_t cmd)
 {
-       struct pxa3xx_nand_cmdset *cmdset = info->flash_info->cmdset;
+       const struct pxa3xx_nand_cmdset *cmdset = info->flash_info->cmdset;
 
        info->ndcb0 = cmd | ((cmd & 0xff00) ? NDCB0_DBC : 0);
        info->ndcb1 = 0;
@@ -641,8 +629,8 @@ static void pxa3xx_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
                                int column, int page_addr)
 {
        struct pxa3xx_nand_info *info = mtd->priv;
-       struct pxa3xx_nand_flash *flash_info = info->flash_info;
-       struct pxa3xx_nand_cmdset *cmdset = flash_info->cmdset;
+       const struct pxa3xx_nand_flash *flash_info = info->flash_info;
+       const struct pxa3xx_nand_cmdset *cmdset = flash_info->cmdset;
        int ret;
 
        info->use_dma = (use_dma) ? 1 : 0;
@@ -720,7 +708,7 @@ static void pxa3xx_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
                info->use_dma = 0;      /* force PIO read */
                info->buf_start = 0;
                info->buf_count = (command == NAND_CMD_READID) ?
-                               flash_info->read_id_bytes : 1;
+                               info->read_id_bytes : 1;
 
                if (prepare_other_cmd(info, (command == NAND_CMD_READID) ?
                                cmdset->read_id : cmdset->read_status))
@@ -861,8 +849,8 @@ static int pxa3xx_nand_ecc_correct(struct mtd_info *mtd,
 
 static int __readid(struct pxa3xx_nand_info *info, uint32_t *id)
 {
-       struct pxa3xx_nand_flash *f = info->flash_info;
-       struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
+       const struct pxa3xx_nand_flash *f = info->flash_info;
+       const struct pxa3xx_nand_cmdset *cmdset = f->cmdset;
        uint32_t ndcr;
        uint8_t  id_buff[8];
 
@@ -891,7 +879,7 @@ fail_timeout:
 }
 
 static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
-                                   struct pxa3xx_nand_flash *f)
+                                   const struct pxa3xx_nand_flash *f)
 {
        struct platform_device *pdev = info->pdev;
        struct pxa3xx_nand_platform_data *pdata = pdev->dev.platform_data;
@@ -904,25 +892,25 @@ static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
                return -EINVAL;
 
        /* calculate flash information */
-       f->oob_size = (f->page_size == 2048) ? 64 : 16;
-       f->read_id_bytes = (f->page_size == 2048) ? 4 : 2;
+       info->oob_size = (f->page_size == 2048) ? 64 : 16;
+       info->read_id_bytes = (f->page_size == 2048) ? 4 : 2;
 
        /* calculate addressing information */
-       f->col_addr_cycles = (f->page_size == 2048) ? 2 : 1;
+       info->col_addr_cycles = (f->page_size == 2048) ? 2 : 1;
 
        if (f->num_blocks * f->page_per_block > 65536)
-               f->row_addr_cycles = 3;
+               info->row_addr_cycles = 3;
        else
-               f->row_addr_cycles = 2;
+               info->row_addr_cycles = 2;
 
        ndcr |= (pdata->enable_arbiter) ? NDCR_ND_ARB_EN : 0;
-       ndcr |= (f->col_addr_cycles == 2) ? NDCR_RA_START : 0;
+       ndcr |= (info->col_addr_cycles == 2) ? NDCR_RA_START : 0;
        ndcr |= (f->page_per_block == 64) ? NDCR_PG_PER_BLK : 0;
        ndcr |= (f->page_size == 2048) ? NDCR_PAGE_SZ : 0;
        ndcr |= (f->flash_width == 16) ? NDCR_DWIDTH_M : 0;
        ndcr |= (f->dfc_width == 16) ? NDCR_DWIDTH_C : 0;
 
-       ndcr |= NDCR_RD_ID_CNT(f->read_id_bytes);
+       ndcr |= NDCR_RD_ID_CNT(info->read_id_bytes);
        ndcr |= NDCR_SPARE_EN; /* enable spare by default */
 
        info->reg_ndcr = ndcr;
@@ -932,12 +920,27 @@ static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
        return 0;
 }
 
-static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info)
+static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info,
+                                   const struct pxa3xx_nand_platform_data *pdata)
 {
-       struct pxa3xx_nand_flash *f;
-       uint32_t id;
+       const struct pxa3xx_nand_flash *f;
+       uint32_t id = -1;
        int i;
 
+       for (i = 0; i<pdata->num_flash; ++i) {
+               f = pdata->flash + i;
+
+               if (pxa3xx_nand_config_flash(info, f))
+                       continue;
+
+               if (__readid(info, &id))
+                       continue;
+
+               if (id == f->chip_id)
+                       return 0;
+       }
+
+#ifdef CONFIG_MTD_NAND_PXA3xx_BUILTIN
        for (i = 0; i < ARRAY_SIZE(builtin_flash_types); i++) {
 
                f = builtin_flash_types[i];
@@ -951,7 +954,11 @@ static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info)
                if (id == f->chip_id)
                        return 0;
        }
+#endif
 
+       dev_warn(&info->pdev->dev,
+                "failed to detect configured nand flash; found %04x instead of\n",
+                id);
        return -ENODEV;
 }
 
@@ -1014,7 +1021,7 @@ static struct nand_ecclayout hw_largepage_ecclayout = {
 static void pxa3xx_nand_init_mtd(struct mtd_info *mtd,
                                 struct pxa3xx_nand_info *info)
 {
-       struct pxa3xx_nand_flash *f = info->flash_info;
+       const struct pxa3xx_nand_flash *f = info->flash_info;
        struct nand_chip *this = &info->nand_chip;
 
        this->options = (f->flash_width == 16) ? NAND_BUSWIDTH_16: 0;
@@ -1135,7 +1142,7 @@ static int pxa3xx_nand_probe(struct platform_device *pdev)
                goto fail_free_buf;
        }
 
-       ret = pxa3xx_nand_detect_flash(info);
+       ret = pxa3xx_nand_detect_flash(info, pdata);
        if (ret) {
                dev_err(&pdev->dev, "failed to detect flash\n");
                ret = -ENODEV;
diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
new file mode 100644 (file)
index 0000000..821acb0
--- /dev/null
@@ -0,0 +1,878 @@
+/*
+ * SuperH FLCTL nand controller
+ *
+ * Copyright © 2008 Renesas Solutions Corp.
+ * Copyright © 2008 Atom Create Engineering Co., Ltd.
+ *
+ * Based on fsl_elbc_nand.c, Copyright © 2006-2007 Freescale Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/sh_flctl.h>
+
+static struct nand_ecclayout flctl_4secc_oob_16 = {
+       .eccbytes = 10,
+       .eccpos = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+       .oobfree = {
+               {.offset = 12,
+               . length = 4} },
+};
+
+static struct nand_ecclayout flctl_4secc_oob_64 = {
+       .eccbytes = 10,
+       .eccpos = {48, 49, 50, 51, 52, 53, 54, 55, 56, 57},
+       .oobfree = {
+               {.offset = 60,
+               . length = 4} },
+};
+
+static uint8_t scan_ff_pattern[] = { 0xff, 0xff };
+
+static struct nand_bbt_descr flctl_4secc_smallpage = {
+       .options = NAND_BBT_SCAN2NDPAGE,
+       .offs = 11,
+       .len = 1,
+       .pattern = scan_ff_pattern,
+};
+
+static struct nand_bbt_descr flctl_4secc_largepage = {
+       .options = 0,
+       .offs = 58,
+       .len = 2,
+       .pattern = scan_ff_pattern,
+};
+
+static void empty_fifo(struct sh_flctl *flctl)
+{
+       writel(0x000c0000, FLINTDMACR(flctl));  /* FIFO Clear */
+       writel(0x00000000, FLINTDMACR(flctl));  /* Clear Error flags */
+}
+
+static void start_translation(struct sh_flctl *flctl)
+{
+       writeb(TRSTRT, FLTRCR(flctl));
+}
+
+static void wait_completion(struct sh_flctl *flctl)
+{
+       uint32_t timeout = LOOP_TIMEOUT_MAX;
+
+       while (timeout--) {
+               if (readb(FLTRCR(flctl)) & TREND) {
+                       writeb(0x0, FLTRCR(flctl));
+                       return;
+               }
+               udelay(1);
+       }
+
+       printk(KERN_ERR "wait_completion(): Timeout occured \n");
+       writeb(0x0, FLTRCR(flctl));
+}
+
+static void set_addr(struct mtd_info *mtd, int column, int page_addr)
+{
+       struct sh_flctl *flctl = mtd_to_flctl(mtd);
+       uint32_t addr = 0;
+
+       if (column == -1) {
+               addr = page_addr;       /* ERASE1 */
+       } else if (page_addr != -1) {
+               /* SEQIN, READ0, etc.. */
+               if (flctl->page_size) {
+                       addr = column & 0x0FFF;
+                       addr |= (page_addr & 0xff) << 16;
+                       addr |= ((page_addr >> 8) & 0xff) << 24;
+                       /* bigger than 128MB */
+                       if (flctl->rw_ADRCNT == ADRCNT2_E) {
+                               uint32_t        addr2;
+                               addr2 = (page_addr >> 16) & 0xff;
+                               writel(addr2, FLADR2(flctl));
+                       }
+               } else {
+                       addr = column;
+                       addr |= (page_addr & 0xff) << 8;
+                       addr |= ((page_addr >> 8) & 0xff) << 16;
+                       addr |= ((page_addr >> 16) & 0xff) << 24;
+               }
+       }
+       writel(addr, FLADR(flctl));
+}
+
+static void wait_rfifo_ready(struct sh_flctl *flctl)
+{
+       uint32_t timeout = LOOP_TIMEOUT_MAX;
+
+       while (timeout--) {
+               uint32_t val;
+               /* check FIFO */
+               val = readl(FLDTCNTR(flctl)) >> 16;
+               if (val & 0xFF)
+                       return;
+               udelay(1);
+       }
+       printk(KERN_ERR "wait_rfifo_ready(): Timeout occured \n");
+}
+
+static void wait_wfifo_ready(struct sh_flctl *flctl)
+{
+       uint32_t len, timeout = LOOP_TIMEOUT_MAX;
+
+       while (timeout--) {
+               /* check FIFO */
+               len = (readl(FLDTCNTR(flctl)) >> 16) & 0xFF;
+               if (len >= 4)
+                       return;
+               udelay(1);
+       }
+       printk(KERN_ERR "wait_wfifo_ready(): Timeout occured \n");
+}
+
+static int wait_recfifo_ready(struct sh_flctl *flctl)
+{
+       uint32_t timeout = LOOP_TIMEOUT_MAX;
+       int checked[4];
+       void __iomem *ecc_reg[4];
+       int i;
+       uint32_t data, size;
+
+       memset(checked, 0, sizeof(checked));
+
+       while (timeout--) {
+               size = readl(FLDTCNTR(flctl)) >> 24;
+               if (size & 0xFF)
+                       return 0;       /* success */
+
+               if (readl(FL4ECCCR(flctl)) & _4ECCFA)
+                       return 1;       /* can't correct */
+
+               udelay(1);
+               if (!(readl(FL4ECCCR(flctl)) & _4ECCEND))
+                       continue;
+
+               /* start error correction */
+               ecc_reg[0] = FL4ECCRESULT0(flctl);
+               ecc_reg[1] = FL4ECCRESULT1(flctl);
+               ecc_reg[2] = FL4ECCRESULT2(flctl);
+               ecc_reg[3] = FL4ECCRESULT3(flctl);
+
+               for (i = 0; i < 3; i++) {
+                       data = readl(ecc_reg[i]);
+                       if (data != INIT_FL4ECCRESULT_VAL && !checked[i]) {
+                               uint8_t org;
+                               int index;
+
+                               index = data >> 16;
+                               org = flctl->done_buff[index];
+                               flctl->done_buff[index] = org ^ (data & 0xFF);
+                               checked[i] = 1;
+                       }
+               }
+
+               writel(0, FL4ECCCR(flctl));
+       }
+
+       printk(KERN_ERR "wait_recfifo_ready(): Timeout occured \n");
+       return 1;       /* timeout */
+}
+
+static void wait_wecfifo_ready(struct sh_flctl *flctl)
+{
+       uint32_t timeout = LOOP_TIMEOUT_MAX;
+       uint32_t len;
+
+       while (timeout--) {
+               /* check FLECFIFO */
+               len = (readl(FLDTCNTR(flctl)) >> 24) & 0xFF;
+               if (len >= 4)
+                       return;
+               udelay(1);
+       }
+       printk(KERN_ERR "wait_wecfifo_ready(): Timeout occured \n");
+}
+
+static void read_datareg(struct sh_flctl *flctl, int offset)
+{
+       unsigned long data;
+       unsigned long *buf = (unsigned long *)&flctl->done_buff[offset];
+
+       wait_completion(flctl);
+
+       data = readl(FLDATAR(flctl));
+       *buf = le32_to_cpu(data);
+}
+
+static void read_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
+{
+       int i, len_4align;
+       unsigned long *buf = (unsigned long *)&flctl->done_buff[offset];
+       void *fifo_addr = (void *)FLDTFIFO(flctl);
+
+       len_4align = (rlen + 3) / 4;
+
+       for (i = 0; i < len_4align; i++) {
+               wait_rfifo_ready(flctl);
+               buf[i] = readl(fifo_addr);
+               buf[i] = be32_to_cpu(buf[i]);
+       }
+}
+
+static int read_ecfiforeg(struct sh_flctl *flctl, uint8_t *buff)
+{
+       int i;
+       unsigned long *ecc_buf = (unsigned long *)buff;
+       void *fifo_addr = (void *)FLECFIFO(flctl);
+
+       for (i = 0; i < 4; i++) {
+               if (wait_recfifo_ready(flctl))
+                       return 1;
+               ecc_buf[i] = readl(fifo_addr);
+               ecc_buf[i] = be32_to_cpu(ecc_buf[i]);
+       }
+
+       return 0;
+}
+
+static void write_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
+{
+       int i, len_4align;
+       unsigned long *data = (unsigned long *)&flctl->done_buff[offset];
+       void *fifo_addr = (void *)FLDTFIFO(flctl);
+
+       len_4align = (rlen + 3) / 4;
+       for (i = 0; i < len_4align; i++) {
+               wait_wfifo_ready(flctl);
+               writel(cpu_to_be32(data[i]), fifo_addr);
+       }
+}
+
+static void set_cmd_regs(struct mtd_info *mtd, uint32_t cmd, uint32_t flcmcdr_val)
+{
+       struct sh_flctl *flctl = mtd_to_flctl(mtd);
+       uint32_t flcmncr_val = readl(FLCMNCR(flctl));
+       uint32_t flcmdcr_val, addr_len_bytes = 0;
+
+       /* Set SNAND bit if page size is 2048byte */
+       if (flctl->page_size)
+               flcmncr_val |= SNAND_E;
+       else
+               flcmncr_val &= ~SNAND_E;
+
+       /* default FLCMDCR val */
+       flcmdcr_val = DOCMD1_E | DOADR_E;
+
+       /* Set for FLCMDCR */
+       switch (cmd) {
+       case NAND_CMD_ERASE1:
+               addr_len_bytes = flctl->erase_ADRCNT;
+               flcmdcr_val |= DOCMD2_E;
+               break;
+       case NAND_CMD_READ0:
+       case NAND_CMD_READOOB:
+               addr_len_bytes = flctl->rw_ADRCNT;
+               flcmdcr_val |= CDSRC_E;
+               break;
+       case NAND_CMD_SEQIN:
+               /* This case is when cmd is READ0, READ1 or READ00 */
+               flcmdcr_val &= ~DOADR_E;        /* ONLY execute 1st cmd */
+               break;
+       case NAND_CMD_PAGEPROG:
+               addr_len_bytes = flctl->rw_ADRCNT;
+               flcmdcr_val |= DOCMD2_E | CDSRC_E | SELRW;
+               break;
+       case NAND_CMD_READID:
+               flcmncr_val &= ~SNAND_E;
+               addr_len_bytes = ADRCNT_1;
+               break;
+       case NAND_CMD_STATUS:
+       case NAND_CMD_RESET:
+               flcmncr_val &= ~SNAND_E;
+               flcmdcr_val &= ~(DOADR_E | DOSR_E);
+               break;
+       default:
+               break;
+       }
+
+       /* Set address bytes parameter */
+       flcmdcr_val |= addr_len_bytes;
+
+       /* Now actually write */
+       writel(flcmncr_val, FLCMNCR(flctl));
+       writel(flcmdcr_val, FLCMDCR(flctl));
+       writel(flcmcdr_val, FLCMCDR(flctl));
+}
+
+static int flctl_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
+                               uint8_t *buf)
+{
+       int i, eccsize = chip->ecc.size;
+       int eccbytes = chip->ecc.bytes;
+       int eccsteps = chip->ecc.steps;
+       uint8_t *p = buf;
+       struct sh_flctl *flctl = mtd_to_flctl(mtd);
+
+       for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize)
+               chip->read_buf(mtd, p, eccsize);
+
+       for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
+               if (flctl->hwecc_cant_correct[i])
+                       mtd->ecc_stats.failed++;
+               else
+                       mtd->ecc_stats.corrected += 0;
+       }
+
+       return 0;
+}
+
+static void flctl_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
+                                  const uint8_t *buf)
+{
+       int i, eccsize = chip->ecc.size;
+       int eccbytes = chip->ecc.bytes;
+       int eccsteps = chip->ecc.steps;
+       const uint8_t *p = buf;
+
+       for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize)
+               chip->write_buf(mtd, p, eccsize);
+}
+
+static void execmd_read_page_sector(struct mtd_info *mtd, int page_addr)
+{
+       struct sh_flctl *flctl = mtd_to_flctl(mtd);
+       int sector, page_sectors;
+
+       if (flctl->page_size)
+               page_sectors = 4;
+       else
+               page_sectors = 1;
+
+       writel(readl(FLCMNCR(flctl)) | ACM_SACCES_MODE | _4ECCCORRECT,
+                FLCMNCR(flctl));
+
+       set_cmd_regs(mtd, NAND_CMD_READ0,
+               (NAND_CMD_READSTART << 8) | NAND_CMD_READ0);
+
+       for (sector = 0; sector < page_sectors; sector++) {
+               int ret;
+
+               empty_fifo(flctl);
+               writel(readl(FLCMDCR(flctl)) | 1, FLCMDCR(flctl));
+               writel(page_addr << 2 | sector, FLADR(flctl));
+
+               start_translation(flctl);
+               read_fiforeg(flctl, 512, 512 * sector);
+
+               ret = read_ecfiforeg(flctl,
+                       &flctl->done_buff[mtd->writesize + 16 * sector]);
+
+               if (ret)
+                       flctl->hwecc_cant_correct[sector] = 1;
+
+               writel(0x0, FL4ECCCR(flctl));
+               wait_completion(flctl);
+       }
+       writel(readl(FLCMNCR(flctl)) & ~(ACM_SACCES_MODE | _4ECCCORRECT),
+                       FLCMNCR(flctl));
+}
+
+static void execmd_read_oob(struct mtd_info *mtd, int page_addr)
+{
+       struct sh_flctl *flctl = mtd_to_flctl(mtd);
+
+       set_cmd_regs(mtd, NAND_CMD_READ0,
+               (NAND_CMD_READSTART << 8) | NAND_CMD_READ0);
+
+       empty_fifo(flctl);
+       if (flctl->page_size) {
+               int i;
+               /* In case that the page size is 2k */
+               for (i = 0; i < 16 * 3; i++)
+                       flctl->done_buff[i] = 0xFF;
+
+               set_addr(mtd, 3 * 528 + 512, page_addr);
+               writel(16, FLDTCNTR(flctl));
+
+               start_translation(flctl);
+               read_fiforeg(flctl, 16, 16 * 3);
+               wait_completion(flctl);
+       } else {
+               /* In case that the page size is 512b */
+               set_addr(mtd, 512, page_addr);
+               writel(16, FLDTCNTR(flctl));
+
+               start_translation(flctl);
+               read_fiforeg(flctl, 16, 0);
+               wait_completion(flctl);
+       }
+}
+
+static void execmd_write_page_sector(struct mtd_info *mtd)
+{
+       struct sh_flctl *flctl = mtd_to_flctl(mtd);
+       int i, page_addr = flctl->seqin_page_addr;
+       int sector, page_sectors;
+
+       if (flctl->page_size)
+               page_sectors = 4;
+       else
+               page_sectors = 1;
+
+       writel(readl(FLCMNCR(flctl)) | ACM_SACCES_MODE, FLCMNCR(flctl));
+
+       set_cmd_regs(mtd, NAND_CMD_PAGEPROG,
+                       (NAND_CMD_PAGEPROG << 8) | NAND_CMD_SEQIN);
+
+       for (sector = 0; sector < page_sectors; sector++) {
+               empty_fifo(flctl);
+               writel(readl(FLCMDCR(flctl)) | 1, FLCMDCR(flctl));
+               writel(page_addr << 2 | sector, FLADR(flctl));
+
+               start_translation(flctl);
+               write_fiforeg(flctl, 512, 512 * sector);
+
+               for (i = 0; i < 4; i++) {
+                       wait_wecfifo_ready(flctl); /* wait for write ready */
+                       writel(0xFFFFFFFF, FLECFIFO(flctl));
+               }
+               wait_completion(flctl);
+       }
+
+       writel(readl(FLCMNCR(flctl)) & ~ACM_SACCES_MODE, FLCMNCR(flctl));
+}
+
+static void execmd_write_oob(struct mtd_info *mtd)
+{
+       struct sh_flctl *flctl = mtd_to_flctl(mtd);
+       int page_addr = flctl->seqin_page_addr;
+       int sector, page_sectors;
+
+       if (flctl->page_size) {
+               sector = 3;
+               page_sectors = 4;
+       } else {
+               sector = 0;
+               page_sectors = 1;
+       }
+
+       set_cmd_regs(mtd, NAND_CMD_PAGEPROG,
+                       (NAND_CMD_PAGEPROG << 8) | NAND_CMD_SEQIN);
+
+       for (; sector < page_sectors; sector++) {
+               empty_fifo(flctl);
+               set_addr(mtd, sector * 528 + 512, page_addr);
+               writel(16, FLDTCNTR(flctl));    /* set read size */
+
+               start_translation(flctl);
+               write_fiforeg(flctl, 16, 16 * sector);
+               wait_completion(flctl);
+       }
+}
+
+static void flctl_cmdfunc(struct mtd_info *mtd, unsigned int command,
+                       int column, int page_addr)
+{
+       struct sh_flctl *flctl = mtd_to_flctl(mtd);
+       uint32_t read_cmd = 0;
+
+       flctl->read_bytes = 0;
+       if (command != NAND_CMD_PAGEPROG)
+               flctl->index = 0;
+
+       switch (command) {
+       case NAND_CMD_READ1:
+       case NAND_CMD_READ0:
+               if (flctl->hwecc) {
+                       /* read page with hwecc */
+                       execmd_read_page_sector(mtd, page_addr);
+                       break;
+               }
+               empty_fifo(flctl);
+               if (flctl->page_size)
+                       set_cmd_regs(mtd, command, (NAND_CMD_READSTART << 8)
+                               | command);
+               else
+                       set_cmd_regs(mtd, command, command);
+
+               set_addr(mtd, 0, page_addr);
+
+               flctl->read_bytes = mtd->writesize + mtd->oobsize;
+               flctl->index += column;
+               goto read_normal_exit;
+
+       case NAND_CMD_READOOB:
+               if (flctl->hwecc) {
+                       /* read page with hwecc */
+                       execmd_read_oob(mtd, page_addr);
+                       break;
+               }
+
+               empty_fifo(flctl);
+               if (flctl->page_size) {
+                       set_cmd_regs(mtd, command, (NAND_CMD_READSTART << 8)
+                               | NAND_CMD_READ0);
+                       set_addr(mtd, mtd->writesize, page_addr);
+               } else {
+                       set_cmd_regs(mtd, command, command);
+                       set_addr(mtd, 0, page_addr);
+               }
+               flctl->read_bytes = mtd->oobsize;
+               goto read_normal_exit;
+
+       case NAND_CMD_READID:
+               empty_fifo(flctl);
+               set_cmd_regs(mtd, command, command);
+               set_addr(mtd, 0, 0);
+
+               flctl->read_bytes = 4;
+               writel(flctl->read_bytes, FLDTCNTR(flctl)); /* set read size */
+               start_translation(flctl);
+               read_datareg(flctl, 0); /* read and end */
+               break;
+
+       case NAND_CMD_ERASE1:
+               flctl->erase1_page_addr = page_addr;
+               break;
+
+       case NAND_CMD_ERASE2:
+               set_cmd_regs(mtd, NAND_CMD_ERASE1,
+                       (command << 8) | NAND_CMD_ERASE1);
+               set_addr(mtd, -1, flctl->erase1_page_addr);
+               start_translation(flctl);
+               wait_completion(flctl);
+               break;
+
+       case NAND_CMD_SEQIN:
+               if (!flctl->page_size) {
+                       /* output read command */
+                       if (column >= mtd->writesize) {
+                               column -= mtd->writesize;
+                               read_cmd = NAND_CMD_READOOB;
+                       } else if (column < 256) {
+                               read_cmd = NAND_CMD_READ0;
+                       } else {
+                               column -= 256;
+                               read_cmd = NAND_CMD_READ1;
+                       }
+               }
+               flctl->seqin_column = column;
+               flctl->seqin_page_addr = page_addr;
+               flctl->seqin_read_cmd = read_cmd;
+               break;
+
+       case NAND_CMD_PAGEPROG:
+               empty_fifo(flctl);
+               if (!flctl->page_size) {
+                       set_cmd_regs(mtd, NAND_CMD_SEQIN,
+                                       flctl->seqin_read_cmd);
+                       set_addr(mtd, -1, -1);
+                       writel(0, FLDTCNTR(flctl));     /* set 0 size */
+                       start_translation(flctl);
+                       wait_completion(flctl);
+               }
+               if (flctl->hwecc) {
+                       /* write page with hwecc */
+                       if (flctl->seqin_column == mtd->writesize)
+                               execmd_write_oob(mtd);
+                       else if (!flctl->seqin_column)
+                               execmd_write_page_sector(mtd);
+                       else
+                               printk(KERN_ERR "Invalid address !?\n");
+                       break;
+               }
+               set_cmd_regs(mtd, command, (command << 8) | NAND_CMD_SEQIN);
+               set_addr(mtd, flctl->seqin_column, flctl->seqin_page_addr);
+               writel(flctl->index, FLDTCNTR(flctl));  /* set write size */
+               start_translation(flctl);
+               write_fiforeg(flctl, flctl->index, 0);
+               wait_completion(flctl);
+               break;
+
+       case NAND_CMD_STATUS:
+               set_cmd_regs(mtd, command, command);
+               set_addr(mtd, -1, -1);
+
+               flctl->read_bytes = 1;
+               writel(flctl->read_bytes, FLDTCNTR(flctl)); /* set read size */
+               start_translation(flctl);
+               read_datareg(flctl, 0); /* read and end */
+               break;
+
+       case NAND_CMD_RESET:
+               set_cmd_regs(mtd, command, command);
+               set_addr(mtd, -1, -1);
+
+               writel(0, FLDTCNTR(flctl));     /* set 0 size */
+               start_translation(flctl);
+               wait_completion(flctl);
+               break;
+
+       default:
+               break;
+       }
+       return;
+
+read_normal_exit:
+       writel(flctl->read_bytes, FLDTCNTR(flctl));     /* set read size */
+       start_translation(flctl);
+       read_fiforeg(flctl, flctl->read_bytes, 0);
+       wait_completion(flctl);
+       return;
+}
+
+static void flctl_select_chip(struct mtd_info *mtd, int chipnr)
+{
+       struct sh_flctl *flctl = mtd_to_flctl(mtd);
+       uint32_t flcmncr_val = readl(FLCMNCR(flctl));
+
+       switch (chipnr) {
+       case -1:
+               flcmncr_val &= ~CE0_ENABLE;
+               writel(flcmncr_val, FLCMNCR(flctl));
+               break;
+       case 0:
+               flcmncr_val |= CE0_ENABLE;
+               writel(flcmncr_val, FLCMNCR(flctl));
+               break;
+       default:
+               BUG();
+       }
+}
+
+static void flctl_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
+{
+       struct sh_flctl *flctl = mtd_to_flctl(mtd);
+       int i, index = flctl->index;
+
+       for (i = 0; i < len; i++)
+               flctl->done_buff[index + i] = buf[i];
+       flctl->index += len;
+}
+
+static uint8_t flctl_read_byte(struct mtd_info *mtd)
+{
+       struct sh_flctl *flctl = mtd_to_flctl(mtd);
+       int index = flctl->index;
+       uint8_t data;
+
+       data = flctl->done_buff[index];
+       flctl->index++;
+       return data;
+}
+
+static void flctl_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+{
+       int i;
+
+       for (i = 0; i < len; i++)
+               buf[i] = flctl_read_byte(mtd);
+}
+
+static int flctl_verify_buf(struct mtd_info *mtd, const u_char *buf, int len)
+{
+       int i;
+
+       for (i = 0; i < len; i++)
+               if (buf[i] != flctl_read_byte(mtd))
+                       return -EFAULT;
+       return 0;
+}
+
+static void flctl_register_init(struct sh_flctl *flctl, unsigned long val)
+{
+       writel(val, FLCMNCR(flctl));
+}
+
+static int flctl_chip_init_tail(struct mtd_info *mtd)
+{
+       struct sh_flctl *flctl = mtd_to_flctl(mtd);
+       struct nand_chip *chip = &flctl->chip;
+
+       if (mtd->writesize == 512) {
+               flctl->page_size = 0;
+               if (chip->chipsize > (32 << 20)) {
+                       /* big than 32MB */
+                       flctl->rw_ADRCNT = ADRCNT_4;
+                       flctl->erase_ADRCNT = ADRCNT_3;
+               } else if (chip->chipsize > (2 << 16)) {
+                       /* big than 128KB */
+                       flctl->rw_ADRCNT = ADRCNT_3;
+                       flctl->erase_ADRCNT = ADRCNT_2;
+               } else {
+                       flctl->rw_ADRCNT = ADRCNT_2;
+                       flctl->erase_ADRCNT = ADRCNT_1;
+               }
+       } else {
+               flctl->page_size = 1;
+               if (chip->chipsize > (128 << 20)) {
+                       /* big than 128MB */
+                       flctl->rw_ADRCNT = ADRCNT2_E;
+                       flctl->erase_ADRCNT = ADRCNT_3;
+               } else if (chip->chipsize > (8 << 16)) {
+                       /* big than 512KB */
+                       flctl->rw_ADRCNT = ADRCNT_4;
+                       flctl->erase_ADRCNT = ADRCNT_2;
+               } else {
+                       flctl->rw_ADRCNT = ADRCNT_3;
+                       flctl->erase_ADRCNT = ADRCNT_1;
+               }
+       }
+
+       if (flctl->hwecc) {
+               if (mtd->writesize == 512) {
+                       chip->ecc.layout = &flctl_4secc_oob_16;
+                       chip->badblock_pattern = &flctl_4secc_smallpage;
+               } else {
+                       chip->ecc.layout = &flctl_4secc_oob_64;
+                       chip->badblock_pattern = &flctl_4secc_largepage;
+               }
+
+               chip->ecc.size = 512;
+               chip->ecc.bytes = 10;
+               chip->ecc.read_page = flctl_read_page_hwecc;
+               chip->ecc.write_page = flctl_write_page_hwecc;
+               chip->ecc.mode = NAND_ECC_HW;
+
+               /* 4 symbols ECC enabled */
+               writel(readl(FLCMNCR(flctl)) | _4ECCEN | ECCPOS2 | ECCPOS_02,
+                               FLCMNCR(flctl));
+       } else {
+               chip->ecc.mode = NAND_ECC_SOFT;
+       }
+
+       return 0;
+}
+
+static int __init flctl_probe(struct platform_device *pdev)
+{
+       struct resource *res;
+       struct sh_flctl *flctl;
+       struct mtd_info *flctl_mtd;
+       struct nand_chip *nand;
+       struct sh_flctl_platform_data *pdata;
+       int ret;
+
+       pdata = pdev->dev.platform_data;
+       if (pdata == NULL) {
+               printk(KERN_ERR "sh_flctl platform_data not found.\n");
+               return -ENODEV;
+       }
+
+       flctl = kzalloc(sizeof(struct sh_flctl), GFP_KERNEL);
+       if (!flctl) {
+               printk(KERN_ERR "Unable to allocate NAND MTD dev structure.\n");
+               return -ENOMEM;
+       }
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res) {
+               printk(KERN_ERR "%s: resource not found.\n", __func__);
+               ret = -ENODEV;
+               goto err;
+       }
+
+       flctl->reg = ioremap(res->start, res->end - res->start + 1);
+       if (flctl->reg == NULL) {
+               printk(KERN_ERR "%s: ioremap error.\n", __func__);
+               ret = -ENOMEM;
+               goto err;
+       }
+
+       platform_set_drvdata(pdev, flctl);
+       flctl_mtd = &flctl->mtd;
+       nand = &flctl->chip;
+       flctl_mtd->priv = nand;
+       flctl->hwecc = pdata->has_hwecc;
+
+       flctl_register_init(flctl, pdata->flcmncr_val);
+
+       nand->options = NAND_NO_AUTOINCR;
+
+       /* Set address of hardware control function */
+       /* 20 us command delay time */
+       nand->chip_delay = 20;
+
+       nand->read_byte = flctl_read_byte;
+       nand->write_buf = flctl_write_buf;
+       nand->read_buf = flctl_read_buf;
+       nand->verify_buf = flctl_verify_buf;
+       nand->select_chip = flctl_select_chip;
+       nand->cmdfunc = flctl_cmdfunc;
+
+       ret = nand_scan_ident(flctl_mtd, 1);
+       if (ret)
+               goto err;
+
+       ret = flctl_chip_init_tail(flctl_mtd);
+       if (ret)
+               goto err;
+
+       ret = nand_scan_tail(flctl_mtd);
+       if (ret)
+               goto err;
+
+       add_mtd_partitions(flctl_mtd, pdata->parts, pdata->nr_parts);
+
+       return 0;
+
+err:
+       kfree(flctl);
+       return ret;
+}
+
+static int __exit flctl_remove(struct platform_device *pdev)
+{
+       struct sh_flctl *flctl = platform_get_drvdata(pdev);
+
+       nand_release(&flctl->mtd);
+       kfree(flctl);
+
+       return 0;
+}
+
+static struct platform_driver flctl_driver = {
+       .probe          = flctl_probe,
+       .remove         = flctl_remove,
+       .driver = {
+               .name   = "sh_flctl",
+               .owner  = THIS_MODULE,
+       },
+};
+
+static int __init flctl_nand_init(void)
+{
+       return platform_driver_register(&flctl_driver);
+}
+
+static void __exit flctl_nand_cleanup(void)
+{
+       platform_driver_unregister(&flctl_driver);
+}
+
+module_init(flctl_nand_init);
+module_exit(flctl_nand_cleanup);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yoshihiro Shimoda");
+MODULE_DESCRIPTION("SuperH FLCTL driver");
+MODULE_ALIAS("platform:sh_flctl");
diff --git a/drivers/mtd/nand/toto.c b/drivers/mtd/nand/toto.c
deleted file mode 100644 (file)
index bbf492e..0000000
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- *  drivers/mtd/nand/toto.c
- *
- *  Copyright (c) 2003 Texas Instruments
- *
- *  Derived from drivers/mtd/autcpu12.c
- *
- *  Copyright (c) 2002 Thomas Gleixner <tgxl@linutronix.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  Overview:
- *   This is a device driver for the NAND flash device found on the
- *   TI fido board. It supports 32MiB and 64MiB cards
- */
-
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/nand.h>
-#include <linux/mtd/partitions.h>
-#include <asm/io.h>
-#include <asm/arch/hardware.h>
-#include <asm/sizes.h>
-#include <asm/arch/toto.h>
-#include <asm/arch-omap1510/hardware.h>
-#include <asm/arch/gpio.h>
-
-#define CONFIG_NAND_WORKAROUND 1
-
-/*
- * MTD structure for TOTO board
- */
-static struct mtd_info *toto_mtd = NULL;
-
-static unsigned long toto_io_base = OMAP_FLASH_1_BASE;
-
-/*
- * Define partitions for flash devices
- */
-
-static struct mtd_partition partition_info64M[] = {
-       { .name =       "toto kernel partition 1",
-         .offset =     0,
-         .size =       2 * SZ_1M },
-       { .name =       "toto file sys partition 2",
-         .offset =     2 * SZ_1M,
-         .size =       14 * SZ_1M },
-       { .name =       "toto user partition 3",
-         .offset =     16 * SZ_1M,
-         .size =       16 * SZ_1M },
-       { .name =       "toto devboard extra partition 4",
-         .offset =     32 * SZ_1M,
-         .size =       32 * SZ_1M },
-};
-
-static struct mtd_partition partition_info32M[] = {
-       { .name =       "toto kernel partition 1",
-         .offset =     0,
-         .size =       2 * SZ_1M },
-       { .name =       "toto file sys partition 2",
-         .offset =     2 * SZ_1M,
-         .size =       14 * SZ_1M },
-       { .name =       "toto user partition 3",
-         .offset =     16 * SZ_1M,
-         .size =       16 * SZ_1M },
-};
-
-#define NUM_PARTITIONS32M 3
-#define NUM_PARTITIONS64M 4
-
-/*
- *     hardware specific access to control-lines
- *
- *     ctrl:
- *     NAND_NCE: bit 0 -> bit 14 (0x4000)
- *     NAND_CLE: bit 1 -> bit 12 (0x1000)
- *     NAND_ALE: bit 2 -> bit 1  (0x0002)
- */
-static void toto_hwcontrol(struct mtd_info *mtd, int cmd,
-                          unsigned int ctrl)
-{
-       struct nand_chip *chip = mtd->priv;
-
-       if (ctrl & NAND_CTRL_CHANGE) {
-               unsigned long bits;
-
-               /* hopefully enough time for tc make proceding write to clear */
-               udelay(1);
-
-               bits = (~ctrl & NAND_NCE) << 14;
-               bits |= (ctrl & NAND_CLE) << 12;
-               bits |= (ctrl & NAND_ALE) >> 1;
-
-#warning Wild guess as gpiosetout() is nowhere defined in the kernel source - tglx
-               gpiosetout(0x5002, bits);
-
-#ifdef CONFIG_NAND_WORKAROUND
-               /* "some" dev boards busted, blue wired to rts2 :( */
-               rts2setout(2, (ctrl & NAND_CLE) << 1);
-#endif
-               /* allow time to ensure gpio state to over take memory write */
-               udelay(1);
-       }
-
-       if (cmd != NAND_CMD_NONE)
-               writeb(cmd, chip->IO_ADDR_W);
-}
-
-/*
- * Main initialization routine
- */
-static int __init toto_init(void)
-{
-       struct nand_chip *this;
-       int err = 0;
-
-       /* Allocate memory for MTD device structure and private data */
-       toto_mtd = kmalloc(sizeof(struct mtd_info) + sizeof(struct nand_chip), GFP_KERNEL);
-       if (!toto_mtd) {
-               printk(KERN_WARNING "Unable to allocate toto NAND MTD device structure.\n");
-               err = -ENOMEM;
-               goto out;
-       }
-
-       /* Get pointer to private data */
-       this = (struct nand_chip *)(&toto_mtd[1]);
-
-       /* Initialize structures */
-       memset(toto_mtd, 0, sizeof(struct mtd_info));
-       memset(this, 0, sizeof(struct nand_chip));
-
-       /* Link the private data with the MTD structure */
-       toto_mtd->priv = this;
-       toto_mtd->owner = THIS_MODULE;
-
-       /* Set address of NAND IO lines */
-       this->IO_ADDR_R = toto_io_base;
-       this->IO_ADDR_W = toto_io_base;
-       this->cmd_ctrl = toto_hwcontrol;
-       this->dev_ready = NULL;
-       /* 25 us command delay time */
-       this->chip_delay = 30;
-       this->ecc.mode = NAND_ECC_SOFT;
-
-       /* Scan to find existance of the device */
-       if (nand_scan(toto_mtd, 1)) {
-               err = -ENXIO;
-               goto out_mtd;
-       }
-
-       /* Register the partitions */
-       switch (toto_mtd->size) {
-       case SZ_64M:
-               add_mtd_partitions(toto_mtd, partition_info64M, NUM_PARTITIONS64M);
-               break;
-       case SZ_32M:
-               add_mtd_partitions(toto_mtd, partition_info32M, NUM_PARTITIONS32M);
-               break;
-       default:{
-                       printk(KERN_WARNING "Unsupported Nand device\n");
-                       err = -ENXIO;
-                       goto out_buf;
-               }
-       }
-
-       gpioreserve(NAND_MASK); /* claim our gpios */
-       archflashwp(0, 0);      /* open up flash for writing */
-
-       goto out;
-
- out_mtd:
-       kfree(toto_mtd);
- out:
-       return err;
-}
-
-module_init(toto_init);
-
-/*
- * Clean up routine
- */
-static void __exit toto_cleanup(void)
-{
-       /* Release resources, unregister device */
-       nand_release(toto_mtd);
-
-       /* Free the MTD device structure */
-       kfree(toto_mtd);
-
-       /* stop flash writes */
-       archflashwp(0, 1);
-
-       /* release gpios to system */
-       gpiorelease(NAND_MASK);
-}
-
-module_exit(toto_cleanup);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Richard Woodruff <r-woodruff2@ti.com>");
-MODULE_DESCRIPTION("Glue layer for NAND flash on toto board");
index 4f80c2fd89af71a0a0c9e5bae77c013a663a4c7b..9e45b3f39c0e7e6cf4f4417f874376e5bfdda9df 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/mtd/partitions.h>
 
 int __devinit of_mtd_parse_partitions(struct device *dev,
-                                      struct mtd_info *mtd,
                                       struct device_node *node,
                                       struct mtd_partition **pparts)
 {
index cb41cbca64f7e63394f352820eb90cc610bac6f8..79fa79e8f8de9b67a5949888d029f93b7dbebb27 100644 (file)
@@ -27,8 +27,16 @@ config MTD_ONENAND_GENERIC
        help
          Support for OneNAND flash via platform device driver.
 
+config MTD_ONENAND_OMAP2
+       tristate "OneNAND on OMAP2/OMAP3 support"
+       depends on MTD_ONENAND && (ARCH_OMAP2 || ARCH_OMAP3)
+       help
+         Support for a OneNAND flash device connected to an OMAP2/OMAP3 CPU
+         via the GPMC memory controller.
+
 config MTD_ONENAND_OTP
        bool "OneNAND OTP Support"
+       select HAVE_MTD_OTP
        help
          One Block of the NAND Flash Array memory is reserved as
          a One-Time Programmable Block memory area.
index 4d2eacfd7e11a761c3cec319f34c735d0fc0222b..64b6cc61a5209241145a4a3e1856dcb39a96d19c 100644 (file)
@@ -7,6 +7,7 @@ obj-$(CONFIG_MTD_ONENAND)               += onenand.o
 
 # Board specific.
 obj-$(CONFIG_MTD_ONENAND_GENERIC)      += generic.o
+obj-$(CONFIG_MTD_ONENAND_OMAP2)                += omap2.o
 
 # Simulator
 obj-$(CONFIG_MTD_ONENAND_SIM)          += onenand_sim.o
diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
new file mode 100644 (file)
index 0000000..8387e05
--- /dev/null
@@ -0,0 +1,802 @@
+/*
+ *  linux/drivers/mtd/onenand/omap2.c
+ *
+ *  OneNAND driver for OMAP2 / OMAP3
+ *
+ *  Copyright Â© 2005-2006 Nokia Corporation
+ *
+ *  Author: Jarkko Lavinen <jarkko.lavinen@nokia.com> and Juha Yrjölä
+ *  IRQ and DMA support written by Timo Teras
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; see the file COPYING. If not, write to the Free Software
+ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/onenand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+
+#include <asm/io.h>
+#include <asm/mach/flash.h>
+#include <asm/arch/gpmc.h>
+#include <asm/arch/onenand.h>
+#include <asm/arch/gpio.h>
+#include <asm/arch/gpmc.h>
+#include <asm/arch/pm.h>
+
+#include <linux/dma-mapping.h>
+#include <asm/dma-mapping.h>
+#include <asm/arch/dma.h>
+
+#include <asm/arch/board.h>
+
+#define DRIVER_NAME "omap2-onenand"
+
+#define ONENAND_IO_SIZE                SZ_128K
+#define ONENAND_BUFRAM_SIZE    (1024 * 5)
+
+struct omap2_onenand {
+       struct platform_device *pdev;
+       int gpmc_cs;
+       unsigned long phys_base;
+       int gpio_irq;
+       struct mtd_info mtd;
+       struct mtd_partition *parts;
+       struct onenand_chip onenand;
+       struct completion irq_done;
+       struct completion dma_done;
+       int dma_channel;
+       int freq;
+       int (*setup)(void __iomem *base, int freq);
+};
+
+static void omap2_onenand_dma_cb(int lch, u16 ch_status, void *data)
+{
+       struct omap2_onenand *c = data;
+
+       complete(&c->dma_done);
+}
+
+static irqreturn_t omap2_onenand_interrupt(int irq, void *dev_id)
+{
+       struct omap2_onenand *c = dev_id;
+
+       complete(&c->irq_done);
+
+       return IRQ_HANDLED;
+}
+
+static inline unsigned short read_reg(struct omap2_onenand *c, int reg)
+{
+       return readw(c->onenand.base + reg);
+}
+
+static inline void write_reg(struct omap2_onenand *c, unsigned short value,
+                            int reg)
+{
+       writew(value, c->onenand.base + reg);
+}
+
+static void wait_err(char *msg, int state, unsigned int ctrl, unsigned int intr)
+{
+       printk(KERN_ERR "onenand_wait: %s! state %d ctrl 0x%04x intr 0x%04x\n",
+              msg, state, ctrl, intr);
+}
+
+static void wait_warn(char *msg, int state, unsigned int ctrl,
+                     unsigned int intr)
+{
+       printk(KERN_WARNING "onenand_wait: %s! state %d ctrl 0x%04x "
+              "intr 0x%04x\n", msg, state, ctrl, intr);
+}
+
+static int omap2_onenand_wait(struct mtd_info *mtd, int state)
+{
+       struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+       unsigned int intr = 0;
+       unsigned int ctrl;
+       unsigned long timeout;
+       u32 syscfg;
+
+       if (state == FL_RESETING) {
+               int i;
+
+               for (i = 0; i < 20; i++) {
+                       udelay(1);
+                       intr = read_reg(c, ONENAND_REG_INTERRUPT);
+                       if (intr & ONENAND_INT_MASTER)
+                               break;
+               }
+               ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+               if (ctrl & ONENAND_CTRL_ERROR) {
+                       wait_err("controller error", state, ctrl, intr);
+                       return -EIO;
+               }
+               if (!(intr & ONENAND_INT_RESET)) {
+                       wait_err("timeout", state, ctrl, intr);
+                       return -EIO;
+               }
+               return 0;
+       }
+
+       if (state != FL_READING) {
+               int result;
+
+               /* Turn interrupts on */
+               syscfg = read_reg(c, ONENAND_REG_SYS_CFG1);
+               if (!(syscfg & ONENAND_SYS_CFG1_IOBE)) {
+                       syscfg |= ONENAND_SYS_CFG1_IOBE;
+                       write_reg(c, syscfg, ONENAND_REG_SYS_CFG1);
+                       if (cpu_is_omap34xx())
+                               /* Add a delay to let GPIO settle */
+                               syscfg = read_reg(c, ONENAND_REG_SYS_CFG1);
+               }
+
+               INIT_COMPLETION(c->irq_done);
+               if (c->gpio_irq) {
+                       result = omap_get_gpio_datain(c->gpio_irq);
+                       if (result == -1) {
+                               ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+                               intr = read_reg(c, ONENAND_REG_INTERRUPT);
+                               wait_err("gpio error", state, ctrl, intr);
+                               return -EIO;
+                       }
+               } else
+                       result = 0;
+               if (result == 0) {
+                       int retry_cnt = 0;
+retry:
+                       result = wait_for_completion_timeout(&c->irq_done,
+                                                   msecs_to_jiffies(20));
+                       if (result == 0) {
+                               /* Timeout after 20ms */
+                               ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+                               if (ctrl & ONENAND_CTRL_ONGO) {
+                                       /*
+                                        * The operation seems to be still going
+                                        * so give it some more time.
+                                        */
+                                       retry_cnt += 1;
+                                       if (retry_cnt < 3)
+                                               goto retry;
+                                       intr = read_reg(c,
+                                                       ONENAND_REG_INTERRUPT);
+                                       wait_err("timeout", state, ctrl, intr);
+                                       return -EIO;
+                               }
+                               intr = read_reg(c, ONENAND_REG_INTERRUPT);
+                               if ((intr & ONENAND_INT_MASTER) == 0)
+                                       wait_warn("timeout", state, ctrl, intr);
+                       }
+               }
+       } else {
+               int retry_cnt = 0;
+
+               /* Turn interrupts off */
+               syscfg = read_reg(c, ONENAND_REG_SYS_CFG1);
+               syscfg &= ~ONENAND_SYS_CFG1_IOBE;
+               write_reg(c, syscfg, ONENAND_REG_SYS_CFG1);
+
+               timeout = jiffies + msecs_to_jiffies(20);
+               while (1) {
+                       if (time_before(jiffies, timeout)) {
+                               intr = read_reg(c, ONENAND_REG_INTERRUPT);
+                               if (intr & ONENAND_INT_MASTER)
+                                       break;
+                       } else {
+                               /* Timeout after 20ms */
+                               ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+                               if (ctrl & ONENAND_CTRL_ONGO) {
+                                       /*
+                                        * The operation seems to be still going
+                                        * so give it some more time.
+                                        */
+                                       retry_cnt += 1;
+                                       if (retry_cnt < 3) {
+                                               timeout = jiffies +
+                                                         msecs_to_jiffies(20);
+                                               continue;
+                                       }
+                               }
+                               break;
+                       }
+               }
+       }
+
+       intr = read_reg(c, ONENAND_REG_INTERRUPT);
+       ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
+
+       if (intr & ONENAND_INT_READ) {
+               int ecc = read_reg(c, ONENAND_REG_ECC_STATUS);
+
+               if (ecc) {
+                       unsigned int addr1, addr8;
+
+                       addr1 = read_reg(c, ONENAND_REG_START_ADDRESS1);
+                       addr8 = read_reg(c, ONENAND_REG_START_ADDRESS8);
+                       if (ecc & ONENAND_ECC_2BIT_ALL) {
+                               printk(KERN_ERR "onenand_wait: ECC error = "
+                                      "0x%04x, addr1 %#x, addr8 %#x\n",
+                                      ecc, addr1, addr8);
+                               mtd->ecc_stats.failed++;
+                               return -EBADMSG;
+                       } else if (ecc & ONENAND_ECC_1BIT_ALL) {
+                               printk(KERN_NOTICE "onenand_wait: correctable "
+                                      "ECC error = 0x%04x, addr1 %#x, "
+                                      "addr8 %#x\n", ecc, addr1, addr8);
+                               mtd->ecc_stats.corrected++;
+                       }
+               }
+       } else if (state == FL_READING) {
+               wait_err("timeout", state, ctrl, intr);
+               return -EIO;
+       }
+
+       if (ctrl & ONENAND_CTRL_ERROR) {
+               wait_err("controller error", state, ctrl, intr);
+               if (ctrl & ONENAND_CTRL_LOCK)
+                       printk(KERN_ERR "onenand_wait: "
+                                       "Device is write protected!!!\n");
+               return -EIO;
+       }
+
+       if (ctrl & 0xFE9F)
+               wait_warn("unexpected controller status", state, ctrl, intr);
+
+       return 0;
+}
+
+static inline int omap2_onenand_bufferram_offset(struct mtd_info *mtd, int area)
+{
+       struct onenand_chip *this = mtd->priv;
+
+       if (ONENAND_CURRENT_BUFFERRAM(this)) {
+               if (area == ONENAND_DATARAM)
+                       return mtd->writesize;
+               if (area == ONENAND_SPARERAM)
+                       return mtd->oobsize;
+       }
+
+       return 0;
+}
+
+#if defined(CONFIG_ARCH_OMAP3) || defined(MULTI_OMAP2)
+
+/*
+ * Read @count bytes of BufferRAM into @buffer.  Word-aligned transfers of
+ * at least 384 bytes go through system DMA; everything else falls back to
+ * a plain memcpy from the memory-mapped BufferRAM (out_copy).
+ */
+static int omap3_onenand_read_bufferram(struct mtd_info *mtd, int area,
+                                       unsigned char *buffer, int offset,
+                                       size_t count)
+{
+       struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+       struct onenand_chip *this = mtd->priv;
+       dma_addr_t dma_src, dma_dst;
+       int bram_offset;
+       unsigned long timeout;
+       void *buf = (void *)buffer;
+       size_t xtra;
+       volatile unsigned *done;
+
+       bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
+       /* Small or unaligned transfers are not worth the DMA setup cost. */
+       if (bram_offset & 3 || (size_t)buf & 3 || count < 384)
+               goto out_copy;
+
+       /* Highmem/vmalloc buffer: translate to a direct-mapped address, but
+        * only when the whole transfer stays within a single page. */
+       if (buf >= high_memory) {
+               struct page *p1;
+
+               if (((size_t)buf & PAGE_MASK) !=
+                   ((size_t)(buf + count - 1) & PAGE_MASK))
+                       goto out_copy;
+               p1 = vmalloc_to_page(buf);
+               if (!p1)
+                       goto out_copy;
+               buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK);
+       }
+
+       /* Copy the unaligned tail by hand; DMA moves whole 32-bit words. */
+       xtra = count & 3;
+       if (xtra) {
+               count -= xtra;
+               memcpy(buf + count, this->base + bram_offset + count, xtra);
+       }
+
+       dma_src = c->phys_base + bram_offset;
+       dma_dst = dma_map_single(&c->pdev->dev, buf, count, DMA_FROM_DEVICE);
+       if (dma_mapping_error(&c->pdev->dev, dma_dst)) {
+               dev_err(&c->pdev->dev,
+                       "Couldn't DMA map a %d byte buffer\n",
+                       count);
+               goto out_copy;
+       }
+
+       /* 32-bit elements, post-incrementing both source and destination. */
+       omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32,
+                                    count >> 2, 1, 0, 0, 0);
+       omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+                               dma_src, 0, 0);
+       omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+                                dma_dst, 0, 0);
+
+       INIT_COMPLETION(c->dma_done);
+       omap_start_dma(c->dma_channel);
+
+       /* Busy-wait on the completion counter with a 20 ms timeout. */
+       timeout = jiffies + msecs_to_jiffies(20);
+       done = &c->dma_done.done;
+       while (time_before(jiffies, timeout))
+               if (*done)
+                       break;
+
+       dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_FROM_DEVICE);
+
+       if (!*done) {
+               dev_err(&c->pdev->dev, "timeout waiting for DMA\n");
+               goto out_copy;
+       }
+
+       return 0;
+
+out_copy:
+       /* PIO fallback: direct copy from the memory-mapped BufferRAM. */
+       memcpy(buf, this->base + bram_offset, count);
+       return 0;
+}
+
+/*
+ * Write @count bytes from @buffer into BufferRAM.  Word-aligned transfers
+ * of at least 384 bytes go through system DMA; everything else (including
+ * panic_write() from interrupt context) falls back to memcpy (out_copy).
+ *
+ * Fix: the DMA mapping-error check and the unmap used dma_dst — the
+ * device-side GPMC address, which is never dma_map_single()'d — instead
+ * of dma_src, the handle actually returned by dma_map_single().  A map
+ * failure now also falls back to PIO, matching the read path.
+ */
+static int omap3_onenand_write_bufferram(struct mtd_info *mtd, int area,
+                                        const unsigned char *buffer,
+                                        int offset, size_t count)
+{
+       struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+       struct onenand_chip *this = mtd->priv;
+       dma_addr_t dma_src, dma_dst;
+       int bram_offset;
+       unsigned long timeout;
+       void *buf = (void *)buffer;
+       volatile unsigned *done;
+
+       bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
+       /* Small or unaligned transfers are not worth the DMA setup cost. */
+       if (bram_offset & 3 || (size_t)buf & 3 || count < 384)
+               goto out_copy;
+
+       /* panic_write() may be in an interrupt context */
+       if (in_interrupt())
+               goto out_copy;
+
+       /* Highmem/vmalloc buffer: translate to a direct-mapped address, but
+        * only when the whole transfer stays within a single page. */
+       if (buf >= high_memory) {
+               struct page *p1;
+
+               if (((size_t)buf & PAGE_MASK) !=
+                   ((size_t)(buf + count - 1) & PAGE_MASK))
+                       goto out_copy;
+               p1 = vmalloc_to_page(buf);
+               if (!p1)
+                       goto out_copy;
+               buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK);
+       }
+
+       dma_src = dma_map_single(&c->pdev->dev, buf, count, DMA_TO_DEVICE);
+       dma_dst = c->phys_base + bram_offset;
+       if (dma_mapping_error(&c->pdev->dev, dma_src)) {
+               dev_err(&c->pdev->dev,
+                       "Couldn't DMA map a %d byte buffer\n",
+                       count);
+               goto out_copy;
+       }
+
+       /* 32-bit elements, post-incrementing both source and destination. */
+       omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32,
+                                    count >> 2, 1, 0, 0, 0);
+       omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+                               dma_src, 0, 0);
+       omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+                                dma_dst, 0, 0);
+
+       INIT_COMPLETION(c->dma_done);
+       omap_start_dma(c->dma_channel);
+
+       /* Busy-wait on the completion counter with a 20 ms timeout. */
+       timeout = jiffies + msecs_to_jiffies(20);
+       done = &c->dma_done.done;
+       while (time_before(jiffies, timeout))
+               if (*done)
+                       break;
+
+       dma_unmap_single(&c->pdev->dev, dma_src, count, DMA_TO_DEVICE);
+
+       if (!*done) {
+               dev_err(&c->pdev->dev, "timeout waiting for DMA\n");
+               goto out_copy;
+       }
+
+       return 0;
+
+out_copy:
+       /* PIO fallback: direct copy into the memory-mapped BufferRAM. */
+       memcpy(this->base + bram_offset, buf, count);
+       return 0;
+}
+
+#else
+
+int omap3_onenand_read_bufferram(struct mtd_info *mtd, int area,
+                                unsigned char *buffer, int offset,
+                                size_t count);
+
+int omap3_onenand_write_bufferram(struct mtd_info *mtd, int area,
+                                 const unsigned char *buffer,
+                                 int offset, size_t count);
+
+#endif
+
+#if defined(CONFIG_ARCH_OMAP2) || defined(MULTI_OMAP2)
+
+/*
+ * OMAP2 BufferRAM read.  The DMA path below is currently disabled by the
+ * leading "1 ||" in the condition, so every transfer goes through memcpy.
+ */
+static int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area,
+                                       unsigned char *buffer, int offset,
+                                       size_t count)
+{
+       struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+       struct onenand_chip *this = mtd->priv;
+       dma_addr_t dma_src, dma_dst;
+       int bram_offset;
+
+       bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
+       /* DMA is not used.  Revisit PM requirements before enabling it. */
+       if (1 || (c->dma_channel < 0) ||
+           ((void *) buffer >= (void *) high_memory) || (bram_offset & 3) ||
+           (((unsigned int) buffer) & 3) || (count < 1024) || (count & 3)) {
+               memcpy(buffer, (__force void *)(this->base + bram_offset),
+                      count);
+               return 0;
+       }
+
+       /* Unreachable while DMA is disabled above; kept for re-enabling. */
+       dma_src = c->phys_base + bram_offset;
+       dma_dst = dma_map_single(&c->pdev->dev, buffer, count,
+                                DMA_FROM_DEVICE);
+       if (dma_mapping_error(&c->pdev->dev, dma_dst)) {
+               dev_err(&c->pdev->dev,
+                       "Couldn't DMA map a %d byte buffer\n",
+                       count);
+               return -1;
+       }
+
+       /* 32-bit elements, post-incrementing both source and destination. */
+       omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32,
+                                    count / 4, 1, 0, 0, 0);
+       omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+                               dma_src, 0, 0);
+       omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+                                dma_dst, 0, 0);
+
+       INIT_COMPLETION(c->dma_done);
+       omap_start_dma(c->dma_channel);
+       wait_for_completion(&c->dma_done);
+
+       dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_FROM_DEVICE);
+
+       return 0;
+}
+
+/*
+ * OMAP2 BufferRAM write.  The DMA path below is currently disabled by the
+ * leading "1 ||" in the condition, so every transfer goes through memcpy.
+ *
+ * Fix: the DMA mapping-error check and the unmap used dma_dst — the
+ * device-side GPMC address, which is never dma_map_single()'d — instead
+ * of dma_src, the handle actually returned by dma_map_single().
+ */
+static int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area,
+                                        const unsigned char *buffer,
+                                        int offset, size_t count)
+{
+       struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+       struct onenand_chip *this = mtd->priv;
+       dma_addr_t dma_src, dma_dst;
+       int bram_offset;
+
+       bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset;
+       /* DMA is not used.  Revisit PM requirements before enabling it. */
+       if (1 || (c->dma_channel < 0) ||
+           ((void *) buffer >= (void *) high_memory) || (bram_offset & 3) ||
+           (((unsigned int) buffer) & 3) || (count < 1024) || (count & 3)) {
+               memcpy((__force void *)(this->base + bram_offset), buffer,
+                      count);
+               return 0;
+       }
+
+       /* Unreachable while DMA is disabled above; kept for re-enabling. */
+       dma_src = dma_map_single(&c->pdev->dev, (void *) buffer, count,
+                                DMA_TO_DEVICE);
+       dma_dst = c->phys_base + bram_offset;
+       if (dma_mapping_error(&c->pdev->dev, dma_src)) {
+               dev_err(&c->pdev->dev,
+                       "Couldn't DMA map a %d byte buffer\n",
+                       count);
+               return -1;
+       }
+
+       /* NOTE(review): S16 elements / count/2 here differs from the read
+        * path's S32 / count/4 — confirm intent before enabling DMA. */
+       omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S16,
+                                    count / 2, 1, 0, 0, 0);
+       omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+                               dma_src, 0, 0);
+       omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC,
+                                dma_dst, 0, 0);
+
+       INIT_COMPLETION(c->dma_done);
+       omap_start_dma(c->dma_channel);
+       wait_for_completion(&c->dma_done);
+
+       dma_unmap_single(&c->pdev->dev, dma_src, count, DMA_TO_DEVICE);
+
+       return 0;
+}
+
+#else
+
+int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area,
+                                unsigned char *buffer, int offset,
+                                size_t count);
+
+int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area,
+                                 const unsigned char *buffer,
+                                 int offset, size_t count);
+
+#endif
+
+static struct platform_driver omap2_onenand_driver;
+
+/* driver_for_each_device() callback: re-run the platform timing setup
+ * hook for one bound OneNAND device. */
+static int __adjust_timing(struct device *dev, void *data)
+{
+       struct omap2_onenand *c = dev_get_drvdata(dev);
+
+       BUG_ON(c->setup == NULL);
+
+       /*
+        * DMA is not in use so this is all that is needed.
+        * Revisit for OMAP3!
+        */
+       return c->setup(c->onenand.base, c->freq);
+}
+
+/* Re-adjust GPMC timings for every device bound to this driver. */
+int omap2_onenand_rephase(void)
+{
+       struct device_driver *drv = &omap2_onenand_driver.driver;
+
+       return driver_for_each_device(drv, NULL, NULL, __adjust_timing);
+}
+
+/* Shutdown hook (also called from remove()): sanitize BufferRAM. */
+static void __devexit omap2_onenand_shutdown(struct platform_device *pdev)
+{
+       struct omap2_onenand *c = dev_get_drvdata(&pdev->dev);
+
+       /* With certain content in the buffer RAM, the OMAP boot ROM code
+        * can recognize the flash chip incorrectly. Zero it out before
+        * soft reset.
+        */
+       memset((__force void *)c->onenand.base, 0, ONENAND_BUFRAM_SIZE);
+}
+
+static int __devinit omap2_onenand_probe(struct platform_device *pdev)
+{
+       struct omap_onenand_platform_data *pdata;
+       struct omap2_onenand *c;
+       int r;
+
+       pdata = pdev->dev.platform_data;
+       if (pdata == NULL) {
+               dev_err(&pdev->dev, "platform data missing\n");
+               return -ENODEV;
+       }
+
+       c = kzalloc(sizeof(struct omap2_onenand), GFP_KERNEL);
+       if (!c)
+               return -ENOMEM;
+
+       init_completion(&c->irq_done);
+       init_completion(&c->dma_done);
+       c->gpmc_cs = pdata->cs;
+       c->gpio_irq = pdata->gpio_irq;
+       c->dma_channel = pdata->dma_channel;
+       if (c->dma_channel < 0) {
+               /* if -1, don't use DMA */
+               c->gpio_irq = 0;
+       }
+
+       r = gpmc_cs_request(c->gpmc_cs, ONENAND_IO_SIZE, &c->phys_base);
+       if (r < 0) {
+               dev_err(&pdev->dev, "Cannot request GPMC CS\n");
+               goto err_kfree;
+       }
+
+       if (request_mem_region(c->phys_base, ONENAND_IO_SIZE,
+                              pdev->dev.driver->name) == NULL) {
+               dev_err(&pdev->dev, "Cannot reserve memory region at 0x%08lx, "
+                       "size: 0x%x\n", c->phys_base, ONENAND_IO_SIZE);
+               r = -EBUSY;
+               goto err_free_cs;
+       }
+       c->onenand.base = ioremap(c->phys_base, ONENAND_IO_SIZE);
+       if (c->onenand.base == NULL) {
+               r = -ENOMEM;
+               goto err_release_mem_region;
+       }
+
+       if (pdata->onenand_setup != NULL) {
+               r = pdata->onenand_setup(c->onenand.base, c->freq);
+               if (r < 0) {
+                       dev_err(&pdev->dev, "Onenand platform setup failed: "
+                               "%d\n", r);
+                       goto err_iounmap;
+               }
+               c->setup = pdata->onenand_setup;
+       }
+
+       if (c->gpio_irq) {
+               if ((r = omap_request_gpio(c->gpio_irq)) < 0) {
+                       dev_err(&pdev->dev,  "Failed to request GPIO%d for "
+                               "OneNAND\n", c->gpio_irq);
+                       goto err_iounmap;
+       }
+       omap_set_gpio_direction(c->gpio_irq, 1);
+
+       if ((r = request_irq(OMAP_GPIO_IRQ(c->gpio_irq),
+                            omap2_onenand_interrupt, IRQF_TRIGGER_RISING,
+                            pdev->dev.driver->name, c)) < 0)
+               goto err_release_gpio;
+       }
+
+       if (c->dma_channel >= 0) {
+               r = omap_request_dma(0, pdev->dev.driver->name,
+                                    omap2_onenand_dma_cb, (void *) c,
+                                    &c->dma_channel);
+               if (r == 0) {
+                       omap_set_dma_write_mode(c->dma_channel,
+                                               OMAP_DMA_WRITE_NON_POSTED);
+                       omap_set_dma_src_data_pack(c->dma_channel, 1);
+                       omap_set_dma_src_burst_mode(c->dma_channel,
+                                                   OMAP_DMA_DATA_BURST_8);
+                       omap_set_dma_dest_data_pack(c->dma_channel, 1);
+                       omap_set_dma_dest_burst_mode(c->dma_channel,
+                                                    OMAP_DMA_DATA_BURST_8);
+               } else {
+                       dev_info(&pdev->dev,
+                                "failed to allocate DMA for OneNAND, "
+                                "using PIO instead\n");
+                       c->dma_channel = -1;
+               }
+       }
+
+       dev_info(&pdev->dev, "initializing on CS%d, phys base 0x%08lx, virtual "
+                "base %p\n", c->gpmc_cs, c->phys_base,
+                c->onenand.base);
+
+       c->pdev = pdev;
+       c->mtd.name = pdev->dev.bus_id;
+       c->mtd.priv = &c->onenand;
+       c->mtd.owner = THIS_MODULE;
+
+       if (c->dma_channel >= 0) {
+               struct onenand_chip *this = &c->onenand;
+
+               this->wait = omap2_onenand_wait;
+               if (cpu_is_omap34xx()) {
+                       this->read_bufferram = omap3_onenand_read_bufferram;
+                       this->write_bufferram = omap3_onenand_write_bufferram;
+               } else {
+                       this->read_bufferram = omap2_onenand_read_bufferram;
+                       this->write_bufferram = omap2_onenand_write_bufferram;
+               }
+       }
+
+       if ((r = onenand_scan(&c->mtd, 1)) < 0)
+               goto err_release_dma;
+
+       switch ((c->onenand.version_id >> 4) & 0xf) {
+       case 0:
+               c->freq = 40;
+               break;
+       case 1:
+               c->freq = 54;
+               break;
+       case 2:
+               c->freq = 66;
+               break;
+       case 3:
+               c->freq = 83;
+               break;
+       }
+
+#ifdef CONFIG_MTD_PARTITIONS
+       if (pdata->parts != NULL)
+               r = add_mtd_partitions(&c->mtd, pdata->parts,
+                                      pdata->nr_parts);
+       else
+#endif
+               r = add_mtd_device(&c->mtd);
+       if (r < 0)
+               goto err_release_onenand;
+
+       platform_set_drvdata(pdev, c);
+
+       return 0;
+
+err_release_onenand:
+       onenand_release(&c->mtd);
+err_release_dma:
+       if (c->dma_channel != -1)
+               omap_free_dma(c->dma_channel);
+       if (c->gpio_irq)
+               free_irq(OMAP_GPIO_IRQ(c->gpio_irq), c);
+err_release_gpio:
+       if (c->gpio_irq)
+               omap_free_gpio(c->gpio_irq);
+err_iounmap:
+       iounmap(c->onenand.base);
+err_release_mem_region:
+       release_mem_region(c->phys_base, ONENAND_IO_SIZE);
+err_free_cs:
+       gpmc_cs_free(c->gpmc_cs);
+err_kfree:
+       kfree(c);
+
+       return r;
+}
+
+/* Tear down everything probe() set up, in reverse order. */
+static int __devexit omap2_onenand_remove(struct platform_device *pdev)
+{
+       struct omap2_onenand *c = dev_get_drvdata(&pdev->dev);
+
+       BUG_ON(c == NULL);
+
+#ifdef CONFIG_MTD_PARTITIONS
+       if (c->parts)
+               del_mtd_partitions(&c->mtd);
+       else
+               del_mtd_device(&c->mtd);
+#else
+       del_mtd_device(&c->mtd);
+#endif
+
+       onenand_release(&c->mtd);
+       if (c->dma_channel != -1)
+               omap_free_dma(c->dma_channel);
+       /* Zero BufferRAM before unmapping (see omap2_onenand_shutdown). */
+       omap2_onenand_shutdown(pdev);
+       platform_set_drvdata(pdev, NULL);
+       if (c->gpio_irq) {
+               free_irq(OMAP_GPIO_IRQ(c->gpio_irq), c);
+               omap_free_gpio(c->gpio_irq);
+       }
+       iounmap(c->onenand.base);
+       release_mem_region(c->phys_base, ONENAND_IO_SIZE);
+       kfree(c);
+
+       return 0;
+}
+
+/* Platform driver glue binding probe/remove/shutdown to DRIVER_NAME. */
+static struct platform_driver omap2_onenand_driver = {
+       .probe          = omap2_onenand_probe,
+       .remove         = omap2_onenand_remove,
+       .shutdown       = omap2_onenand_shutdown,
+       .driver         = {
+               .name   = DRIVER_NAME,
+               .owner  = THIS_MODULE,
+       },
+};
+
+/* Module entry point: announce the driver and register it. */
+static int __init omap2_onenand_init(void)
+{
+       printk(KERN_INFO "OneNAND driver initializing\n");
+
+       return platform_driver_register(&omap2_onenand_driver);
+}
+
+/* Module exit point: unregister the platform driver. */
+static void __exit omap2_onenand_exit(void)
+{
+       platform_driver_unregister(&omap2_onenand_driver);
+}
+
+module_init(omap2_onenand_init);
+module_exit(omap2_onenand_exit);
+
+/* Module metadata */
+MODULE_ALIAS(DRIVER_NAME);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
+MODULE_DESCRIPTION("Glue layer for OneNAND flash on OMAP2 / OMAP3");
index 926cf3a4135d1ca84094c40d018803709d059334..90ed319f26e6843c98d26dafba79762b2fd4abbc 100644 (file)
@@ -1794,7 +1794,7 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr)
                return -EINVAL;
        }
 
-       instr->fail_addr = 0xffffffff;
+       instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
 
        /* Grab the lock and see if the device is available */
        onenand_get_device(mtd, FL_ERASING);
index a5f3d60047d47f50da60ce7b8569ef326542173d..33a5d6ed6f18aa66181feb4ac84774343e68d5d3 100644 (file)
@@ -321,8 +321,7 @@ static void ssfdcr_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
        DEBUG(MTD_DEBUG_LEVEL1,
                "SSFDC_RO: cis_block=%d,erase_size=%d,map_len=%d,n_zones=%d\n",
                ssfdc->cis_block, ssfdc->erase_size, ssfdc->map_len,
-               (ssfdc->map_len + MAX_PHYS_BLK_PER_ZONE - 1) /
-               MAX_PHYS_BLK_PER_ZONE);
+               DIV_ROUND_UP(ssfdc->map_len, MAX_PHYS_BLK_PER_ZONE));
 
        /* Set geometry */
        ssfdc->heads = 16;
index 03c759b4eeb5e852b192794f242020bc45792c22..b30a0b83d7f193ab5cf29ad20cbd7c7e44669f22 100644 (file)
@@ -104,12 +104,9 @@ static int vol_cdev_open(struct inode *inode, struct file *file)
        struct ubi_volume_desc *desc;
        int vol_id = iminor(inode) - 1, mode, ubi_num;
 
-       lock_kernel();
        ubi_num = ubi_major2num(imajor(inode));
-       if (ubi_num < 0) {
-               unlock_kernel();
+       if (ubi_num < 0)
                return ubi_num;
-       }
 
        if (file->f_mode & FMODE_WRITE)
                mode = UBI_READWRITE;
@@ -119,7 +116,6 @@ static int vol_cdev_open(struct inode *inode, struct file *file)
        dbg_gen("open volume %d, mode %d", vol_id, mode);
 
        desc = ubi_open_volume(ubi_num, vol_id, mode);
-       unlock_kernel();
        if (IS_ERR(desc))
                return PTR_ERR(desc);
 
index 967bb4406df9e01c48aa5e5e0ff71a514558412d..4f2daa5bbecfbafe646da17a586cacd3328b5d07 100644 (file)
@@ -387,7 +387,7 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
                pnum, vol_id, lnum, ec, sqnum, bitflips);
 
        sv = add_volume(si, vol_id, pnum, vid_hdr);
-       if (IS_ERR(sv) < 0)
+       if (IS_ERR(sv))
                return PTR_ERR(sv);
 
        if (si->max_sqnum < sqnum)
index 217d0e111b2a4652212f563cb9baf5255f2bbdca..333c8941552fbe3067be1a9a40fd3c0f704386ad 100644 (file)
@@ -244,8 +244,8 @@ static int vtbl_check(const struct ubi_device *ubi,
                }
 
                if (reserved_pebs > ubi->good_peb_count) {
-                       dbg_err("too large reserved_pebs, good PEBs %d",
-                               ubi->good_peb_count);
+                       dbg_err("too large reserved_pebs %d, good PEBs %d",
+                               reserved_pebs, ubi->good_peb_count);
                        err = 9;
                        goto bad;
                }
index fc5f2dbf532383b637258a3aec52024654d6693e..8b51e10b7783d589b1d7893bb6febd052b3ee23b 100644 (file)
@@ -563,7 +563,7 @@ static int __iommu_flush_context(struct intel_iommu *iommu,
 
        spin_unlock_irqrestore(&iommu->register_lock, flag);
 
-       /* flush context entry will implictly flush write buffer */
+       /* flush context entry will implicitly flush write buffer */
        return 0;
 }
 
@@ -656,7 +656,7 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
        if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
                pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
                        DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
-       /* flush context entry will implictly flush write buffer */
+       /* flush iotlb entry will implicitly flush write buffer */
        return 0;
 }
 
index c9884bba22decf5a34926caa7d05a81b06a8bd90..dbe9f39f44363b3eb50d9cc7bcac0a50bd701315 100644 (file)
@@ -1358,11 +1358,10 @@ int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
        return 0;
 
 err_out:
-       dev_warn(&pdev->dev, "BAR %d: can't reserve %s region [%#llx-%#llx]\n",
+       dev_warn(&pdev->dev, "BAR %d: can't reserve %s region %pR\n",
                 bar,
                 pci_resource_flags(pdev, bar) & IORESOURCE_IO ? "I/O" : "mem",
-                (unsigned long long)pci_resource_start(pdev, bar),
-                (unsigned long long)pci_resource_end(pdev, bar));
+                &pdev->resource[bar]);
        return -EBUSY;
 }
 
index dd9161a054e1c1264891a918a634d5a60fa0d062..d3db8b24972995750eef5043b2ca700e779dedeb 100644 (file)
@@ -304,9 +304,8 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
                } else {
                        res->start = l64;
                        res->end = l64 + sz64;
-                       printk(KERN_DEBUG "PCI: %s reg %x 64bit mmio: [%llx, %llx]\n",
-                               pci_name(dev), pos, (unsigned long long)res->start,
-                               (unsigned long long)res->end);
+                       printk(KERN_DEBUG "PCI: %s reg %x 64bit mmio: %pR\n",
+                               pci_name(dev), pos, res);
                }
        } else {
                sz = pci_size(l, sz, mask);
@@ -316,9 +315,10 @@ static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
 
                res->start = l;
                res->end = l + sz;
-               printk(KERN_DEBUG "PCI: %s reg %x %s: [%llx, %llx]\n", pci_name(dev),
-                       pos, (res->flags & IORESOURCE_IO) ? "io port":"32bit mmio",
-                       (unsigned long long)res->start, (unsigned long long)res->end);
+               printk(KERN_DEBUG "PCI: %s reg %x %s: %pR\n",
+                      pci_name(dev), pos,
+                      (res->flags & IORESOURCE_IO) ? "io port":"32bit mmio",
+                      res);
        }
 
  out:
@@ -389,9 +389,8 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
                        res->start = base;
                if (!res->end)
                        res->end = limit + 0xfff;
-               printk(KERN_DEBUG "PCI: bridge %s io port: [%llx, %llx]\n",
-                       pci_name(dev), (unsigned long long) res->start,
-                       (unsigned long long) res->end);
+               printk(KERN_DEBUG "PCI: bridge %s io port: %pR\n",
+                      pci_name(dev), res);
        }
 
        res = child->resource[1];
@@ -403,9 +402,8 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
                res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM;
                res->start = base;
                res->end = limit + 0xfffff;
-               printk(KERN_DEBUG "PCI: bridge %s 32bit mmio: [%llx, %llx]\n",
-                       pci_name(dev), (unsigned long long) res->start,
-                       (unsigned long long) res->end);
+               printk(KERN_DEBUG "PCI: bridge %s 32bit mmio: %pR\n",
+                      pci_name(dev), res);
        }
 
        res = child->resource[2];
@@ -441,9 +439,9 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
                res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH;
                res->start = base;
                res->end = limit + 0xfffff;
-               printk(KERN_DEBUG "PCI: bridge %s %sbit mmio pref: [%llx, %llx]\n",
-                       pci_name(dev), (res->flags & PCI_PREF_RANGE_TYPE_64) ? "64" : "32",
-                       (unsigned long long) res->start, (unsigned long long) res->end);
+               printk(KERN_DEBUG "PCI: bridge %s %sbit mmio pref: %pR\n",
+                      pci_name(dev),
+                      (res->flags & PCI_PREF_RANGE_TYPE_64) ? "64":"32", res);
        }
 }
 
index bd5c0e031398e9b50404380076b69c0c25889c71..1f5f6143f35cdcb8fe7777d72409eb3ed8ba6395 100644 (file)
@@ -21,7 +21,7 @@
  * between the ROM and other resources, so enabling it may disable access
  * to MMIO registers or other card memory.
  */
-static int pci_enable_rom(struct pci_dev *pdev)
+int pci_enable_rom(struct pci_dev *pdev)
 {
        struct resource *res = pdev->resource + PCI_ROM_RESOURCE;
        struct pci_bus_region region;
@@ -45,7 +45,7 @@ static int pci_enable_rom(struct pci_dev *pdev)
  * Disable ROM decoding on a PCI device by turning off the last bit in the
  * ROM BAR.
  */
-static void pci_disable_rom(struct pci_dev *pdev)
+void pci_disable_rom(struct pci_dev *pdev)
 {
        u32 rom_addr;
        pci_read_config_dword(pdev, pdev->rom_base_reg, &rom_addr);
@@ -260,3 +260,5 @@ void pci_cleanup_rom(struct pci_dev *pdev)
 
 EXPORT_SYMBOL(pci_map_rom);
 EXPORT_SYMBOL(pci_unmap_rom);
+EXPORT_SYMBOL_GPL(pci_enable_rom);
+EXPORT_SYMBOL_GPL(pci_disable_rom);
index d5e2106760f81743267dd85169553c4158c1373b..471a429d7a20fec7b116537ed5e90c912da8b7b7 100644 (file)
@@ -356,10 +356,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
                        order = __ffs(align) - 20;
                        if (order > 11) {
                                dev_warn(&dev->dev, "BAR %d bad alignment %llx: "
-                                      "%#016llx-%#016llx\n", i,
-                                      (unsigned long long)align,
-                                      (unsigned long long)r->start,
-                                      (unsigned long long)r->end);
+                                        "%pR\n", i, (unsigned long long)align, r);
                                r->flags = 0;
                                continue;
                        }
@@ -539,11 +536,9 @@ static void pci_bus_dump_res(struct pci_bus *bus)
                 if (!res)
                         continue;
 
-               printk(KERN_INFO "bus: %02x index %x %s: [%llx, %llx]\n",
-                       bus->number, i,
-                       (res->flags & IORESOURCE_IO) ? "io port" : "mmio",
-                       (unsigned long long) res->start,
-                       (unsigned long long) res->end);
+               printk(KERN_INFO "bus: %02x index %x %s: %pR\n",
+                      bus->number, i,
+                      (res->flags & IORESOURCE_IO) ? "io port" : "mmio", res);
         }
 }
 
index 1a5fc83c71b36fec69fe45a30006a27db9bc1300..d4b5c690eaa776112c46cc8f9ca993a49e233d50 100644 (file)
@@ -49,10 +49,8 @@ void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno)
 
        pcibios_resource_to_bus(dev, &region, res);
 
-       dev_dbg(&dev->dev, "BAR %d: got res [%#llx-%#llx] bus [%#llx-%#llx] "
-               "flags %#lx\n", resno,
-                (unsigned long long)res->start,
-                (unsigned long long)res->end,
+       dev_dbg(&dev->dev, "BAR %d: got res %pR bus [%#llx-%#llx] "
+               "flags %#lx\n", resno, res,
                 (unsigned long long)region.start,
                 (unsigned long long)region.end,
                 (unsigned long)res->flags);
@@ -114,13 +112,11 @@ int pci_claim_resource(struct pci_dev *dev, int resource)
                err = insert_resource(root, res);
 
        if (err) {
-               dev_err(&dev->dev, "BAR %d: %s of %s [%#llx-%#llx]\n",
+               dev_err(&dev->dev, "BAR %d: %s of %s %pR\n",
                        resource,
                        root ? "address space collision on" :
                                "no parent found for",
-                       dtype,
-                       (unsigned long long)res->start,
-                       (unsigned long long)res->end);
+                       dtype, res);
        }
 
        return err;
@@ -139,9 +135,8 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
        align = resource_alignment(res);
        if (!align) {
                dev_err(&dev->dev, "BAR %d: can't allocate resource (bogus "
-                       "alignment) [%#llx-%#llx] flags %#lx\n",
-                       resno, (unsigned long long)res->start,
-                       (unsigned long long)res->end, res->flags);
+                       "alignment) %pR flags %#lx\n",
+                       resno, res, res->flags);
                return -EINVAL;
        }
 
@@ -162,11 +157,8 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
        }
 
        if (ret) {
-               dev_err(&dev->dev, "BAR %d: can't allocate %s resource "
-                       "[%#llx-%#llx]\n", resno,
-                       res->flags & IORESOURCE_IO ? "I/O" : "mem",
-                       (unsigned long long)res->start,
-                       (unsigned long long)res->end);
+               dev_err(&dev->dev, "BAR %d: can't allocate %s resource %pR\n",
+                       resno, res->flags & IORESOURCE_IO ? "I/O" : "mem", res);
        } else {
                res->flags &= ~IORESOURCE_STARTALIGN;
                if (resno < PCI_BRIDGE_RESOURCES)
@@ -202,11 +194,8 @@ int pci_assign_resource_fixed(struct pci_dev *dev, int resno)
        }
 
        if (ret) {
-               dev_err(&dev->dev, "BAR %d: can't allocate %s resource "
-                       "[%#llx-%#llx\n]", resno,
-                       res->flags & IORESOURCE_IO ? "I/O" : "mem",
-                       (unsigned long long)res->start,
-                       (unsigned long long)res->end);
+               dev_err(&dev->dev, "BAR %d: can't allocate %s resource %pR\n",
+                       resno, res->flags & IORESOURCE_IO ? "I/O" : "mem", res);
        } else if (resno < PCI_BRIDGE_RESOURCES) {
                pci_update_resource(dev, res, resno);
        }
@@ -237,9 +226,8 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
                r_align = resource_alignment(r);
                if (!r_align) {
                        dev_warn(&dev->dev, "BAR %d: bogus alignment "
-                               "[%#llx-%#llx] flags %#lx\n",
-                               i, (unsigned long long)r->start,
-                               (unsigned long long)r->end, r->flags);
+                               "%pR flags %#lx\n",
+                               i, r, r->flags);
                        continue;
                }
                for (list = head; ; list = list->next) {
@@ -287,9 +275,7 @@ int pci_enable_resources(struct pci_dev *dev, int mask)
 
                if (!r->parent) {
                        dev_err(&dev->dev, "device not available because of "
-                               "BAR %d [%#llx-%#llx] collisions\n", i,
-                               (unsigned long long) r->start,
-                               (unsigned long long) r->end);
+                               "BAR %d %pR collisions\n", i, r);
                        return -EINVAL;
                }
 
index fe2aeb11939b3d9a327167e3d162043a6c0f9914..23ae8460f5c1a71f7b794745837799ac1fc21f25 100644 (file)
@@ -30,7 +30,7 @@
 
 #define POWER_SUPPLY_ATTR(_name)                                       \
 {                                                                      \
-       .attr = { .name = #_name, .mode = 0444, .owner = THIS_MODULE }, \
+       .attr = { .name = #_name, .mode = 0444 },       \
        .show = power_supply_show_property,                             \
        .store = NULL,                                                  \
 }
index 6f2f90ebb020db95256f2bc135414ada5ca77d83..06848b254d5752d3a5d40ad2a68b8c4bb686b01e 100644 (file)
@@ -915,6 +915,22 @@ int ps3av_video_mute(int mute)
 
 EXPORT_SYMBOL_GPL(ps3av_video_mute);
 
+/* mute analog output only */
+int ps3av_audio_mute_analog(int mute)
+{
+       int i, res;
+
+       for (i = 0; i < ps3av->av_hw_conf.num_of_avmulti; i++) {
+               res = ps3av_cmd_av_audio_mute(1,
+                       &ps3av->av_port[i + ps3av->av_hw_conf.num_of_hdmi],
+                       mute);
+               if (res < 0)
+                       return -1;
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(ps3av_audio_mute_analog);
+
 int ps3av_audio_mute(int mute)
 {
        return ps3av_set_audio_mute(mute ? PS3AV_CMD_MUTE_ON
index 7f880c26122f36a11b7390a14587693f10898229..11eb50318fec9ce093e6b9ef99e6fd858c4ead20 100644 (file)
@@ -660,9 +660,10 @@ u32 ps3av_cmd_set_av_audio_param(void *p, u32 port,
 }
 
 /* default cs val */
-static const u8 ps3av_mode_cs_info[] = {
+u8 ps3av_mode_cs_info[] = {
        0x00, 0x09, 0x00, 0x02, 0x01, 0x00, 0x00, 0x00
 };
+EXPORT_SYMBOL_GPL(ps3av_mode_cs_info);
 
 #define CS_44  0x00
 #define CS_48  0x02
@@ -677,7 +678,7 @@ void ps3av_cmd_set_audio_mode(struct ps3av_pkt_audio_mode *audio, u32 avport,
                              u32 ch, u32 fs, u32 word_bits, u32 format,
                              u32 source)
 {
-       int spdif_through, spdif_bitstream;
+       int spdif_through;
        int i;
 
        if (!(ch | fs | format | word_bits | source)) {
@@ -687,7 +688,6 @@ void ps3av_cmd_set_audio_mode(struct ps3av_pkt_audio_mode *audio, u32 avport,
                format = PS3AV_CMD_AUDIO_FORMAT_PCM;
                source = PS3AV_CMD_AUDIO_SOURCE_SERIAL;
        }
-       spdif_through = spdif_bitstream = 0;    /* XXX not supported */
 
        /* audio mode */
        memset(audio, 0, sizeof(*audio));
@@ -777,16 +777,17 @@ void ps3av_cmd_set_audio_mode(struct ps3av_pkt_audio_mode *audio, u32 avport,
                break;
        }
 
+       /* non-audio bit */
+       spdif_through = audio->audio_cs_info[0] & 0x02;
+
        /* pass through setting */
        if (spdif_through &&
            (avport == PS3AV_CMD_AVPORT_SPDIF_0 ||
-            avport == PS3AV_CMD_AVPORT_SPDIF_1)) {
+            avport == PS3AV_CMD_AVPORT_SPDIF_1 ||
+            avport == PS3AV_CMD_AVPORT_HDMI_0 ||
+            avport == PS3AV_CMD_AVPORT_HDMI_1)) {
                audio->audio_word_bits = PS3AV_CMD_AUDIO_WORD_BITS_16;
-               audio->audio_source = PS3AV_CMD_AUDIO_SOURCE_SPDIF;
-               if (spdif_bitstream) {
-                       audio->audio_format = PS3AV_CMD_AUDIO_FORMAT_BITSTREAM;
-                       audio->audio_cs_info[0] |= CS_BIT;
-               }
+               audio->audio_format = PS3AV_CMD_AUDIO_FORMAT_BITSTREAM;
        }
 }
 
index 37082616482bd7a79ad3162ba00ca82bd260894e..b5bf937069134937d064306b92e8e885948feaf0 100644 (file)
@@ -53,21 +53,21 @@ static void at91_rtc_decodetime(unsigned int timereg, unsigned int calreg,
        } while ((time != at91_sys_read(timereg)) ||
                        (date != at91_sys_read(calreg)));
 
-       tm->tm_sec  = BCD2BIN((time & AT91_RTC_SEC) >> 0);
-       tm->tm_min  = BCD2BIN((time & AT91_RTC_MIN) >> 8);
-       tm->tm_hour = BCD2BIN((time & AT91_RTC_HOUR) >> 16);
+       tm->tm_sec  = bcd2bin((time & AT91_RTC_SEC) >> 0);
+       tm->tm_min  = bcd2bin((time & AT91_RTC_MIN) >> 8);
+       tm->tm_hour = bcd2bin((time & AT91_RTC_HOUR) >> 16);
 
        /*
         * The Calendar Alarm register does not have a field for
         * the year - so these will return an invalid value.  When an
         * alarm is set, at91_alarm_year wille store the current year.
         */
-       tm->tm_year  = BCD2BIN(date & AT91_RTC_CENT) * 100;     /* century */
-       tm->tm_year += BCD2BIN((date & AT91_RTC_YEAR) >> 8);    /* year */
+       tm->tm_year  = bcd2bin(date & AT91_RTC_CENT) * 100;     /* century */
+       tm->tm_year += bcd2bin((date & AT91_RTC_YEAR) >> 8);    /* year */
 
-       tm->tm_wday = BCD2BIN((date & AT91_RTC_DAY) >> 21) - 1; /* day of the week [0-6], Sunday=0 */
-       tm->tm_mon  = BCD2BIN((date & AT91_RTC_MONTH) >> 16) - 1;
-       tm->tm_mday = BCD2BIN((date & AT91_RTC_DATE) >> 24);
+       tm->tm_wday = bcd2bin((date & AT91_RTC_DAY) >> 21) - 1; /* day of the week [0-6], Sunday=0 */
+       tm->tm_mon  = bcd2bin((date & AT91_RTC_MONTH) >> 16) - 1;
+       tm->tm_mday = bcd2bin((date & AT91_RTC_DATE) >> 24);
 }
 
 /*
@@ -106,16 +106,16 @@ static int at91_rtc_settime(struct device *dev, struct rtc_time *tm)
        at91_sys_write(AT91_RTC_IDR, AT91_RTC_ACKUPD);
 
        at91_sys_write(AT91_RTC_TIMR,
-                         BIN2BCD(tm->tm_sec) << 0
-                       | BIN2BCD(tm->tm_min) << 8
-                       | BIN2BCD(tm->tm_hour) << 16);
+                         bin2bcd(tm->tm_sec) << 0
+                       | bin2bcd(tm->tm_min) << 8
+                       | bin2bcd(tm->tm_hour) << 16);
 
        at91_sys_write(AT91_RTC_CALR,
-                         BIN2BCD((tm->tm_year + 1900) / 100)   /* century */
-                       | BIN2BCD(tm->tm_year % 100) << 8       /* year */
-                       | BIN2BCD(tm->tm_mon + 1) << 16         /* tm_mon starts at zero */
-                       | BIN2BCD(tm->tm_wday + 1) << 21        /* day of the week [0-6], Sunday=0 */
-                       | BIN2BCD(tm->tm_mday) << 24);
+                         bin2bcd((tm->tm_year + 1900) / 100)   /* century */
+                       | bin2bcd(tm->tm_year % 100) << 8       /* year */
+                       | bin2bcd(tm->tm_mon + 1) << 16         /* tm_mon starts at zero */
+                       | bin2bcd(tm->tm_wday + 1) << 21        /* day of the week [0-6], Sunday=0 */
+                       | bin2bcd(tm->tm_mday) << 24);
 
        /* Restart Time/Calendar */
        cr = at91_sys_read(AT91_RTC_CR);
@@ -162,13 +162,13 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 
        at91_sys_write(AT91_RTC_IDR, AT91_RTC_ALARM);
        at91_sys_write(AT91_RTC_TIMALR,
-                 BIN2BCD(tm.tm_sec) << 0
-               | BIN2BCD(tm.tm_min) << 8
-               | BIN2BCD(tm.tm_hour) << 16
+                 bin2bcd(tm.tm_sec) << 0
+               | bin2bcd(tm.tm_min) << 8
+               | bin2bcd(tm.tm_hour) << 16
                | AT91_RTC_HOUREN | AT91_RTC_MINEN | AT91_RTC_SECEN);
        at91_sys_write(AT91_RTC_CALALR,
-                 BIN2BCD(tm.tm_mon + 1) << 16          /* tm_mon starts at zero */
-               | BIN2BCD(tm.tm_mday) << 24
+                 bin2bcd(tm.tm_mon + 1) << 16          /* tm_mon starts at zero */
+               | bin2bcd(tm.tm_mday) << 24
                | AT91_RTC_DATEEN | AT91_RTC_MTHEN);
 
        if (alrm->enabled) {
index 189a018bdf34ac2a731c11b86f06cb897194e9ba..d00a274df8fcedf76d00d52d668493e9e9e068cf 100644 (file)
@@ -71,14 +71,14 @@ static int bq4802_read_time(struct device *dev, struct rtc_time *tm)
 
        spin_unlock_irqrestore(&p->lock, flags);
 
-       BCD_TO_BIN(tm->tm_sec);
-       BCD_TO_BIN(tm->tm_min);
-       BCD_TO_BIN(tm->tm_hour);
-       BCD_TO_BIN(tm->tm_mday);
-       BCD_TO_BIN(tm->tm_mon);
-       BCD_TO_BIN(tm->tm_year);
-       BCD_TO_BIN(tm->tm_wday);
-       BCD_TO_BIN(century);
+       tm->tm_sec = bcd2bin(tm->tm_sec);
+       tm->tm_min = bcd2bin(tm->tm_min);
+       tm->tm_hour = bcd2bin(tm->tm_hour);
+       tm->tm_mday = bcd2bin(tm->tm_mday);
+       tm->tm_mon = bcd2bin(tm->tm_mon);
+       tm->tm_year = bcd2bin(tm->tm_year);
+       tm->tm_wday = bcd2bin(tm->tm_wday);
+       century = bcd2bin(century);
 
        tm->tm_year += (century * 100);
        tm->tm_year -= 1900;
@@ -106,13 +106,13 @@ static int bq4802_set_time(struct device *dev, struct rtc_time *tm)
        min = tm->tm_min;
        sec = tm->tm_sec;
 
-       BIN_TO_BCD(sec);
-       BIN_TO_BCD(min);
-       BIN_TO_BCD(hrs);
-       BIN_TO_BCD(day);
-       BIN_TO_BCD(mon);
-       BIN_TO_BCD(yrs);
-       BIN_TO_BCD(century);
+       sec = bin2bcd(sec);
+       min = bin2bcd(min);
+       hrs = bin2bcd(hrs);
+       day = bin2bcd(day);
+       mon = bin2bcd(mon);
+       yrs = bin2bcd(yrs);
+       century = bin2bcd(century);
 
        spin_lock_irqsave(&p->lock, flags);
 
index 963ad0b6a4e9da64ff911cba52508e5c39b8085a..5549231179a2dacb955357e69cd6b521d5e7954f 100644 (file)
@@ -143,6 +143,43 @@ static inline int hpet_unregister_irq_handler(irq_handler_t handler)
 
 /*----------------------------------------------------------------*/
 
+#ifdef RTC_PORT
+
+/* Most newer x86 systems have two register banks, the first used
+ * for RTC and NVRAM and the second only for NVRAM.  Caller must
+ * own rtc_lock ... and we won't worry about access during NMI.
+ */
+#define can_bank2      true
+
+static inline unsigned char cmos_read_bank2(unsigned char addr)
+{
+       outb(addr, RTC_PORT(2));
+       return inb(RTC_PORT(3));
+}
+
+static inline void cmos_write_bank2(unsigned char val, unsigned char addr)
+{
+       outb(addr, RTC_PORT(2));
+       outb(val, RTC_PORT(2));
+}
+
+#else
+
+#define can_bank2      false
+
+static inline unsigned char cmos_read_bank2(unsigned char addr)
+{
+       return 0;
+}
+
+static inline void cmos_write_bank2(unsigned char val, unsigned char addr)
+{
+}
+
+#endif
+
+/*----------------------------------------------------------------*/
+
 static int cmos_read_time(struct device *dev, struct rtc_time *t)
 {
        /* REVISIT:  if the clock has a "century" register, use
@@ -203,26 +240,26 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
        /* REVISIT this assumes PC style usage:  always BCD */
 
        if (((unsigned)t->time.tm_sec) < 0x60)
-               t->time.tm_sec = BCD2BIN(t->time.tm_sec);
+               t->time.tm_sec = bcd2bin(t->time.tm_sec);
        else
                t->time.tm_sec = -1;
        if (((unsigned)t->time.tm_min) < 0x60)
-               t->time.tm_min = BCD2BIN(t->time.tm_min);
+               t->time.tm_min = bcd2bin(t->time.tm_min);
        else
                t->time.tm_min = -1;
        if (((unsigned)t->time.tm_hour) < 0x24)
-               t->time.tm_hour = BCD2BIN(t->time.tm_hour);
+               t->time.tm_hour = bcd2bin(t->time.tm_hour);
        else
                t->time.tm_hour = -1;
 
        if (cmos->day_alrm) {
                if (((unsigned)t->time.tm_mday) <= 0x31)
-                       t->time.tm_mday = BCD2BIN(t->time.tm_mday);
+                       t->time.tm_mday = bcd2bin(t->time.tm_mday);
                else
                        t->time.tm_mday = -1;
                if (cmos->mon_alrm) {
                        if (((unsigned)t->time.tm_mon) <= 0x12)
-                               t->time.tm_mon = BCD2BIN(t->time.tm_mon) - 1;
+                               t->time.tm_mon = bcd2bin(t->time.tm_mon) - 1;
                        else
                                t->time.tm_mon = -1;
                }
@@ -294,19 +331,19 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
        /* Writing 0xff means "don't care" or "match all".  */
 
        mon = t->time.tm_mon + 1;
-       mon = (mon <= 12) ? BIN2BCD(mon) : 0xff;
+       mon = (mon <= 12) ? bin2bcd(mon) : 0xff;
 
        mday = t->time.tm_mday;
-       mday = (mday >= 1 && mday <= 31) ? BIN2BCD(mday) : 0xff;
+       mday = (mday >= 1 && mday <= 31) ? bin2bcd(mday) : 0xff;
 
        hrs = t->time.tm_hour;
-       hrs = (hrs < 24) ? BIN2BCD(hrs) : 0xff;
+       hrs = (hrs < 24) ? bin2bcd(hrs) : 0xff;
 
        min = t->time.tm_min;
-       min = (min < 60) ? BIN2BCD(min) : 0xff;
+       min = (min < 60) ? bin2bcd(min) : 0xff;
 
        sec = t->time.tm_sec;
-       sec = (sec < 60) ? BIN2BCD(sec) : 0xff;
+       sec = (sec < 60) ? bin2bcd(sec) : 0xff;
 
        spin_lock_irq(&rtc_lock);
 
@@ -491,12 +528,21 @@ cmos_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
 
        if (unlikely(off >= attr->size))
                return 0;
+       if (unlikely(off < 0))
+               return -EINVAL;
        if ((off + count) > attr->size)
                count = attr->size - off;
 
+       off += NVRAM_OFFSET;
        spin_lock_irq(&rtc_lock);
-       for (retval = 0, off += NVRAM_OFFSET; count--; retval++, off++)
-               *buf++ = CMOS_READ(off);
+       for (retval = 0; count; count--, off++, retval++) {
+               if (off < 128)
+                       *buf++ = CMOS_READ(off);
+               else if (can_bank2)
+                       *buf++ = cmos_read_bank2(off);
+               else
+                       break;
+       }
        spin_unlock_irq(&rtc_lock);
 
        return retval;
@@ -512,6 +558,8 @@ cmos_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
        cmos = dev_get_drvdata(container_of(kobj, struct device, kobj));
        if (unlikely(off >= attr->size))
                return -EFBIG;
+       if (unlikely(off < 0))
+               return -EINVAL;
        if ((off + count) > attr->size)
                count = attr->size - off;
 
@@ -520,15 +568,20 @@ cmos_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
         * here.  If userspace is smart enough to know what fields of
         * NVRAM to update, updating checksums is also part of its job.
         */
+       off += NVRAM_OFFSET;
        spin_lock_irq(&rtc_lock);
-       for (retval = 0, off += NVRAM_OFFSET; count--; retval++, off++) {
+       for (retval = 0; count; count--, off++, retval++) {
                /* don't trash RTC registers */
                if (off == cmos->day_alrm
                                || off == cmos->mon_alrm
                                || off == cmos->century)
                        buf++;
-               else
+               else if (off < 128)
                        CMOS_WRITE(*buf++, off);
+               else if (can_bank2)
+                       cmos_write_bank2(*buf++, off);
+               else
+                       break;
        }
        spin_unlock_irq(&rtc_lock);
 
@@ -539,7 +592,6 @@ static struct bin_attribute nvram = {
        .attr = {
                .name   = "nvram",
                .mode   = S_IRUGO | S_IWUSR,
-               .owner  = THIS_MODULE,
        },
 
        .read   = cmos_nvram_read,
@@ -631,8 +683,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 
        /* Heuristic to deduce NVRAM size ... do what the legacy NVRAM
         * driver did, but don't reject unknown configs.   Old hardware
-        * won't address 128 bytes, and for now we ignore the way newer
-        * chips can address 256 bytes (using two more i/o ports).
+        * won't address 128 bytes.  Newer chips have multiple banks,
+        * though they may not be listed in one I/O resource.
         */
 #if    defined(CONFIG_ATARI)
        address_space = 64;
@@ -642,6 +694,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 #warning Assuming 128 bytes of RTC+NVRAM address space, not 64 bytes.
        address_space = 128;
 #endif
+       if (can_bank2 && ports->end > (ports->start + 1))
+               address_space = 256;
 
        /* For ACPI systems extension info comes from the FADT.  On others,
         * board specific setup provides it as appropriate.  Systems where
@@ -740,7 +794,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
                goto cleanup2;
        }
 
-       pr_info("%s: alarms up to one %s%s%s\n",
+       pr_info("%s: alarms up to one %s%s, %zd bytes nvram, %s irqs\n",
                        cmos_rtc.rtc->dev.bus_id,
                        is_valid_irq(rtc_irq)
                                ?  (cmos_rtc.mon_alrm
@@ -749,6 +803,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
                                                ? "month" : "day"))
                                : "no",
                        cmos_rtc.century ? ", y3k" : "",
+                       nvram.size,
                        is_hpet_enabled() ? ", hpet irqs" : "");
 
        return 0;
index 0b17770b032be69eb27d3e86f2621cd752ebdcb1..9a234a4ec06dbdf0a964acae4bc6a58be3588871 100644 (file)
@@ -86,19 +86,19 @@ static int ds1216_rtc_read_time(struct device *dev, struct rtc_time *tm)
        ds1216_switch_ds_to_clock(priv->ioaddr);
        ds1216_read(priv->ioaddr, (u8 *)&regs);
 
-       tm->tm_sec = BCD2BIN(regs.sec);
-       tm->tm_min = BCD2BIN(regs.min);
+       tm->tm_sec = bcd2bin(regs.sec);
+       tm->tm_min = bcd2bin(regs.min);
        if (regs.hour & DS1216_HOUR_1224) {
                /* AM/PM mode */
-               tm->tm_hour = BCD2BIN(regs.hour & 0x1f);
+               tm->tm_hour = bcd2bin(regs.hour & 0x1f);
                if (regs.hour & DS1216_HOUR_AMPM)
                        tm->tm_hour += 12;
        } else
-               tm->tm_hour = BCD2BIN(regs.hour & 0x3f);
+               tm->tm_hour = bcd2bin(regs.hour & 0x3f);
        tm->tm_wday = (regs.wday & 7) - 1;
-       tm->tm_mday = BCD2BIN(regs.mday & 0x3f);
-       tm->tm_mon = BCD2BIN(regs.month & 0x1f);
-       tm->tm_year = BCD2BIN(regs.year);
+       tm->tm_mday = bcd2bin(regs.mday & 0x3f);
+       tm->tm_mon = bcd2bin(regs.month & 0x1f);
+       tm->tm_year = bcd2bin(regs.year);
        if (tm->tm_year < 70)
                tm->tm_year += 100;
        return 0;
@@ -114,19 +114,19 @@ static int ds1216_rtc_set_time(struct device *dev, struct rtc_time *tm)
        ds1216_read(priv->ioaddr, (u8 *)&regs);
 
        regs.tsec = 0; /* clear 0.1 and 0.01 seconds */
-       regs.sec = BIN2BCD(tm->tm_sec);
-       regs.min = BIN2BCD(tm->tm_min);
+       regs.sec = bin2bcd(tm->tm_sec);
+       regs.min = bin2bcd(tm->tm_min);
        regs.hour &= DS1216_HOUR_1224;
        if (regs.hour && tm->tm_hour > 12) {
                regs.hour |= DS1216_HOUR_AMPM;
                tm->tm_hour -= 12;
        }
-       regs.hour |= BIN2BCD(tm->tm_hour);
+       regs.hour |= bin2bcd(tm->tm_hour);
        regs.wday &= ~7;
        regs.wday |= tm->tm_wday;
-       regs.mday = BIN2BCD(tm->tm_mday);
-       regs.month = BIN2BCD(tm->tm_mon);
-       regs.year = BIN2BCD(tm->tm_year % 100);
+       regs.mday = bin2bcd(tm->tm_mday);
+       regs.month = bin2bcd(tm->tm_mon);
+       regs.year = bin2bcd(tm->tm_year % 100);
 
        ds1216_switch_ds_to_clock(priv->ioaddr);
        ds1216_write(priv->ioaddr, (u8 *)&regs);
index 8f4e96bb229a06f4d073a96fa4033dcf4ff8899e..184556620778c5da5f8ec45dc3144d3b611a47ca 100644 (file)
@@ -107,13 +107,13 @@ static int ds1302_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
        spin_lock_irq(&rtc->lock);
 
-       tm->tm_sec      = BCD2BIN(ds1302_readbyte(RTC_ADDR_SEC));
-       tm->tm_min      = BCD2BIN(ds1302_readbyte(RTC_ADDR_MIN));
-       tm->tm_hour     = BCD2BIN(ds1302_readbyte(RTC_ADDR_HOUR));
-       tm->tm_wday     = BCD2BIN(ds1302_readbyte(RTC_ADDR_DAY));
-       tm->tm_mday     = BCD2BIN(ds1302_readbyte(RTC_ADDR_DATE));
-       tm->tm_mon      = BCD2BIN(ds1302_readbyte(RTC_ADDR_MON)) - 1;
-       tm->tm_year     = BCD2BIN(ds1302_readbyte(RTC_ADDR_YEAR));
+       tm->tm_sec      = bcd2bin(ds1302_readbyte(RTC_ADDR_SEC));
+       tm->tm_min      = bcd2bin(ds1302_readbyte(RTC_ADDR_MIN));
+       tm->tm_hour     = bcd2bin(ds1302_readbyte(RTC_ADDR_HOUR));
+       tm->tm_wday     = bcd2bin(ds1302_readbyte(RTC_ADDR_DAY));
+       tm->tm_mday     = bcd2bin(ds1302_readbyte(RTC_ADDR_DATE));
+       tm->tm_mon      = bcd2bin(ds1302_readbyte(RTC_ADDR_MON)) - 1;
+       tm->tm_year     = bcd2bin(ds1302_readbyte(RTC_ADDR_YEAR));
 
        if (tm->tm_year < 70)
                tm->tm_year += 100;
@@ -141,13 +141,13 @@ static int ds1302_rtc_set_time(struct device *dev, struct rtc_time *tm)
        /* Stop RTC */
        ds1302_writebyte(RTC_ADDR_SEC, ds1302_readbyte(RTC_ADDR_SEC) | 0x80);
 
-       ds1302_writebyte(RTC_ADDR_SEC, BIN2BCD(tm->tm_sec));
-       ds1302_writebyte(RTC_ADDR_MIN, BIN2BCD(tm->tm_min));
-       ds1302_writebyte(RTC_ADDR_HOUR, BIN2BCD(tm->tm_hour));
-       ds1302_writebyte(RTC_ADDR_DAY, BIN2BCD(tm->tm_wday));
-       ds1302_writebyte(RTC_ADDR_DATE, BIN2BCD(tm->tm_mday));
-       ds1302_writebyte(RTC_ADDR_MON, BIN2BCD(tm->tm_mon + 1));
-       ds1302_writebyte(RTC_ADDR_YEAR, BIN2BCD(tm->tm_year % 100));
+       ds1302_writebyte(RTC_ADDR_SEC, bin2bcd(tm->tm_sec));
+       ds1302_writebyte(RTC_ADDR_MIN, bin2bcd(tm->tm_min));
+       ds1302_writebyte(RTC_ADDR_HOUR, bin2bcd(tm->tm_hour));
+       ds1302_writebyte(RTC_ADDR_DAY, bin2bcd(tm->tm_wday));
+       ds1302_writebyte(RTC_ADDR_DATE, bin2bcd(tm->tm_mday));
+       ds1302_writebyte(RTC_ADDR_MON, bin2bcd(tm->tm_mon + 1));
+       ds1302_writebyte(RTC_ADDR_YEAR, bin2bcd(tm->tm_year % 100));
 
        /* Start RTC */
        ds1302_writebyte(RTC_ADDR_SEC, ds1302_readbyte(RTC_ADDR_SEC) & ~0x80);
index b91d02a3ace9d89846c83a257294294d8df36b98..fc372df6534b5e93b3296913be692188cd5c1d41 100644 (file)
@@ -114,10 +114,10 @@ static unsigned bcd2hour(u8 bcd)
                        hour = 12;
                        bcd &= ~DS1305_HR_PM;
                }
-               hour += BCD2BIN(bcd);
+               hour += bcd2bin(bcd);
                return hour - 1;
        }
-       return BCD2BIN(bcd);
+       return bcd2bin(bcd);
 }
 
 static u8 hour2bcd(bool hr12, int hour)
@@ -125,11 +125,11 @@ static u8 hour2bcd(bool hr12, int hour)
        if (hr12) {
                hour++;
                if (hour <= 12)
-                       return DS1305_HR_12 | BIN2BCD(hour);
+                       return DS1305_HR_12 | bin2bcd(hour);
                hour -= 12;
-               return DS1305_HR_12 | DS1305_HR_PM | BIN2BCD(hour);
+               return DS1305_HR_12 | DS1305_HR_PM | bin2bcd(hour);
        }
-       return BIN2BCD(hour);
+       return bin2bcd(hour);
 }
 
 /*----------------------------------------------------------------------*/
@@ -206,13 +206,13 @@ static int ds1305_get_time(struct device *dev, struct rtc_time *time)
                buf[4], buf[5], buf[6]);
 
        /* Decode the registers */
-       time->tm_sec = BCD2BIN(buf[DS1305_SEC]);
-       time->tm_min = BCD2BIN(buf[DS1305_MIN]);
+       time->tm_sec = bcd2bin(buf[DS1305_SEC]);
+       time->tm_min = bcd2bin(buf[DS1305_MIN]);
        time->tm_hour = bcd2hour(buf[DS1305_HOUR]);
        time->tm_wday = buf[DS1305_WDAY] - 1;
-       time->tm_mday = BCD2BIN(buf[DS1305_MDAY]);
-       time->tm_mon = BCD2BIN(buf[DS1305_MON]) - 1;
-       time->tm_year = BCD2BIN(buf[DS1305_YEAR]) + 100;
+       time->tm_mday = bcd2bin(buf[DS1305_MDAY]);
+       time->tm_mon = bcd2bin(buf[DS1305_MON]) - 1;
+       time->tm_year = bcd2bin(buf[DS1305_YEAR]) + 100;
 
        dev_vdbg(dev, "%s secs=%d, mins=%d, "
                "hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -239,13 +239,13 @@ static int ds1305_set_time(struct device *dev, struct rtc_time *time)
        /* Write registers starting at the first time/date address. */
        *bp++ = DS1305_WRITE | DS1305_SEC;
 
-       *bp++ = BIN2BCD(time->tm_sec);
-       *bp++ = BIN2BCD(time->tm_min);
+       *bp++ = bin2bcd(time->tm_sec);
+       *bp++ = bin2bcd(time->tm_min);
        *bp++ = hour2bcd(ds1305->hr12, time->tm_hour);
        *bp++ = (time->tm_wday < 7) ? (time->tm_wday + 1) : 1;
-       *bp++ = BIN2BCD(time->tm_mday);
-       *bp++ = BIN2BCD(time->tm_mon + 1);
-       *bp++ = BIN2BCD(time->tm_year - 100);
+       *bp++ = bin2bcd(time->tm_mday);
+       *bp++ = bin2bcd(time->tm_mon + 1);
+       *bp++ = bin2bcd(time->tm_year - 100);
 
        dev_dbg(dev, "%s: %02x %02x %02x, %02x %02x %02x %02x\n",
                "write", buf[1], buf[2], buf[3],
@@ -329,8 +329,8 @@ static int ds1305_get_alarm(struct device *dev, struct rtc_wkalrm *alm)
         * fill in the rest ... and also handle rollover to tomorrow when
         * that's needed.
         */
-       alm->time.tm_sec = BCD2BIN(buf[DS1305_SEC]);
-       alm->time.tm_min = BCD2BIN(buf[DS1305_MIN]);
+       alm->time.tm_sec = bcd2bin(buf[DS1305_SEC]);
+       alm->time.tm_min = bcd2bin(buf[DS1305_MIN]);
        alm->time.tm_hour = bcd2hour(buf[DS1305_HOUR]);
        alm->time.tm_mday = -1;
        alm->time.tm_mon = -1;
@@ -387,8 +387,8 @@ static int ds1305_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
 
        /* write alarm */
        buf[0] = DS1305_WRITE | DS1305_ALM0(DS1305_SEC);
-       buf[1 + DS1305_SEC] = BIN2BCD(alm->time.tm_sec);
-       buf[1 + DS1305_MIN] = BIN2BCD(alm->time.tm_min);
+       buf[1 + DS1305_SEC] = bin2bcd(alm->time.tm_sec);
+       buf[1 + DS1305_MIN] = bin2bcd(alm->time.tm_min);
        buf[1 + DS1305_HOUR] = hour2bcd(ds1305->hr12, alm->time.tm_hour);
        buf[1 + DS1305_WDAY] = DS1305_ALM_DISABLE;
 
@@ -606,7 +606,6 @@ ds1305_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
 static struct bin_attribute nvram = {
        .attr.name      = "nvram",
        .attr.mode      = S_IRUGO | S_IWUSR,
-       .attr.owner     = THIS_MODULE,
        .read           = ds1305_nvram_read,
        .write          = ds1305_nvram_write,
        .size           = DS1305_NVRAM_LEN,
index 4fcf0734a6ef088f5bf162de0f09341bac979349..162330b9d1dc0069ab94397e071d1d04e5c78865 100644 (file)
@@ -222,17 +222,17 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
                        ds1307->regs[4], ds1307->regs[5],
                        ds1307->regs[6]);
 
-       t->tm_sec = BCD2BIN(ds1307->regs[DS1307_REG_SECS] & 0x7f);
-       t->tm_min = BCD2BIN(ds1307->regs[DS1307_REG_MIN] & 0x7f);
+       t->tm_sec = bcd2bin(ds1307->regs[DS1307_REG_SECS] & 0x7f);
+       t->tm_min = bcd2bin(ds1307->regs[DS1307_REG_MIN] & 0x7f);
        tmp = ds1307->regs[DS1307_REG_HOUR] & 0x3f;
-       t->tm_hour = BCD2BIN(tmp);
-       t->tm_wday = BCD2BIN(ds1307->regs[DS1307_REG_WDAY] & 0x07) - 1;
-       t->tm_mday = BCD2BIN(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
+       t->tm_hour = bcd2bin(tmp);
+       t->tm_wday = bcd2bin(ds1307->regs[DS1307_REG_WDAY] & 0x07) - 1;
+       t->tm_mday = bcd2bin(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
        tmp = ds1307->regs[DS1307_REG_MONTH] & 0x1f;
-       t->tm_mon = BCD2BIN(tmp) - 1;
+       t->tm_mon = bcd2bin(tmp) - 1;
 
        /* assume 20YY not 19YY, and ignore DS1337_BIT_CENTURY */
-       t->tm_year = BCD2BIN(ds1307->regs[DS1307_REG_YEAR]) + 100;
+       t->tm_year = bcd2bin(ds1307->regs[DS1307_REG_YEAR]) + 100;
 
        dev_dbg(dev, "%s secs=%d, mins=%d, "
                "hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -258,16 +258,16 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
                t->tm_mon, t->tm_year, t->tm_wday);
 
        *buf++ = 0;             /* first register addr */
-       buf[DS1307_REG_SECS] = BIN2BCD(t->tm_sec);
-       buf[DS1307_REG_MIN] = BIN2BCD(t->tm_min);
-       buf[DS1307_REG_HOUR] = BIN2BCD(t->tm_hour);
-       buf[DS1307_REG_WDAY] = BIN2BCD(t->tm_wday + 1);
-       buf[DS1307_REG_MDAY] = BIN2BCD(t->tm_mday);
-       buf[DS1307_REG_MONTH] = BIN2BCD(t->tm_mon + 1);
+       buf[DS1307_REG_SECS] = bin2bcd(t->tm_sec);
+       buf[DS1307_REG_MIN] = bin2bcd(t->tm_min);
+       buf[DS1307_REG_HOUR] = bin2bcd(t->tm_hour);
+       buf[DS1307_REG_WDAY] = bin2bcd(t->tm_wday + 1);
+       buf[DS1307_REG_MDAY] = bin2bcd(t->tm_mday);
+       buf[DS1307_REG_MONTH] = bin2bcd(t->tm_mon + 1);
 
        /* assume 20YY not 19YY */
        tmp = t->tm_year - 100;
-       buf[DS1307_REG_YEAR] = BIN2BCD(tmp);
+       buf[DS1307_REG_YEAR] = bin2bcd(tmp);
 
        switch (ds1307->type) {
        case ds_1337:
@@ -551,7 +551,6 @@ static struct bin_attribute nvram = {
        .attr = {
                .name   = "nvram",
                .mode   = S_IRUGO | S_IWUSR,
-               .owner  = THIS_MODULE,
        },
 
        .read   = ds1307_nvram_read,
@@ -709,18 +708,18 @@ read_rtc:
        }
 
        tmp = ds1307->regs[DS1307_REG_SECS];
-       tmp = BCD2BIN(tmp & 0x7f);
+       tmp = bcd2bin(tmp & 0x7f);
        if (tmp > 60)
                goto exit_bad;
-       tmp = BCD2BIN(ds1307->regs[DS1307_REG_MIN] & 0x7f);
+       tmp = bcd2bin(ds1307->regs[DS1307_REG_MIN] & 0x7f);
        if (tmp > 60)
                goto exit_bad;
 
-       tmp = BCD2BIN(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
+       tmp = bcd2bin(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
        if (tmp == 0 || tmp > 31)
                goto exit_bad;
 
-       tmp = BCD2BIN(ds1307->regs[DS1307_REG_MONTH] & 0x1f);
+       tmp = bcd2bin(ds1307->regs[DS1307_REG_MONTH] & 0x1f);
        if (tmp == 0 || tmp > 12)
                goto exit_bad;
 
@@ -739,14 +738,14 @@ read_rtc:
                /* Be sure we're in 24 hour mode.  Multi-master systems
                 * take note...
                 */
-               tmp = BCD2BIN(tmp & 0x1f);
+               tmp = bcd2bin(tmp & 0x1f);
                if (tmp == 12)
                        tmp = 0;
                if (ds1307->regs[DS1307_REG_HOUR] & DS1307_BIT_PM)
                        tmp += 12;
                i2c_smbus_write_byte_data(client,
                                DS1307_REG_HOUR,
-                               BIN2BCD(tmp));
+                               bin2bcd(tmp));
        }
 
        ds1307->rtc = rtc_device_register(client->name, &client->dev,
index 86981d34fbb6717a66d47a11db1287b4e3a4c7f5..25caada78398e76ee83d58507a6fed256eb306ef 100644 (file)
@@ -153,8 +153,8 @@ ds1511_wdog_set(unsigned long deciseconds)
        /*
         * set the wdog values in the wdog registers
         */
-       rtc_write(BIN2BCD(deciseconds % 100), DS1511_WD_MSEC);
-       rtc_write(BIN2BCD(deciseconds / 100), DS1511_WD_SEC);
+       rtc_write(bin2bcd(deciseconds % 100), DS1511_WD_MSEC);
+       rtc_write(bin2bcd(deciseconds / 100), DS1511_WD_SEC);
        /*
         * set wdog enable and wdog 'steering' bit to issue a reset
         */
@@ -220,13 +220,13 @@ static int ds1511_rtc_set_time(struct device *dev, struct rtc_time *rtc_tm)
        /*
         * each register is a different number of valid bits
         */
-       sec = BIN2BCD(sec) & 0x7f;
-       min = BIN2BCD(min) & 0x7f;
-       hrs = BIN2BCD(hrs) & 0x3f;
-       day = BIN2BCD(day) & 0x3f;
-       mon = BIN2BCD(mon) & 0x1f;
-       yrs = BIN2BCD(yrs) & 0xff;
-       cen = BIN2BCD(cen) & 0xff;
+       sec = bin2bcd(sec) & 0x7f;
+       min = bin2bcd(min) & 0x7f;
+       hrs = bin2bcd(hrs) & 0x3f;
+       day = bin2bcd(day) & 0x3f;
+       mon = bin2bcd(mon) & 0x1f;
+       yrs = bin2bcd(yrs) & 0xff;
+       cen = bin2bcd(cen) & 0xff;
 
        spin_lock_irqsave(&ds1511_lock, flags);
        rtc_disable_update();
@@ -264,14 +264,14 @@ static int ds1511_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm)
        rtc_enable_update();
        spin_unlock_irqrestore(&ds1511_lock, flags);
 
-       rtc_tm->tm_sec = BCD2BIN(rtc_tm->tm_sec);
-       rtc_tm->tm_min = BCD2BIN(rtc_tm->tm_min);
-       rtc_tm->tm_hour = BCD2BIN(rtc_tm->tm_hour);
-       rtc_tm->tm_mday = BCD2BIN(rtc_tm->tm_mday);
-       rtc_tm->tm_wday = BCD2BIN(rtc_tm->tm_wday);
-       rtc_tm->tm_mon = BCD2BIN(rtc_tm->tm_mon);
-       rtc_tm->tm_year = BCD2BIN(rtc_tm->tm_year);
-       century = BCD2BIN(century) * 100;
+       rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+       rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+       rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+       rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+       rtc_tm->tm_wday = bcd2bin(rtc_tm->tm_wday);
+       rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+       rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
+       century = bcd2bin(century) * 100;
 
        /*
         * Account for differences between how the RTC uses the values
@@ -304,16 +304,16 @@ ds1511_rtc_update_alarm(struct rtc_plat_data *pdata)
 
        spin_lock_irqsave(&pdata->rtc->irq_lock, flags);
        rtc_write(pdata->alrm_mday < 0 || (pdata->irqen & RTC_UF) ?
-              0x80 : BIN2BCD(pdata->alrm_mday) & 0x3f,
+              0x80 : bin2bcd(pdata->alrm_mday) & 0x3f,
               RTC_ALARM_DATE);
        rtc_write(pdata->alrm_hour < 0 || (pdata->irqen & RTC_UF) ?
-              0x80 : BIN2BCD(pdata->alrm_hour) & 0x3f,
+              0x80 : bin2bcd(pdata->alrm_hour) & 0x3f,
               RTC_ALARM_HOUR);
        rtc_write(pdata->alrm_min < 0 || (pdata->irqen & RTC_UF) ?
-              0x80 : BIN2BCD(pdata->alrm_min) & 0x7f,
+              0x80 : bin2bcd(pdata->alrm_min) & 0x7f,
               RTC_ALARM_MIN);
        rtc_write(pdata->alrm_sec < 0 || (pdata->irqen & RTC_UF) ?
-              0x80 : BIN2BCD(pdata->alrm_sec) & 0x7f,
+              0x80 : bin2bcd(pdata->alrm_sec) & 0x7f,
               RTC_ALARM_SEC);
        rtc_write(rtc_read(RTC_CMD) | (pdata->irqen ? RTC_TIE : 0), RTC_CMD);
        rtc_read(RTC_CMD1);     /* clear interrupts */
@@ -481,7 +481,6 @@ static struct bin_attribute ds1511_nvram_attr = {
        .attr = {
                .name = "nvram",
                .mode = S_IRUGO | S_IWUGO,
-               .owner = THIS_MODULE,
        },
        .size = DS1511_RAM_MAX,
        .read = ds1511_nvram_read,
index 4ef59285b489ec28d05b9c038ae35d28e49a0b40..b9475cd2021063b926d8ff4cd16764589dfa2aac 100644 (file)
@@ -78,17 +78,17 @@ static int ds1553_rtc_set_time(struct device *dev, struct rtc_time *tm)
        void __iomem *ioaddr = pdata->ioaddr;
        u8 century;
 
-       century = BIN2BCD((tm->tm_year + 1900) / 100);
+       century = bin2bcd((tm->tm_year + 1900) / 100);
 
        writeb(RTC_WRITE, pdata->ioaddr + RTC_CONTROL);
 
-       writeb(BIN2BCD(tm->tm_year % 100), ioaddr + RTC_YEAR);
-       writeb(BIN2BCD(tm->tm_mon + 1), ioaddr + RTC_MONTH);
-       writeb(BIN2BCD(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
-       writeb(BIN2BCD(tm->tm_mday), ioaddr + RTC_DATE);
-       writeb(BIN2BCD(tm->tm_hour), ioaddr + RTC_HOURS);
-       writeb(BIN2BCD(tm->tm_min), ioaddr + RTC_MINUTES);
-       writeb(BIN2BCD(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
+       writeb(bin2bcd(tm->tm_year % 100), ioaddr + RTC_YEAR);
+       writeb(bin2bcd(tm->tm_mon + 1), ioaddr + RTC_MONTH);
+       writeb(bin2bcd(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
+       writeb(bin2bcd(tm->tm_mday), ioaddr + RTC_DATE);
+       writeb(bin2bcd(tm->tm_hour), ioaddr + RTC_HOURS);
+       writeb(bin2bcd(tm->tm_min), ioaddr + RTC_MINUTES);
+       writeb(bin2bcd(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
 
        /* RTC_CENTURY and RTC_CONTROL share same register */
        writeb(RTC_WRITE | (century & RTC_CENTURY_MASK), ioaddr + RTC_CENTURY);
@@ -118,14 +118,14 @@ static int ds1553_rtc_read_time(struct device *dev, struct rtc_time *tm)
        year = readb(ioaddr + RTC_YEAR);
        century = readb(ioaddr + RTC_CENTURY) & RTC_CENTURY_MASK;
        writeb(0, ioaddr + RTC_CONTROL);
-       tm->tm_sec = BCD2BIN(second);
-       tm->tm_min = BCD2BIN(minute);
-       tm->tm_hour = BCD2BIN(hour);
-       tm->tm_mday = BCD2BIN(day);
-       tm->tm_wday = BCD2BIN(week);
-       tm->tm_mon = BCD2BIN(month) - 1;
+       tm->tm_sec = bcd2bin(second);
+       tm->tm_min = bcd2bin(minute);
+       tm->tm_hour = bcd2bin(hour);
+       tm->tm_mday = bcd2bin(day);
+       tm->tm_wday = bcd2bin(week);
+       tm->tm_mon = bcd2bin(month) - 1;
        /* year is 1900 + tm->tm_year */
-       tm->tm_year = BCD2BIN(year) + BCD2BIN(century) * 100 - 1900;
+       tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
        if (rtc_valid_tm(tm) < 0) {
                dev_err(dev, "retrieved date/time is not valid.\n");
@@ -141,16 +141,16 @@ static void ds1553_rtc_update_alarm(struct rtc_plat_data *pdata)
 
        spin_lock_irqsave(&pdata->rtc->irq_lock, flags);
        writeb(pdata->alrm_mday < 0 || (pdata->irqen & RTC_UF) ?
-              0x80 : BIN2BCD(pdata->alrm_mday),
+              0x80 : bin2bcd(pdata->alrm_mday),
               ioaddr + RTC_DATE_ALARM);
        writeb(pdata->alrm_hour < 0 || (pdata->irqen & RTC_UF) ?
-              0x80 : BIN2BCD(pdata->alrm_hour),
+              0x80 : bin2bcd(pdata->alrm_hour),
               ioaddr + RTC_HOURS_ALARM);
        writeb(pdata->alrm_min < 0 || (pdata->irqen & RTC_UF) ?
-              0x80 : BIN2BCD(pdata->alrm_min),
+              0x80 : bin2bcd(pdata->alrm_min),
               ioaddr + RTC_MINUTES_ALARM);
        writeb(pdata->alrm_sec < 0 || (pdata->irqen & RTC_UF) ?
-              0x80 : BIN2BCD(pdata->alrm_sec),
+              0x80 : bin2bcd(pdata->alrm_sec),
               ioaddr + RTC_SECONDS_ALARM);
        writeb(pdata->irqen ? RTC_INTS_AE : 0, ioaddr + RTC_INTERRUPTS);
        readb(ioaddr + RTC_FLAGS);      /* clear interrupts */
index 24d35ede2dbff92de8fca76f3354337895eba97c..8bc8501bffc81f4b8996a17ef939de553f027938 100644 (file)
@@ -66,17 +66,17 @@ static int ds1742_rtc_set_time(struct device *dev, struct rtc_time *tm)
        void __iomem *ioaddr = pdata->ioaddr_rtc;
        u8 century;
 
-       century = BIN2BCD((tm->tm_year + 1900) / 100);
+       century = bin2bcd((tm->tm_year + 1900) / 100);
 
        writeb(RTC_WRITE, ioaddr + RTC_CONTROL);
 
-       writeb(BIN2BCD(tm->tm_year % 100), ioaddr + RTC_YEAR);
-       writeb(BIN2BCD(tm->tm_mon + 1), ioaddr + RTC_MONTH);
-       writeb(BIN2BCD(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
-       writeb(BIN2BCD(tm->tm_mday), ioaddr + RTC_DATE);
-       writeb(BIN2BCD(tm->tm_hour), ioaddr + RTC_HOURS);
-       writeb(BIN2BCD(tm->tm_min), ioaddr + RTC_MINUTES);
-       writeb(BIN2BCD(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
+       writeb(bin2bcd(tm->tm_year % 100), ioaddr + RTC_YEAR);
+       writeb(bin2bcd(tm->tm_mon + 1), ioaddr + RTC_MONTH);
+       writeb(bin2bcd(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
+       writeb(bin2bcd(tm->tm_mday), ioaddr + RTC_DATE);
+       writeb(bin2bcd(tm->tm_hour), ioaddr + RTC_HOURS);
+       writeb(bin2bcd(tm->tm_min), ioaddr + RTC_MINUTES);
+       writeb(bin2bcd(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
 
        /* RTC_CENTURY and RTC_CONTROL share same register */
        writeb(RTC_WRITE | (century & RTC_CENTURY_MASK), ioaddr + RTC_CENTURY);
@@ -106,14 +106,14 @@ static int ds1742_rtc_read_time(struct device *dev, struct rtc_time *tm)
        year = readb(ioaddr + RTC_YEAR);
        century = readb(ioaddr + RTC_CENTURY) & RTC_CENTURY_MASK;
        writeb(0, ioaddr + RTC_CONTROL);
-       tm->tm_sec = BCD2BIN(second);
-       tm->tm_min = BCD2BIN(minute);
-       tm->tm_hour = BCD2BIN(hour);
-       tm->tm_mday = BCD2BIN(day);
-       tm->tm_wday = BCD2BIN(week);
-       tm->tm_mon = BCD2BIN(month) - 1;
+       tm->tm_sec = bcd2bin(second);
+       tm->tm_min = bcd2bin(minute);
+       tm->tm_hour = bcd2bin(hour);
+       tm->tm_mday = bcd2bin(day);
+       tm->tm_wday = bcd2bin(week);
+       tm->tm_mon = bcd2bin(month) - 1;
        /* year is 1900 + tm->tm_year */
-       tm->tm_year = BCD2BIN(year) + BCD2BIN(century) * 100 - 1900;
+       tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
        if (rtc_valid_tm(tm) < 0) {
                dev_err(dev, "retrieved date/time is not valid.\n");
index abfdfcbaa059f2334dea356dd5b094924783315e..3a7be11cc6b93e1a5315a40ffc25624792679443 100644 (file)
@@ -131,17 +131,17 @@ static int fm3130_get_time(struct device *dev, struct rtc_time *t)
                        fm3130->regs[0xc], fm3130->regs[0xd],
                        fm3130->regs[0xe]);
 
-       t->tm_sec = BCD2BIN(fm3130->regs[FM3130_RTC_SECONDS] & 0x7f);
-       t->tm_min = BCD2BIN(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
+       t->tm_sec = bcd2bin(fm3130->regs[FM3130_RTC_SECONDS] & 0x7f);
+       t->tm_min = bcd2bin(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
        tmp = fm3130->regs[FM3130_RTC_HOURS] & 0x3f;
-       t->tm_hour = BCD2BIN(tmp);
-       t->tm_wday = BCD2BIN(fm3130->regs[FM3130_RTC_DAY] & 0x07) - 1;
-       t->tm_mday = BCD2BIN(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
+       t->tm_hour = bcd2bin(tmp);
+       t->tm_wday = bcd2bin(fm3130->regs[FM3130_RTC_DAY] & 0x07) - 1;
+       t->tm_mday = bcd2bin(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
        tmp = fm3130->regs[FM3130_RTC_MONTHS] & 0x1f;
-       t->tm_mon = BCD2BIN(tmp) - 1;
+       t->tm_mon = bcd2bin(tmp) - 1;
 
        /* assume 20YY not 19YY, and ignore CF bit */
-       t->tm_year = BCD2BIN(fm3130->regs[FM3130_RTC_YEARS]) + 100;
+       t->tm_year = bcd2bin(fm3130->regs[FM3130_RTC_YEARS]) + 100;
 
        dev_dbg(dev, "%s secs=%d, mins=%d, "
                "hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -167,16 +167,16 @@ static int fm3130_set_time(struct device *dev, struct rtc_time *t)
                t->tm_mon, t->tm_year, t->tm_wday);
 
        /* first register addr */
-       buf[FM3130_RTC_SECONDS] = BIN2BCD(t->tm_sec);
-       buf[FM3130_RTC_MINUTES] = BIN2BCD(t->tm_min);
-       buf[FM3130_RTC_HOURS] = BIN2BCD(t->tm_hour);
-       buf[FM3130_RTC_DAY] = BIN2BCD(t->tm_wday + 1);
-       buf[FM3130_RTC_DATE] = BIN2BCD(t->tm_mday);
-       buf[FM3130_RTC_MONTHS] = BIN2BCD(t->tm_mon + 1);
+       buf[FM3130_RTC_SECONDS] = bin2bcd(t->tm_sec);
+       buf[FM3130_RTC_MINUTES] = bin2bcd(t->tm_min);
+       buf[FM3130_RTC_HOURS] = bin2bcd(t->tm_hour);
+       buf[FM3130_RTC_DAY] = bin2bcd(t->tm_wday + 1);
+       buf[FM3130_RTC_DATE] = bin2bcd(t->tm_mday);
+       buf[FM3130_RTC_MONTHS] = bin2bcd(t->tm_mon + 1);
 
        /* assume 20YY not 19YY */
        tmp = t->tm_year - 100;
-       buf[FM3130_RTC_YEARS] = BIN2BCD(tmp);
+       buf[FM3130_RTC_YEARS] = bin2bcd(tmp);
 
        dev_dbg(dev, "%s: %02x %02x %02x %02x %02x %02x %02x"
                "%02x %02x %02x %02x %02x %02x %02x %02x\n",
@@ -222,11 +222,11 @@ static int fm3130_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
                        fm3130->regs[FM3130_ALARM_MONTHS]);
 
 
-       tm->tm_sec      = BCD2BIN(fm3130->regs[FM3130_ALARM_SECONDS] & 0x7F);
-       tm->tm_min      = BCD2BIN(fm3130->regs[FM3130_ALARM_MINUTES] & 0x7F);
-       tm->tm_hour     = BCD2BIN(fm3130->regs[FM3130_ALARM_HOURS] & 0x3F);
-       tm->tm_mday     = BCD2BIN(fm3130->regs[FM3130_ALARM_DATE] & 0x3F);
-       tm->tm_mon      = BCD2BIN(fm3130->regs[FM3130_ALARM_MONTHS] & 0x1F);
+       tm->tm_sec      = bcd2bin(fm3130->regs[FM3130_ALARM_SECONDS] & 0x7F);
+       tm->tm_min      = bcd2bin(fm3130->regs[FM3130_ALARM_MINUTES] & 0x7F);
+       tm->tm_hour     = bcd2bin(fm3130->regs[FM3130_ALARM_HOURS] & 0x3F);
+       tm->tm_mday     = bcd2bin(fm3130->regs[FM3130_ALARM_DATE] & 0x3F);
+       tm->tm_mon      = bcd2bin(fm3130->regs[FM3130_ALARM_MONTHS] & 0x1F);
        if (tm->tm_mon > 0)
                tm->tm_mon -= 1; /* RTC is 1-12, tm_mon is 0-11 */
        dev_dbg(dev, "%s secs=%d, mins=%d, "
@@ -252,23 +252,23 @@ static int fm3130_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 
        if (tm->tm_sec != -1)
                fm3130->regs[FM3130_ALARM_SECONDS] =
-                       BIN2BCD(tm->tm_sec) | 0x80;
+                       bin2bcd(tm->tm_sec) | 0x80;
 
        if (tm->tm_min != -1)
                fm3130->regs[FM3130_ALARM_MINUTES] =
-                       BIN2BCD(tm->tm_min) | 0x80;
+                       bin2bcd(tm->tm_min) | 0x80;
 
        if (tm->tm_hour != -1)
                fm3130->regs[FM3130_ALARM_HOURS] =
-                       BIN2BCD(tm->tm_hour) | 0x80;
+                       bin2bcd(tm->tm_hour) | 0x80;
 
        if (tm->tm_mday != -1)
                fm3130->regs[FM3130_ALARM_DATE] =
-                       BIN2BCD(tm->tm_mday) | 0x80;
+                       bin2bcd(tm->tm_mday) | 0x80;
 
        if (tm->tm_mon != -1)
                fm3130->regs[FM3130_ALARM_MONTHS] =
-                       BIN2BCD(tm->tm_mon + 1) | 0x80;
+                       bin2bcd(tm->tm_mon + 1) | 0x80;
 
        dev_dbg(dev, "alarm write %02x %02x %02x %02x %02x\n",
                        fm3130->regs[FM3130_ALARM_SECONDS],
@@ -414,18 +414,18 @@ static int __devinit fm3130_probe(struct i2c_client *client,
        /* TODO */
        /* TODO need to sanity check alarm */
        tmp = fm3130->regs[FM3130_RTC_SECONDS];
-       tmp = BCD2BIN(tmp & 0x7f);
+       tmp = bcd2bin(tmp & 0x7f);
        if (tmp > 60)
                goto exit_bad;
-       tmp = BCD2BIN(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
+       tmp = bcd2bin(fm3130->regs[FM3130_RTC_MINUTES] & 0x7f);
        if (tmp > 60)
                goto exit_bad;
 
-       tmp = BCD2BIN(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
+       tmp = bcd2bin(fm3130->regs[FM3130_RTC_DATE] & 0x3f);
        if (tmp == 0 || tmp > 31)
                goto exit_bad;
 
-       tmp = BCD2BIN(fm3130->regs[FM3130_RTC_MONTHS] & 0x1f);
+       tmp = bcd2bin(fm3130->regs[FM3130_RTC_MONTHS] & 0x1f);
        if (tmp == 0 || tmp > 12)
                goto exit_bad;
 
index a81adab6e515849306d9fb9e245eebc0e8532784..2cd77ab8fc66b06e43d3b908884a9f4fe8603ca1 100644 (file)
@@ -259,26 +259,26 @@ isl1208_i2c_read_time(struct i2c_client *client, struct rtc_time *tm)
                return sr;
        }
 
-       tm->tm_sec = BCD2BIN(regs[ISL1208_REG_SC]);
-       tm->tm_min = BCD2BIN(regs[ISL1208_REG_MN]);
+       tm->tm_sec = bcd2bin(regs[ISL1208_REG_SC]);
+       tm->tm_min = bcd2bin(regs[ISL1208_REG_MN]);
 
        /* HR field has a more complex interpretation */
        {
                const u8 _hr = regs[ISL1208_REG_HR];
                if (_hr & ISL1208_REG_HR_MIL)   /* 24h format */
-                       tm->tm_hour = BCD2BIN(_hr & 0x3f);
+                       tm->tm_hour = bcd2bin(_hr & 0x3f);
                else {
                        /* 12h format */
-                       tm->tm_hour = BCD2BIN(_hr & 0x1f);
+                       tm->tm_hour = bcd2bin(_hr & 0x1f);
                        if (_hr & ISL1208_REG_HR_PM)    /* PM flag set */
                                tm->tm_hour += 12;
                }
        }
 
-       tm->tm_mday = BCD2BIN(regs[ISL1208_REG_DT]);
-       tm->tm_mon = BCD2BIN(regs[ISL1208_REG_MO]) - 1; /* rtc starts at 1 */
-       tm->tm_year = BCD2BIN(regs[ISL1208_REG_YR]) + 100;
-       tm->tm_wday = BCD2BIN(regs[ISL1208_REG_DW]);
+       tm->tm_mday = bcd2bin(regs[ISL1208_REG_DT]);
+       tm->tm_mon = bcd2bin(regs[ISL1208_REG_MO]) - 1; /* rtc starts at 1 */
+       tm->tm_year = bcd2bin(regs[ISL1208_REG_YR]) + 100;
+       tm->tm_wday = bcd2bin(regs[ISL1208_REG_DW]);
 
        return 0;
 }
@@ -305,13 +305,13 @@ isl1208_i2c_read_alarm(struct i2c_client *client, struct rtc_wkalrm *alarm)
        }
 
        /* MSB of each alarm register is an enable bit */
-       tm->tm_sec = BCD2BIN(regs[ISL1208_REG_SCA - ISL1208_REG_SCA] & 0x7f);
-       tm->tm_min = BCD2BIN(regs[ISL1208_REG_MNA - ISL1208_REG_SCA] & 0x7f);
-       tm->tm_hour = BCD2BIN(regs[ISL1208_REG_HRA - ISL1208_REG_SCA] & 0x3f);
-       tm->tm_mday = BCD2BIN(regs[ISL1208_REG_DTA - ISL1208_REG_SCA] & 0x3f);
+       tm->tm_sec = bcd2bin(regs[ISL1208_REG_SCA - ISL1208_REG_SCA] & 0x7f);
+       tm->tm_min = bcd2bin(regs[ISL1208_REG_MNA - ISL1208_REG_SCA] & 0x7f);
+       tm->tm_hour = bcd2bin(regs[ISL1208_REG_HRA - ISL1208_REG_SCA] & 0x3f);
+       tm->tm_mday = bcd2bin(regs[ISL1208_REG_DTA - ISL1208_REG_SCA] & 0x3f);
        tm->tm_mon =
-               BCD2BIN(regs[ISL1208_REG_MOA - ISL1208_REG_SCA] & 0x1f) - 1;
-       tm->tm_wday = BCD2BIN(regs[ISL1208_REG_DWA - ISL1208_REG_SCA] & 0x03);
+               bcd2bin(regs[ISL1208_REG_MOA - ISL1208_REG_SCA] & 0x1f) - 1;
+       tm->tm_wday = bcd2bin(regs[ISL1208_REG_DWA - ISL1208_REG_SCA] & 0x03);
 
        return 0;
 }
@@ -328,15 +328,15 @@ isl1208_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
        int sr;
        u8 regs[ISL1208_RTC_SECTION_LEN] = { 0, };
 
-       regs[ISL1208_REG_SC] = BIN2BCD(tm->tm_sec);
-       regs[ISL1208_REG_MN] = BIN2BCD(tm->tm_min);
-       regs[ISL1208_REG_HR] = BIN2BCD(tm->tm_hour) | ISL1208_REG_HR_MIL;
+       regs[ISL1208_REG_SC] = bin2bcd(tm->tm_sec);
+       regs[ISL1208_REG_MN] = bin2bcd(tm->tm_min);
+       regs[ISL1208_REG_HR] = bin2bcd(tm->tm_hour) | ISL1208_REG_HR_MIL;
 
-       regs[ISL1208_REG_DT] = BIN2BCD(tm->tm_mday);
-       regs[ISL1208_REG_MO] = BIN2BCD(tm->tm_mon + 1);
-       regs[ISL1208_REG_YR] = BIN2BCD(tm->tm_year - 100);
+       regs[ISL1208_REG_DT] = bin2bcd(tm->tm_mday);
+       regs[ISL1208_REG_MO] = bin2bcd(tm->tm_mon + 1);
+       regs[ISL1208_REG_YR] = bin2bcd(tm->tm_year - 100);
 
-       regs[ISL1208_REG_DW] = BIN2BCD(tm->tm_wday & 7);
+       regs[ISL1208_REG_DW] = bin2bcd(tm->tm_wday & 7);
 
        sr = isl1208_i2c_get_sr(client);
        if (sr < 0) {
index 470fb2d295452a7be54f92747076107c0c025436..893f7dece239522afcca4579d2524dba58f645a0 100644 (file)
@@ -110,15 +110,15 @@ static int m41t80_get_datetime(struct i2c_client *client,
                return -EIO;
        }
 
-       tm->tm_sec = BCD2BIN(buf[M41T80_REG_SEC] & 0x7f);
-       tm->tm_min = BCD2BIN(buf[M41T80_REG_MIN] & 0x7f);
-       tm->tm_hour = BCD2BIN(buf[M41T80_REG_HOUR] & 0x3f);
-       tm->tm_mday = BCD2BIN(buf[M41T80_REG_DAY] & 0x3f);
+       tm->tm_sec = bcd2bin(buf[M41T80_REG_SEC] & 0x7f);
+       tm->tm_min = bcd2bin(buf[M41T80_REG_MIN] & 0x7f);
+       tm->tm_hour = bcd2bin(buf[M41T80_REG_HOUR] & 0x3f);
+       tm->tm_mday = bcd2bin(buf[M41T80_REG_DAY] & 0x3f);
        tm->tm_wday = buf[M41T80_REG_WDAY] & 0x07;
-       tm->tm_mon = BCD2BIN(buf[M41T80_REG_MON] & 0x1f) - 1;
+       tm->tm_mon = bcd2bin(buf[M41T80_REG_MON] & 0x1f) - 1;
 
        /* assume 20YY not 19YY, and ignore the Century Bit */
-       tm->tm_year = BCD2BIN(buf[M41T80_REG_YEAR]) + 100;
+       tm->tm_year = bcd2bin(buf[M41T80_REG_YEAR]) + 100;
        return 0;
 }
 
@@ -161,19 +161,19 @@ static int m41t80_set_datetime(struct i2c_client *client, struct rtc_time *tm)
        /* Merge time-data and register flags into buf[0..7] */
        buf[M41T80_REG_SSEC] = 0;
        buf[M41T80_REG_SEC] =
-               BIN2BCD(tm->tm_sec) | (buf[M41T80_REG_SEC] & ~0x7f);
+               bin2bcd(tm->tm_sec) | (buf[M41T80_REG_SEC] & ~0x7f);
        buf[M41T80_REG_MIN] =
-               BIN2BCD(tm->tm_min) | (buf[M41T80_REG_MIN] & ~0x7f);
+               bin2bcd(tm->tm_min) | (buf[M41T80_REG_MIN] & ~0x7f);
        buf[M41T80_REG_HOUR] =
-               BIN2BCD(tm->tm_hour) | (buf[M41T80_REG_HOUR] & ~0x3f) ;
+               bin2bcd(tm->tm_hour) | (buf[M41T80_REG_HOUR] & ~0x3f) ;
        buf[M41T80_REG_WDAY] =
                (tm->tm_wday & 0x07) | (buf[M41T80_REG_WDAY] & ~0x07);
        buf[M41T80_REG_DAY] =
-               BIN2BCD(tm->tm_mday) | (buf[M41T80_REG_DAY] & ~0x3f);
+               bin2bcd(tm->tm_mday) | (buf[M41T80_REG_DAY] & ~0x3f);
        buf[M41T80_REG_MON] =
-               BIN2BCD(tm->tm_mon + 1) | (buf[M41T80_REG_MON] & ~0x1f);
+               bin2bcd(tm->tm_mon + 1) | (buf[M41T80_REG_MON] & ~0x1f);
        /* assume 20YY not 19YY */
-       buf[M41T80_REG_YEAR] = BIN2BCD(tm->tm_year % 100);
+       buf[M41T80_REG_YEAR] = bin2bcd(tm->tm_year % 100);
 
        if (i2c_transfer(client->adapter, msgs, 1) != 1) {
                dev_err(&client->dev, "write error\n");
@@ -288,15 +288,15 @@ static int m41t80_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 
        wbuf[0] = M41T80_REG_ALARM_MON; /* offset into rtc's regs */
        reg[M41T80_REG_ALARM_SEC] |= t->time.tm_sec >= 0 ?
-               BIN2BCD(t->time.tm_sec) : 0x80;
+               bin2bcd(t->time.tm_sec) : 0x80;
        reg[M41T80_REG_ALARM_MIN] |= t->time.tm_min >= 0 ?
-               BIN2BCD(t->time.tm_min) : 0x80;
+               bin2bcd(t->time.tm_min) : 0x80;
        reg[M41T80_REG_ALARM_HOUR] |= t->time.tm_hour >= 0 ?
-               BIN2BCD(t->time.tm_hour) : 0x80;
+               bin2bcd(t->time.tm_hour) : 0x80;
        reg[M41T80_REG_ALARM_DAY] |= t->time.tm_mday >= 0 ?
-               BIN2BCD(t->time.tm_mday) : 0x80;
+               bin2bcd(t->time.tm_mday) : 0x80;
        if (t->time.tm_mon >= 0)
-               reg[M41T80_REG_ALARM_MON] |= BIN2BCD(t->time.tm_mon + 1);
+               reg[M41T80_REG_ALARM_MON] |= bin2bcd(t->time.tm_mon + 1);
        else
                reg[M41T80_REG_ALARM_DAY] |= 0x40;
 
@@ -347,15 +347,15 @@ static int m41t80_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *t)
        t->time.tm_mday = -1;
        t->time.tm_mon = -1;
        if (!(reg[M41T80_REG_ALARM_SEC] & 0x80))
-               t->time.tm_sec = BCD2BIN(reg[M41T80_REG_ALARM_SEC] & 0x7f);
+               t->time.tm_sec = bcd2bin(reg[M41T80_REG_ALARM_SEC] & 0x7f);
        if (!(reg[M41T80_REG_ALARM_MIN] & 0x80))
-               t->time.tm_min = BCD2BIN(reg[M41T80_REG_ALARM_MIN] & 0x7f);
+               t->time.tm_min = bcd2bin(reg[M41T80_REG_ALARM_MIN] & 0x7f);
        if (!(reg[M41T80_REG_ALARM_HOUR] & 0x80))
-               t->time.tm_hour = BCD2BIN(reg[M41T80_REG_ALARM_HOUR] & 0x3f);
+               t->time.tm_hour = bcd2bin(reg[M41T80_REG_ALARM_HOUR] & 0x3f);
        if (!(reg[M41T80_REG_ALARM_DAY] & 0x80))
-               t->time.tm_mday = BCD2BIN(reg[M41T80_REG_ALARM_DAY] & 0x3f);
+               t->time.tm_mday = bcd2bin(reg[M41T80_REG_ALARM_DAY] & 0x3f);
        if (!(reg[M41T80_REG_ALARM_DAY] & 0x40))
-               t->time.tm_mon = BCD2BIN(reg[M41T80_REG_ALARM_MON] & 0x1f) - 1;
+               t->time.tm_mon = bcd2bin(reg[M41T80_REG_ALARM_MON] & 0x1f) - 1;
        t->time.tm_year = -1;
        t->time.tm_wday = -1;
        t->time.tm_yday = -1;
index 9b19499c829e5a7008a64a9d43de58e0429a6bd5..c3a18c58daf6ed80745f2c2c7fc7f8d5dcf2bde4 100644 (file)
@@ -41,17 +41,17 @@ static int m41t94_set_time(struct device *dev, struct rtc_time *tm)
                tm->tm_mon, tm->tm_year, tm->tm_wday);
 
        buf[0] = 0x80 | M41T94_REG_SECONDS; /* write time + date */
-       buf[M41T94_REG_SECONDS] = BIN2BCD(tm->tm_sec);
-       buf[M41T94_REG_MINUTES] = BIN2BCD(tm->tm_min);
-       buf[M41T94_REG_HOURS]   = BIN2BCD(tm->tm_hour);
-       buf[M41T94_REG_WDAY]    = BIN2BCD(tm->tm_wday + 1);
-       buf[M41T94_REG_DAY]     = BIN2BCD(tm->tm_mday);
-       buf[M41T94_REG_MONTH]   = BIN2BCD(tm->tm_mon + 1);
+       buf[M41T94_REG_SECONDS] = bin2bcd(tm->tm_sec);
+       buf[M41T94_REG_MINUTES] = bin2bcd(tm->tm_min);
+       buf[M41T94_REG_HOURS]   = bin2bcd(tm->tm_hour);
+       buf[M41T94_REG_WDAY]    = bin2bcd(tm->tm_wday + 1);
+       buf[M41T94_REG_DAY]     = bin2bcd(tm->tm_mday);
+       buf[M41T94_REG_MONTH]   = bin2bcd(tm->tm_mon + 1);
 
        buf[M41T94_REG_HOURS] |= M41T94_BIT_CEB;
        if (tm->tm_year >= 100)
                buf[M41T94_REG_HOURS] |= M41T94_BIT_CB;
-       buf[M41T94_REG_YEAR] = BIN2BCD(tm->tm_year % 100);
+       buf[M41T94_REG_YEAR] = bin2bcd(tm->tm_year % 100);
 
        return spi_write(spi, buf, 8);
 }
@@ -82,14 +82,14 @@ static int m41t94_read_time(struct device *dev, struct rtc_time *tm)
                spi_write(spi, buf, 2);
        }
 
-       tm->tm_sec  = BCD2BIN(spi_w8r8(spi, M41T94_REG_SECONDS));
-       tm->tm_min  = BCD2BIN(spi_w8r8(spi, M41T94_REG_MINUTES));
+       tm->tm_sec  = bcd2bin(spi_w8r8(spi, M41T94_REG_SECONDS));
+       tm->tm_min  = bcd2bin(spi_w8r8(spi, M41T94_REG_MINUTES));
        hour = spi_w8r8(spi, M41T94_REG_HOURS);
-       tm->tm_hour = BCD2BIN(hour & 0x3f);
-       tm->tm_wday = BCD2BIN(spi_w8r8(spi, M41T94_REG_WDAY)) - 1;
-       tm->tm_mday = BCD2BIN(spi_w8r8(spi, M41T94_REG_DAY));
-       tm->tm_mon  = BCD2BIN(spi_w8r8(spi, M41T94_REG_MONTH)) - 1;
-       tm->tm_year = BCD2BIN(spi_w8r8(spi, M41T94_REG_YEAR));
+       tm->tm_hour = bcd2bin(hour & 0x3f);
+       tm->tm_wday = bcd2bin(spi_w8r8(spi, M41T94_REG_WDAY)) - 1;
+       tm->tm_mday = bcd2bin(spi_w8r8(spi, M41T94_REG_DAY));
+       tm->tm_mon  = bcd2bin(spi_w8r8(spi, M41T94_REG_MONTH)) - 1;
+       tm->tm_year = bcd2bin(spi_w8r8(spi, M41T94_REG_YEAR));
        if ((hour & M41T94_BIT_CB) || !(hour & M41T94_BIT_CEB))
                tm->tm_year += 100;
 
index ce4eff6a8d51bd57057c4d0e8f24f49304c4ab60..04b63dab693254885893ea622a7cf99cebc1b3cf 100644 (file)
@@ -76,10 +76,10 @@ static int m48t59_rtc_read_time(struct device *dev, struct rtc_time *tm)
        /* Issue the READ command */
        M48T59_SET_BITS(M48T59_CNTL_READ, M48T59_CNTL);
 
-       tm->tm_year     = BCD2BIN(M48T59_READ(M48T59_YEAR));
+       tm->tm_year     = bcd2bin(M48T59_READ(M48T59_YEAR));
        /* tm_mon is 0-11 */
-       tm->tm_mon      = BCD2BIN(M48T59_READ(M48T59_MONTH)) - 1;
-       tm->tm_mday     = BCD2BIN(M48T59_READ(M48T59_MDAY));
+       tm->tm_mon      = bcd2bin(M48T59_READ(M48T59_MONTH)) - 1;
+       tm->tm_mday     = bcd2bin(M48T59_READ(M48T59_MDAY));
 
        val = M48T59_READ(M48T59_WDAY);
        if ((pdata->type == M48T59RTC_TYPE_M48T59) &&
@@ -88,10 +88,10 @@ static int m48t59_rtc_read_time(struct device *dev, struct rtc_time *tm)
                tm->tm_year += 100;     /* one century */
        }
 
-       tm->tm_wday     = BCD2BIN(val & 0x07);
-       tm->tm_hour     = BCD2BIN(M48T59_READ(M48T59_HOUR) & 0x3F);
-       tm->tm_min      = BCD2BIN(M48T59_READ(M48T59_MIN) & 0x7F);
-       tm->tm_sec      = BCD2BIN(M48T59_READ(M48T59_SEC) & 0x7F);
+       tm->tm_wday     = bcd2bin(val & 0x07);
+       tm->tm_hour     = bcd2bin(M48T59_READ(M48T59_HOUR) & 0x3F);
+       tm->tm_min      = bcd2bin(M48T59_READ(M48T59_MIN) & 0x7F);
+       tm->tm_sec      = bcd2bin(M48T59_READ(M48T59_SEC) & 0x7F);
 
        /* Clear the READ bit */
        M48T59_CLEAR_BITS(M48T59_CNTL_READ, M48T59_CNTL);
@@ -119,17 +119,17 @@ static int m48t59_rtc_set_time(struct device *dev, struct rtc_time *tm)
        /* Issue the WRITE command */
        M48T59_SET_BITS(M48T59_CNTL_WRITE, M48T59_CNTL);
 
-       M48T59_WRITE((BIN2BCD(tm->tm_sec) & 0x7F), M48T59_SEC);
-       M48T59_WRITE((BIN2BCD(tm->tm_min) & 0x7F), M48T59_MIN);
-       M48T59_WRITE((BIN2BCD(tm->tm_hour) & 0x3F), M48T59_HOUR);
-       M48T59_WRITE((BIN2BCD(tm->tm_mday) & 0x3F), M48T59_MDAY);
+       M48T59_WRITE((bin2bcd(tm->tm_sec) & 0x7F), M48T59_SEC);
+       M48T59_WRITE((bin2bcd(tm->tm_min) & 0x7F), M48T59_MIN);
+       M48T59_WRITE((bin2bcd(tm->tm_hour) & 0x3F), M48T59_HOUR);
+       M48T59_WRITE((bin2bcd(tm->tm_mday) & 0x3F), M48T59_MDAY);
        /* tm_mon is 0-11 */
-       M48T59_WRITE((BIN2BCD(tm->tm_mon + 1) & 0x1F), M48T59_MONTH);
-       M48T59_WRITE(BIN2BCD(tm->tm_year % 100), M48T59_YEAR);
+       M48T59_WRITE((bin2bcd(tm->tm_mon + 1) & 0x1F), M48T59_MONTH);
+       M48T59_WRITE(bin2bcd(tm->tm_year % 100), M48T59_YEAR);
 
        if (pdata->type == M48T59RTC_TYPE_M48T59 && (tm->tm_year / 100))
                val = (M48T59_WDAY_CEB | M48T59_WDAY_CB);
-       val |= (BIN2BCD(tm->tm_wday) & 0x07);
+       val |= (bin2bcd(tm->tm_wday) & 0x07);
        M48T59_WRITE(val, M48T59_WDAY);
 
        /* Clear the WRITE bit */
@@ -158,18 +158,18 @@ static int m48t59_rtc_readalarm(struct device *dev, struct rtc_wkalrm *alrm)
        /* Issue the READ command */
        M48T59_SET_BITS(M48T59_CNTL_READ, M48T59_CNTL);
 
-       tm->tm_year = BCD2BIN(M48T59_READ(M48T59_YEAR));
+       tm->tm_year = bcd2bin(M48T59_READ(M48T59_YEAR));
        /* tm_mon is 0-11 */
-       tm->tm_mon = BCD2BIN(M48T59_READ(M48T59_MONTH)) - 1;
+       tm->tm_mon = bcd2bin(M48T59_READ(M48T59_MONTH)) - 1;
 
        val = M48T59_READ(M48T59_WDAY);
        if ((val & M48T59_WDAY_CEB) && (val & M48T59_WDAY_CB))
                tm->tm_year += 100;     /* one century */
 
-       tm->tm_mday = BCD2BIN(M48T59_READ(M48T59_ALARM_DATE));
-       tm->tm_hour = BCD2BIN(M48T59_READ(M48T59_ALARM_HOUR));
-       tm->tm_min = BCD2BIN(M48T59_READ(M48T59_ALARM_MIN));
-       tm->tm_sec = BCD2BIN(M48T59_READ(M48T59_ALARM_SEC));
+       tm->tm_mday = bcd2bin(M48T59_READ(M48T59_ALARM_DATE));
+       tm->tm_hour = bcd2bin(M48T59_READ(M48T59_ALARM_HOUR));
+       tm->tm_min = bcd2bin(M48T59_READ(M48T59_ALARM_MIN));
+       tm->tm_sec = bcd2bin(M48T59_READ(M48T59_ALARM_SEC));
 
        /* Clear the READ bit */
        M48T59_CLEAR_BITS(M48T59_CNTL_READ, M48T59_CNTL);
@@ -201,18 +201,18 @@ static int m48t59_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
         * 0xff means "always match"
         */
        mday = tm->tm_mday;
-       mday = (mday >= 1 && mday <= 31) ? BIN2BCD(mday) : 0xff;
+       mday = (mday >= 1 && mday <= 31) ? bin2bcd(mday) : 0xff;
        if (mday == 0xff)
                mday = M48T59_READ(M48T59_MDAY);
 
        hour = tm->tm_hour;
-       hour = (hour < 24) ? BIN2BCD(hour) : 0x00;
+       hour = (hour < 24) ? bin2bcd(hour) : 0x00;
 
        min = tm->tm_min;
-       min = (min < 60) ? BIN2BCD(min) : 0x00;
+       min = (min < 60) ? bin2bcd(min) : 0x00;
 
        sec = tm->tm_sec;
-       sec = (sec < 60) ? BIN2BCD(sec) : 0x00;
+       sec = (sec < 60) ? bin2bcd(sec) : 0x00;
 
        spin_lock_irqsave(&m48t59->lock, flags);
        /* Issue the WRITE command */
@@ -360,7 +360,6 @@ static struct bin_attribute m48t59_nvram_attr = {
        .attr = {
                .name = "nvram",
                .mode = S_IRUGO | S_IWUSR,
-               .owner = THIS_MODULE,
        },
        .read = m48t59_nvram_read,
        .write = m48t59_nvram_write,
index 3f7f99a5d96aa596d59258497f6bb1c6f01e1566..7c045cffa9ff64ae9e2e1e2a7c9f0ce75b7b9c18 100644 (file)
@@ -62,14 +62,14 @@ static int m48t86_rtc_read_time(struct device *dev, struct rtc_time *tm)
                tm->tm_wday     = ops->readbyte(M48T86_REG_DOW);
        } else {
                /* bcd mode */
-               tm->tm_sec      = BCD2BIN(ops->readbyte(M48T86_REG_SEC));
-               tm->tm_min      = BCD2BIN(ops->readbyte(M48T86_REG_MIN));
-               tm->tm_hour     = BCD2BIN(ops->readbyte(M48T86_REG_HOUR) & 0x3F);
-               tm->tm_mday     = BCD2BIN(ops->readbyte(M48T86_REG_DOM));
+               tm->tm_sec      = bcd2bin(ops->readbyte(M48T86_REG_SEC));
+               tm->tm_min      = bcd2bin(ops->readbyte(M48T86_REG_MIN));
+               tm->tm_hour     = bcd2bin(ops->readbyte(M48T86_REG_HOUR) & 0x3F);
+               tm->tm_mday     = bcd2bin(ops->readbyte(M48T86_REG_DOM));
                /* tm_mon is 0-11 */
-               tm->tm_mon      = BCD2BIN(ops->readbyte(M48T86_REG_MONTH)) - 1;
-               tm->tm_year     = BCD2BIN(ops->readbyte(M48T86_REG_YEAR)) + 100;
-               tm->tm_wday     = BCD2BIN(ops->readbyte(M48T86_REG_DOW));
+               tm->tm_mon      = bcd2bin(ops->readbyte(M48T86_REG_MONTH)) - 1;
+               tm->tm_year     = bcd2bin(ops->readbyte(M48T86_REG_YEAR)) + 100;
+               tm->tm_wday     = bcd2bin(ops->readbyte(M48T86_REG_DOW));
        }
 
        /* correct the hour if the clock is in 12h mode */
@@ -103,13 +103,13 @@ static int m48t86_rtc_set_time(struct device *dev, struct rtc_time *tm)
                ops->writebyte(tm->tm_wday, M48T86_REG_DOW);
        } else {
                /* bcd mode */
-               ops->writebyte(BIN2BCD(tm->tm_sec), M48T86_REG_SEC);
-               ops->writebyte(BIN2BCD(tm->tm_min), M48T86_REG_MIN);
-               ops->writebyte(BIN2BCD(tm->tm_hour), M48T86_REG_HOUR);
-               ops->writebyte(BIN2BCD(tm->tm_mday), M48T86_REG_DOM);
-               ops->writebyte(BIN2BCD(tm->tm_mon + 1), M48T86_REG_MONTH);
-               ops->writebyte(BIN2BCD(tm->tm_year % 100), M48T86_REG_YEAR);
-               ops->writebyte(BIN2BCD(tm->tm_wday), M48T86_REG_DOW);
+               ops->writebyte(bin2bcd(tm->tm_sec), M48T86_REG_SEC);
+               ops->writebyte(bin2bcd(tm->tm_min), M48T86_REG_MIN);
+               ops->writebyte(bin2bcd(tm->tm_hour), M48T86_REG_HOUR);
+               ops->writebyte(bin2bcd(tm->tm_mday), M48T86_REG_DOM);
+               ops->writebyte(bin2bcd(tm->tm_mon + 1), M48T86_REG_MONTH);
+               ops->writebyte(bin2bcd(tm->tm_year % 100), M48T86_REG_YEAR);
+               ops->writebyte(bin2bcd(tm->tm_wday), M48T86_REG_DOW);
        }
 
        /* update ended */
index 12c9cd25cad8b2b3df1843ba74d247f592334e41..80782798763fa2528993ed652b0cf337e9d79008 100644 (file)
@@ -150,14 +150,14 @@ static int max6900_i2c_read_time(struct i2c_client *client, struct rtc_time *tm)
        if (rc < 0)
                return rc;
 
-       tm->tm_sec = BCD2BIN(regs[MAX6900_REG_SC]);
-       tm->tm_min = BCD2BIN(regs[MAX6900_REG_MN]);
-       tm->tm_hour = BCD2BIN(regs[MAX6900_REG_HR] & 0x3f);
-       tm->tm_mday = BCD2BIN(regs[MAX6900_REG_DT]);
-       tm->tm_mon = BCD2BIN(regs[MAX6900_REG_MO]) - 1;
-       tm->tm_year = BCD2BIN(regs[MAX6900_REG_YR]) +
-           BCD2BIN(regs[MAX6900_REG_CENTURY]) * 100 - 1900;
-       tm->tm_wday = BCD2BIN(regs[MAX6900_REG_DW]);
+       tm->tm_sec = bcd2bin(regs[MAX6900_REG_SC]);
+       tm->tm_min = bcd2bin(regs[MAX6900_REG_MN]);
+       tm->tm_hour = bcd2bin(regs[MAX6900_REG_HR] & 0x3f);
+       tm->tm_mday = bcd2bin(regs[MAX6900_REG_DT]);
+       tm->tm_mon = bcd2bin(regs[MAX6900_REG_MO]) - 1;
+       tm->tm_year = bcd2bin(regs[MAX6900_REG_YR]) +
+                     bcd2bin(regs[MAX6900_REG_CENTURY]) * 100 - 1900;
+       tm->tm_wday = bcd2bin(regs[MAX6900_REG_DW]);
 
        return 0;
 }
@@ -184,14 +184,14 @@ max6900_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
        if (rc < 0)
                return rc;
 
-       regs[MAX6900_REG_SC] = BIN2BCD(tm->tm_sec);
-       regs[MAX6900_REG_MN] = BIN2BCD(tm->tm_min);
-       regs[MAX6900_REG_HR] = BIN2BCD(tm->tm_hour);
-       regs[MAX6900_REG_DT] = BIN2BCD(tm->tm_mday);
-       regs[MAX6900_REG_MO] = BIN2BCD(tm->tm_mon + 1);
-       regs[MAX6900_REG_DW] = BIN2BCD(tm->tm_wday);
-       regs[MAX6900_REG_YR] = BIN2BCD(tm->tm_year % 100);
-       regs[MAX6900_REG_CENTURY] = BIN2BCD((tm->tm_year + 1900) / 100);
+       regs[MAX6900_REG_SC] = bin2bcd(tm->tm_sec);
+       regs[MAX6900_REG_MN] = bin2bcd(tm->tm_min);
+       regs[MAX6900_REG_HR] = bin2bcd(tm->tm_hour);
+       regs[MAX6900_REG_DT] = bin2bcd(tm->tm_mday);
+       regs[MAX6900_REG_MO] = bin2bcd(tm->tm_mon + 1);
+       regs[MAX6900_REG_DW] = bin2bcd(tm->tm_wday);
+       regs[MAX6900_REG_YR] = bin2bcd(tm->tm_year % 100);
+       regs[MAX6900_REG_CENTURY] = bin2bcd((tm->tm_year + 1900) / 100);
        /* set write protect */
        regs[MAX6900_REG_CT] = MAX6900_REG_CT_WP;
 
index 78b2551fb19def5e380aa73f70f424c6e500e91e..2f6507df7b49bddbc517130d645183948c7ceec8 100644 (file)
@@ -124,15 +124,15 @@ static int max6902_get_datetime(struct device *dev, struct rtc_time *dt)
 
        /* The chip sends data in this order:
         * Seconds, Minutes, Hours, Date, Month, Day, Year */
-       dt->tm_sec      = BCD2BIN(chip->buf[1]);
-       dt->tm_min      = BCD2BIN(chip->buf[2]);
-       dt->tm_hour     = BCD2BIN(chip->buf[3]);
-       dt->tm_mday     = BCD2BIN(chip->buf[4]);
-       dt->tm_mon      = BCD2BIN(chip->buf[5]) - 1;
-       dt->tm_wday     = BCD2BIN(chip->buf[6]);
-       dt->tm_year = BCD2BIN(chip->buf[7]);
+       dt->tm_sec      = bcd2bin(chip->buf[1]);
+       dt->tm_min      = bcd2bin(chip->buf[2]);
+       dt->tm_hour     = bcd2bin(chip->buf[3]);
+       dt->tm_mday     = bcd2bin(chip->buf[4]);
+       dt->tm_mon      = bcd2bin(chip->buf[5]) - 1;
+       dt->tm_wday     = bcd2bin(chip->buf[6]);
+       dt->tm_year = bcd2bin(chip->buf[7]);
 
-       century = BCD2BIN(tmp) * 100;
+       century = bcd2bin(tmp) * 100;
 
        dt->tm_year += century;
        dt->tm_year -= 1900;
@@ -168,15 +168,15 @@ static int max6902_set_datetime(struct device *dev, struct rtc_time *dt)
        /* Remove write protection */
        max6902_set_reg(dev, 0xF, 0);
 
-       max6902_set_reg(dev, 0x01, BIN2BCD(dt->tm_sec));
-       max6902_set_reg(dev, 0x03, BIN2BCD(dt->tm_min));
-       max6902_set_reg(dev, 0x05, BIN2BCD(dt->tm_hour));
+       max6902_set_reg(dev, 0x01, bin2bcd(dt->tm_sec));
+       max6902_set_reg(dev, 0x03, bin2bcd(dt->tm_min));
+       max6902_set_reg(dev, 0x05, bin2bcd(dt->tm_hour));
 
-       max6902_set_reg(dev, 0x07, BIN2BCD(dt->tm_mday));
-       max6902_set_reg(dev, 0x09, BIN2BCD(dt->tm_mon+1));
-       max6902_set_reg(dev, 0x0B, BIN2BCD(dt->tm_wday));
-       max6902_set_reg(dev, 0x0D, BIN2BCD(dt->tm_year%100));
-       max6902_set_reg(dev, 0x13, BIN2BCD(dt->tm_year/100));
+       max6902_set_reg(dev, 0x07, bin2bcd(dt->tm_mday));
+       max6902_set_reg(dev, 0x09, bin2bcd(dt->tm_mon+1));
+       max6902_set_reg(dev, 0x0B, bin2bcd(dt->tm_wday));
+       max6902_set_reg(dev, 0x0D, bin2bcd(dt->tm_year%100));
+       max6902_set_reg(dev, 0x13, bin2bcd(dt->tm_year/100));
 
        /* Compulab used a delay here. However, the datasheet
         * does not mention a delay being required anywhere... */
index 8876605d4d4bbc746a56da23169e4c190cbb4e6c..2cbeb0794f142f2172b75566ed6e4929315f99f1 100644 (file)
@@ -186,30 +186,30 @@ static int tm2bcd(struct rtc_time *tm)
        if (rtc_valid_tm(tm) != 0)
                return -EINVAL;
 
-       tm->tm_sec = BIN2BCD(tm->tm_sec);
-       tm->tm_min = BIN2BCD(tm->tm_min);
-       tm->tm_hour = BIN2BCD(tm->tm_hour);
-       tm->tm_mday = BIN2BCD(tm->tm_mday);
+       tm->tm_sec = bin2bcd(tm->tm_sec);
+       tm->tm_min = bin2bcd(tm->tm_min);
+       tm->tm_hour = bin2bcd(tm->tm_hour);
+       tm->tm_mday = bin2bcd(tm->tm_mday);
 
-       tm->tm_mon = BIN2BCD(tm->tm_mon + 1);
+       tm->tm_mon = bin2bcd(tm->tm_mon + 1);
 
        /* epoch == 1900 */
        if (tm->tm_year < 100 || tm->tm_year > 199)
                return -EINVAL;
-       tm->tm_year = BIN2BCD(tm->tm_year - 100);
+       tm->tm_year = bin2bcd(tm->tm_year - 100);
 
        return 0;
 }
 
 static void bcd2tm(struct rtc_time *tm)
 {
-       tm->tm_sec = BCD2BIN(tm->tm_sec);
-       tm->tm_min = BCD2BIN(tm->tm_min);
-       tm->tm_hour = BCD2BIN(tm->tm_hour);
-       tm->tm_mday = BCD2BIN(tm->tm_mday);
-       tm->tm_mon = BCD2BIN(tm->tm_mon) - 1;
+       tm->tm_sec = bcd2bin(tm->tm_sec);
+       tm->tm_min = bcd2bin(tm->tm_min);
+       tm->tm_hour = bcd2bin(tm->tm_hour);
+       tm->tm_mday = bcd2bin(tm->tm_mday);
+       tm->tm_mon = bcd2bin(tm->tm_mon) - 1;
        /* epoch == 1900 */
-       tm->tm_year = BCD2BIN(tm->tm_year) + 100;
+       tm->tm_year = bcd2bin(tm->tm_year) + 100;
 }
 
 
index a829f20ad6d6eb140d56d246ab620af0640507d7..b725913ccbe805ecb843252277c02ae8fc4169b0 100644 (file)
@@ -97,13 +97,13 @@ static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm)
                buf[8]);
 
 
-       tm->tm_sec = BCD2BIN(buf[PCF8563_REG_SC] & 0x7F);
-       tm->tm_min = BCD2BIN(buf[PCF8563_REG_MN] & 0x7F);
-       tm->tm_hour = BCD2BIN(buf[PCF8563_REG_HR] & 0x3F); /* rtc hr 0-23 */
-       tm->tm_mday = BCD2BIN(buf[PCF8563_REG_DM] & 0x3F);
+       tm->tm_sec = bcd2bin(buf[PCF8563_REG_SC] & 0x7F);
+       tm->tm_min = bcd2bin(buf[PCF8563_REG_MN] & 0x7F);
+       tm->tm_hour = bcd2bin(buf[PCF8563_REG_HR] & 0x3F); /* rtc hr 0-23 */
+       tm->tm_mday = bcd2bin(buf[PCF8563_REG_DM] & 0x3F);
        tm->tm_wday = buf[PCF8563_REG_DW] & 0x07;
-       tm->tm_mon = BCD2BIN(buf[PCF8563_REG_MO] & 0x1F) - 1; /* rtc mn 1-12 */
-       tm->tm_year = BCD2BIN(buf[PCF8563_REG_YR]);
+       tm->tm_mon = bcd2bin(buf[PCF8563_REG_MO] & 0x1F) - 1; /* rtc mn 1-12 */
+       tm->tm_year = bcd2bin(buf[PCF8563_REG_YR]);
        if (tm->tm_year < 70)
                tm->tm_year += 100;     /* assume we are in 1970...2069 */
        /* detect the polarity heuristically. see note above. */
@@ -138,17 +138,17 @@ static int pcf8563_set_datetime(struct i2c_client *client, struct rtc_time *tm)
                tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
        /* hours, minutes and seconds */
-       buf[PCF8563_REG_SC] = BIN2BCD(tm->tm_sec);
-       buf[PCF8563_REG_MN] = BIN2BCD(tm->tm_min);
-       buf[PCF8563_REG_HR] = BIN2BCD(tm->tm_hour);
+       buf[PCF8563_REG_SC] = bin2bcd(tm->tm_sec);
+       buf[PCF8563_REG_MN] = bin2bcd(tm->tm_min);
+       buf[PCF8563_REG_HR] = bin2bcd(tm->tm_hour);
 
-       buf[PCF8563_REG_DM] = BIN2BCD(tm->tm_mday);
+       buf[PCF8563_REG_DM] = bin2bcd(tm->tm_mday);
 
        /* month, 1 - 12 */
-       buf[PCF8563_REG_MO] = BIN2BCD(tm->tm_mon + 1);
+       buf[PCF8563_REG_MO] = bin2bcd(tm->tm_mon + 1);
 
        /* year and century */
-       buf[PCF8563_REG_YR] = BIN2BCD(tm->tm_year % 100);
+       buf[PCF8563_REG_YR] = bin2bcd(tm->tm_year % 100);
        if (pcf8563->c_polarity ? (tm->tm_year >= 100) : (tm->tm_year < 100))
                buf[PCF8563_REG_MO] |= PCF8563_MO_C;
 
index d388c662bf4b6b630909187a9fbc5370e3c97c7f..7d33cda3f8f6cc5f00331635cc86ca32ddd62283 100644 (file)
@@ -76,11 +76,11 @@ static int pcf8583_get_datetime(struct i2c_client *client, struct rtc_time *dt)
                buf[4] &= 0x3f;
                buf[5] &= 0x1f;
 
-               dt->tm_sec = BCD2BIN(buf[1]);
-               dt->tm_min = BCD2BIN(buf[2]);
-               dt->tm_hour = BCD2BIN(buf[3]);
-               dt->tm_mday = BCD2BIN(buf[4]);
-               dt->tm_mon = BCD2BIN(buf[5]) - 1;
+               dt->tm_sec = bcd2bin(buf[1]);
+               dt->tm_min = bcd2bin(buf[2]);
+               dt->tm_hour = bcd2bin(buf[3]);
+               dt->tm_mday = bcd2bin(buf[4]);
+               dt->tm_mon = bcd2bin(buf[5]) - 1;
        }
 
        return ret == 2 ? 0 : -EIO;
@@ -94,14 +94,14 @@ static int pcf8583_set_datetime(struct i2c_client *client, struct rtc_time *dt,
        buf[0] = 0;
        buf[1] = get_ctrl(client) | 0x80;
        buf[2] = 0;
-       buf[3] = BIN2BCD(dt->tm_sec);
-       buf[4] = BIN2BCD(dt->tm_min);
-       buf[5] = BIN2BCD(dt->tm_hour);
+       buf[3] = bin2bcd(dt->tm_sec);
+       buf[4] = bin2bcd(dt->tm_min);
+       buf[5] = bin2bcd(dt->tm_hour);
 
        if (datetoo) {
                len = 8;
-               buf[6] = BIN2BCD(dt->tm_mday) | (dt->tm_year << 6);
-               buf[7] = BIN2BCD(dt->tm_mon + 1)  | (dt->tm_wday << 5);
+               buf[6] = bin2bcd(dt->tm_mday) | (dt->tm_year << 6);
+               buf[7] = bin2bcd(dt->tm_mon + 1)  | (dt->tm_wday << 5);
        }
 
        ret = i2c_master_send(client, (char *)buf, len);
index 395985b339c933a2c0afa17bff1f38fc78e6bf17..42028f233bef6d596ae05c632cb50cbbd1845892 100644 (file)
@@ -80,13 +80,13 @@ static int r9701_get_datetime(struct device *dev, struct rtc_time *dt)
 
        memset(dt, 0, sizeof(*dt));
 
-       dt->tm_sec = BCD2BIN(buf[0]); /* RSECCNT */
-       dt->tm_min = BCD2BIN(buf[1]); /* RMINCNT */
-       dt->tm_hour = BCD2BIN(buf[2]); /* RHRCNT */
+       dt->tm_sec = bcd2bin(buf[0]); /* RSECCNT */
+       dt->tm_min = bcd2bin(buf[1]); /* RMINCNT */
+       dt->tm_hour = bcd2bin(buf[2]); /* RHRCNT */
 
-       dt->tm_mday = BCD2BIN(buf[3]); /* RDAYCNT */
-       dt->tm_mon = BCD2BIN(buf[4]) - 1; /* RMONCNT */
-       dt->tm_year = BCD2BIN(buf[5]) + 100; /* RYRCNT */
+       dt->tm_mday = bcd2bin(buf[3]); /* RDAYCNT */
+       dt->tm_mon = bcd2bin(buf[4]) - 1; /* RMONCNT */
+       dt->tm_year = bcd2bin(buf[5]) + 100; /* RYRCNT */
 
        /* the rtc device may contain illegal values on power up
         * according to the data sheet. make sure they are valid.
@@ -103,12 +103,12 @@ static int r9701_set_datetime(struct device *dev, struct rtc_time *dt)
        if (year >= 2100 || year < 2000)
                return -EINVAL;
 
-       ret = write_reg(dev, RHRCNT, BIN2BCD(dt->tm_hour));
-       ret = ret ? ret : write_reg(dev, RMINCNT, BIN2BCD(dt->tm_min));
-       ret = ret ? ret : write_reg(dev, RSECCNT, BIN2BCD(dt->tm_sec));
-       ret = ret ? ret : write_reg(dev, RDAYCNT, BIN2BCD(dt->tm_mday));
-       ret = ret ? ret : write_reg(dev, RMONCNT, BIN2BCD(dt->tm_mon + 1));
-       ret = ret ? ret : write_reg(dev, RYRCNT, BIN2BCD(dt->tm_year - 100));
+       ret = write_reg(dev, RHRCNT, bin2bcd(dt->tm_hour));
+       ret = ret ? ret : write_reg(dev, RMINCNT, bin2bcd(dt->tm_min));
+       ret = ret ? ret : write_reg(dev, RSECCNT, bin2bcd(dt->tm_sec));
+       ret = ret ? ret : write_reg(dev, RDAYCNT, bin2bcd(dt->tm_mday));
+       ret = ret ? ret : write_reg(dev, RMONCNT, bin2bcd(dt->tm_mon + 1));
+       ret = ret ? ret : write_reg(dev, RYRCNT, bin2bcd(dt->tm_year - 100));
        ret = ret ? ret : write_reg(dev, RWKCNT, 1 << dt->tm_wday);
 
        return ret;
index 1c14d4497c4db8bffc37148b138e8b17070eaf28..e6ea3f5ee1eb84836999111aff709888e9012c55 100644 (file)
@@ -235,33 +235,33 @@ static int rs5c313_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
        data = rs5c313_read_reg(RS5C313_ADDR_SEC);
        data |= (rs5c313_read_reg(RS5C313_ADDR_SEC10) << 4);
-       tm->tm_sec = BCD2BIN(data);
+       tm->tm_sec = bcd2bin(data);
 
        data = rs5c313_read_reg(RS5C313_ADDR_MIN);
        data |= (rs5c313_read_reg(RS5C313_ADDR_MIN10) << 4);
-       tm->tm_min = BCD2BIN(data);
+       tm->tm_min = bcd2bin(data);
 
        data = rs5c313_read_reg(RS5C313_ADDR_HOUR);
        data |= (rs5c313_read_reg(RS5C313_ADDR_HOUR10) << 4);
-       tm->tm_hour = BCD2BIN(data);
+       tm->tm_hour = bcd2bin(data);
 
        data = rs5c313_read_reg(RS5C313_ADDR_DAY);
        data |= (rs5c313_read_reg(RS5C313_ADDR_DAY10) << 4);
-       tm->tm_mday = BCD2BIN(data);
+       tm->tm_mday = bcd2bin(data);
 
        data = rs5c313_read_reg(RS5C313_ADDR_MON);
        data |= (rs5c313_read_reg(RS5C313_ADDR_MON10) << 4);
-       tm->tm_mon = BCD2BIN(data) - 1;
+       tm->tm_mon = bcd2bin(data) - 1;
 
        data = rs5c313_read_reg(RS5C313_ADDR_YEAR);
        data |= (rs5c313_read_reg(RS5C313_ADDR_YEAR10) << 4);
-       tm->tm_year = BCD2BIN(data);
+       tm->tm_year = bcd2bin(data);
 
        if (tm->tm_year < 70)
                tm->tm_year += 100;
 
        data = rs5c313_read_reg(RS5C313_ADDR_WEEK);
-       tm->tm_wday = BCD2BIN(data);
+       tm->tm_wday = bcd2bin(data);
 
        RS5C313_CEDISABLE;
        ndelay(700);            /* CE:L */
@@ -294,31 +294,31 @@ static int rs5c313_rtc_set_time(struct device *dev, struct rtc_time *tm)
                }
        }
 
-       data = BIN2BCD(tm->tm_sec);
+       data = bin2bcd(tm->tm_sec);
        rs5c313_write_reg(RS5C313_ADDR_SEC, data);
        rs5c313_write_reg(RS5C313_ADDR_SEC10, (data >> 4));
 
-       data = BIN2BCD(tm->tm_min);
+       data = bin2bcd(tm->tm_min);
        rs5c313_write_reg(RS5C313_ADDR_MIN, data );
        rs5c313_write_reg(RS5C313_ADDR_MIN10, (data >> 4));
 
-       data = BIN2BCD(tm->tm_hour);
+       data = bin2bcd(tm->tm_hour);
        rs5c313_write_reg(RS5C313_ADDR_HOUR, data);
        rs5c313_write_reg(RS5C313_ADDR_HOUR10, (data >> 4));
 
-       data = BIN2BCD(tm->tm_mday);
+       data = bin2bcd(tm->tm_mday);
        rs5c313_write_reg(RS5C313_ADDR_DAY, data);
        rs5c313_write_reg(RS5C313_ADDR_DAY10, (data>> 4));
 
-       data = BIN2BCD(tm->tm_mon + 1);
+       data = bin2bcd(tm->tm_mon + 1);
        rs5c313_write_reg(RS5C313_ADDR_MON, data);
        rs5c313_write_reg(RS5C313_ADDR_MON10, (data >> 4));
 
-       data = BIN2BCD(tm->tm_year % 100);
+       data = bin2bcd(tm->tm_year % 100);
        rs5c313_write_reg(RS5C313_ADDR_YEAR, data);
        rs5c313_write_reg(RS5C313_ADDR_YEAR10, (data >> 4));
 
-       data = BIN2BCD(tm->tm_wday);
+       data = bin2bcd(tm->tm_wday);
        rs5c313_write_reg(RS5C313_ADDR_WEEK, data);
 
        RS5C313_CEDISABLE;      /* CE:H */
index 839462659afa4c386c46758c5eed872a61df27d1..dd1e2bc7a472093a77e76719478e6ea1c6f02ae7 100644 (file)
@@ -74,20 +74,20 @@ rs5c348_rtc_set_time(struct device *dev, struct rtc_time *tm)
        txbuf[3] = 0;   /* dummy */
        txbuf[4] = RS5C348_CMD_MW(RS5C348_REG_SECS); /* cmd, sec, ... */
        txp = &txbuf[5];
-       txp[RS5C348_REG_SECS] = BIN2BCD(tm->tm_sec);
-       txp[RS5C348_REG_MINS] = BIN2BCD(tm->tm_min);
+       txp[RS5C348_REG_SECS] = bin2bcd(tm->tm_sec);
+       txp[RS5C348_REG_MINS] = bin2bcd(tm->tm_min);
        if (pdata->rtc_24h) {
-               txp[RS5C348_REG_HOURS] = BIN2BCD(tm->tm_hour);
+               txp[RS5C348_REG_HOURS] = bin2bcd(tm->tm_hour);
        } else {
                /* hour 0 is AM12, noon is PM12 */
-               txp[RS5C348_REG_HOURS] = BIN2BCD((tm->tm_hour + 11) % 12 + 1) |
+               txp[RS5C348_REG_HOURS] = bin2bcd((tm->tm_hour + 11) % 12 + 1) |
                        (tm->tm_hour >= 12 ? RS5C348_BIT_PM : 0);
        }
-       txp[RS5C348_REG_WDAY] = BIN2BCD(tm->tm_wday);
-       txp[RS5C348_REG_DAY] = BIN2BCD(tm->tm_mday);
-       txp[RS5C348_REG_MONTH] = BIN2BCD(tm->tm_mon + 1) |
+       txp[RS5C348_REG_WDAY] = bin2bcd(tm->tm_wday);
+       txp[RS5C348_REG_DAY] = bin2bcd(tm->tm_mday);
+       txp[RS5C348_REG_MONTH] = bin2bcd(tm->tm_mon + 1) |
                (tm->tm_year >= 100 ? RS5C348_BIT_Y2K : 0);
-       txp[RS5C348_REG_YEAR] = BIN2BCD(tm->tm_year % 100);
+       txp[RS5C348_REG_YEAR] = bin2bcd(tm->tm_year % 100);
        /* write in one transfer to avoid data inconsistency */
        ret = spi_write_then_read(spi, txbuf, sizeof(txbuf), NULL, 0);
        udelay(62);     /* Tcsr 62us */
@@ -116,20 +116,20 @@ rs5c348_rtc_read_time(struct device *dev, struct rtc_time *tm)
        if (ret < 0)
                return ret;
 
-       tm->tm_sec = BCD2BIN(rxbuf[RS5C348_REG_SECS] & RS5C348_SECS_MASK);
-       tm->tm_min = BCD2BIN(rxbuf[RS5C348_REG_MINS] & RS5C348_MINS_MASK);
-       tm->tm_hour = BCD2BIN(rxbuf[RS5C348_REG_HOURS] & RS5C348_HOURS_MASK);
+       tm->tm_sec = bcd2bin(rxbuf[RS5C348_REG_SECS] & RS5C348_SECS_MASK);
+       tm->tm_min = bcd2bin(rxbuf[RS5C348_REG_MINS] & RS5C348_MINS_MASK);
+       tm->tm_hour = bcd2bin(rxbuf[RS5C348_REG_HOURS] & RS5C348_HOURS_MASK);
        if (!pdata->rtc_24h) {
                tm->tm_hour %= 12;
                if (rxbuf[RS5C348_REG_HOURS] & RS5C348_BIT_PM)
                        tm->tm_hour += 12;
        }
-       tm->tm_wday = BCD2BIN(rxbuf[RS5C348_REG_WDAY] & RS5C348_WDAY_MASK);
-       tm->tm_mday = BCD2BIN(rxbuf[RS5C348_REG_DAY] & RS5C348_DAY_MASK);
+       tm->tm_wday = bcd2bin(rxbuf[RS5C348_REG_WDAY] & RS5C348_WDAY_MASK);
+       tm->tm_mday = bcd2bin(rxbuf[RS5C348_REG_DAY] & RS5C348_DAY_MASK);
        tm->tm_mon =
-               BCD2BIN(rxbuf[RS5C348_REG_MONTH] & RS5C348_MONTH_MASK) - 1;
+               bcd2bin(rxbuf[RS5C348_REG_MONTH] & RS5C348_MONTH_MASK) - 1;
        /* year is 1900 + tm->tm_year */
-       tm->tm_year = BCD2BIN(rxbuf[RS5C348_REG_YEAR]) +
+       tm->tm_year = bcd2bin(rxbuf[RS5C348_REG_YEAR]) +
                ((rxbuf[RS5C348_REG_MONTH] & RS5C348_BIT_Y2K) ? 100 : 0);
 
        if (rtc_valid_tm(tm) < 0) {
index 8b561958fb1e62056c62ef2aabe37742ab2cd395..2f2c68d476daa924028e167be775859d35ce21b4 100644 (file)
@@ -148,9 +148,9 @@ static unsigned rs5c_reg2hr(struct rs5c372 *rs5c, unsigned reg)
        unsigned        hour;
 
        if (rs5c->time24)
-               return BCD2BIN(reg & 0x3f);
+               return bcd2bin(reg & 0x3f);
 
-       hour = BCD2BIN(reg & 0x1f);
+       hour = bcd2bin(reg & 0x1f);
        if (hour == 12)
                hour = 0;
        if (reg & 0x20)
@@ -161,15 +161,15 @@ static unsigned rs5c_reg2hr(struct rs5c372 *rs5c, unsigned reg)
 static unsigned rs5c_hr2reg(struct rs5c372 *rs5c, unsigned hour)
 {
        if (rs5c->time24)
-               return BIN2BCD(hour);
+               return bin2bcd(hour);
 
        if (hour > 12)
-               return 0x20 | BIN2BCD(hour - 12);
+               return 0x20 | bin2bcd(hour - 12);
        if (hour == 12)
-               return 0x20 | BIN2BCD(12);
+               return 0x20 | bin2bcd(12);
        if (hour == 0)
-               return BIN2BCD(12);
-       return BIN2BCD(hour);
+               return bin2bcd(12);
+       return bin2bcd(hour);
 }
 
 static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
@@ -180,18 +180,18 @@ static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
        if (status < 0)
                return status;
 
-       tm->tm_sec = BCD2BIN(rs5c->regs[RS5C372_REG_SECS] & 0x7f);
-       tm->tm_min = BCD2BIN(rs5c->regs[RS5C372_REG_MINS] & 0x7f);
+       tm->tm_sec = bcd2bin(rs5c->regs[RS5C372_REG_SECS] & 0x7f);
+       tm->tm_min = bcd2bin(rs5c->regs[RS5C372_REG_MINS] & 0x7f);
        tm->tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C372_REG_HOURS]);
 
-       tm->tm_wday = BCD2BIN(rs5c->regs[RS5C372_REG_WDAY] & 0x07);
-       tm->tm_mday = BCD2BIN(rs5c->regs[RS5C372_REG_DAY] & 0x3f);
+       tm->tm_wday = bcd2bin(rs5c->regs[RS5C372_REG_WDAY] & 0x07);
+       tm->tm_mday = bcd2bin(rs5c->regs[RS5C372_REG_DAY] & 0x3f);
 
        /* tm->tm_mon is zero-based */
-       tm->tm_mon = BCD2BIN(rs5c->regs[RS5C372_REG_MONTH] & 0x1f) - 1;
+       tm->tm_mon = bcd2bin(rs5c->regs[RS5C372_REG_MONTH] & 0x1f) - 1;
 
        /* year is 1900 + tm->tm_year */
-       tm->tm_year = BCD2BIN(rs5c->regs[RS5C372_REG_YEAR]) + 100;
+       tm->tm_year = bcd2bin(rs5c->regs[RS5C372_REG_YEAR]) + 100;
 
        dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
                "mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -216,13 +216,13 @@ static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm)
                tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
        addr   = RS5C_ADDR(RS5C372_REG_SECS);
-       buf[0] = BIN2BCD(tm->tm_sec);
-       buf[1] = BIN2BCD(tm->tm_min);
+       buf[0] = bin2bcd(tm->tm_sec);
+       buf[1] = bin2bcd(tm->tm_min);
        buf[2] = rs5c_hr2reg(rs5c, tm->tm_hour);
-       buf[3] = BIN2BCD(tm->tm_wday);
-       buf[4] = BIN2BCD(tm->tm_mday);
-       buf[5] = BIN2BCD(tm->tm_mon + 1);
-       buf[6] = BIN2BCD(tm->tm_year - 100);
+       buf[3] = bin2bcd(tm->tm_wday);
+       buf[4] = bin2bcd(tm->tm_mday);
+       buf[5] = bin2bcd(tm->tm_mon + 1);
+       buf[6] = bin2bcd(tm->tm_year - 100);
 
        if (i2c_smbus_write_i2c_block_data(client, addr, sizeof(buf), buf) < 0) {
                dev_err(&client->dev, "%s: write error\n", __func__);
@@ -367,7 +367,7 @@ static int rs5c_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 
        /* report alarm time */
        t->time.tm_sec = 0;
-       t->time.tm_min = BCD2BIN(rs5c->regs[RS5C_REG_ALARM_A_MIN] & 0x7f);
+       t->time.tm_min = bcd2bin(rs5c->regs[RS5C_REG_ALARM_A_MIN] & 0x7f);
        t->time.tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C_REG_ALARM_A_HOURS]);
        t->time.tm_mday = -1;
        t->time.tm_mon = -1;
@@ -413,7 +413,7 @@ static int rs5c_set_alarm(struct device *dev, struct rtc_wkalrm *t)
        }
 
        /* set alarm */
-       buf[0] = BIN2BCD(t->time.tm_min);
+       buf[0] = bin2bcd(t->time.tm_min);
        buf[1] = rs5c_hr2reg(rs5c, t->time.tm_hour);
        buf[2] = 0x7f;  /* any/all days */
 
index a6fa1f2f2ca6b59726ab099e46a698b4e26cae2d..def4d396d0b096cef968a5572f2b5122d92eeb33 100644 (file)
@@ -104,12 +104,12 @@ static int s35390a_disable_test_mode(struct s35390a *s35390a)
 static char s35390a_hr2reg(struct s35390a *s35390a, int hour)
 {
        if (s35390a->twentyfourhour)
-               return BIN2BCD(hour);
+               return bin2bcd(hour);
 
        if (hour < 12)
-               return BIN2BCD(hour);
+               return bin2bcd(hour);
 
-       return 0x40 | BIN2BCD(hour - 12);
+       return 0x40 | bin2bcd(hour - 12);
 }
 
 static int s35390a_reg2hr(struct s35390a *s35390a, char reg)
@@ -117,9 +117,9 @@ static int s35390a_reg2hr(struct s35390a *s35390a, char reg)
        unsigned hour;
 
        if (s35390a->twentyfourhour)
-               return BCD2BIN(reg & 0x3f);
+               return bcd2bin(reg & 0x3f);
 
-       hour = BCD2BIN(reg & 0x3f);
+       hour = bcd2bin(reg & 0x3f);
        if (reg & 0x40)
                hour += 12;
 
@@ -137,13 +137,13 @@ static int s35390a_set_datetime(struct i2c_client *client, struct rtc_time *tm)
                tm->tm_min, tm->tm_hour, tm->tm_mday, tm->tm_mon, tm->tm_year,
                tm->tm_wday);
 
-       buf[S35390A_BYTE_YEAR] = BIN2BCD(tm->tm_year - 100);
-       buf[S35390A_BYTE_MONTH] = BIN2BCD(tm->tm_mon + 1);
-       buf[S35390A_BYTE_DAY] = BIN2BCD(tm->tm_mday);
-       buf[S35390A_BYTE_WDAY] = BIN2BCD(tm->tm_wday);
+       buf[S35390A_BYTE_YEAR] = bin2bcd(tm->tm_year - 100);
+       buf[S35390A_BYTE_MONTH] = bin2bcd(tm->tm_mon + 1);
+       buf[S35390A_BYTE_DAY] = bin2bcd(tm->tm_mday);
+       buf[S35390A_BYTE_WDAY] = bin2bcd(tm->tm_wday);
        buf[S35390A_BYTE_HOURS] = s35390a_hr2reg(s35390a, tm->tm_hour);
-       buf[S35390A_BYTE_MINS] = BIN2BCD(tm->tm_min);
-       buf[S35390A_BYTE_SECS] = BIN2BCD(tm->tm_sec);
+       buf[S35390A_BYTE_MINS] = bin2bcd(tm->tm_min);
+       buf[S35390A_BYTE_SECS] = bin2bcd(tm->tm_sec);
 
        /* This chip expects the bits of each byte to be in reverse order */
        for (i = 0; i < 7; ++i)
@@ -168,13 +168,13 @@ static int s35390a_get_datetime(struct i2c_client *client, struct rtc_time *tm)
        for (i = 0; i < 7; ++i)
                buf[i] = bitrev8(buf[i]);
 
-       tm->tm_sec = BCD2BIN(buf[S35390A_BYTE_SECS]);
-       tm->tm_min = BCD2BIN(buf[S35390A_BYTE_MINS]);
+       tm->tm_sec = bcd2bin(buf[S35390A_BYTE_SECS]);
+       tm->tm_min = bcd2bin(buf[S35390A_BYTE_MINS]);
        tm->tm_hour = s35390a_reg2hr(s35390a, buf[S35390A_BYTE_HOURS]);
-       tm->tm_wday = BCD2BIN(buf[S35390A_BYTE_WDAY]);
-       tm->tm_mday = BCD2BIN(buf[S35390A_BYTE_DAY]);
-       tm->tm_mon = BCD2BIN(buf[S35390A_BYTE_MONTH]) - 1;
-       tm->tm_year = BCD2BIN(buf[S35390A_BYTE_YEAR]) + 100;
+       tm->tm_wday = bcd2bin(buf[S35390A_BYTE_WDAY]);
+       tm->tm_mday = bcd2bin(buf[S35390A_BYTE_DAY]);
+       tm->tm_mon = bcd2bin(buf[S35390A_BYTE_MONTH]) - 1;
+       tm->tm_year = bcd2bin(buf[S35390A_BYTE_YEAR]) + 100;
 
        dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, mday=%d, "
                "mon=%d, year=%d, wday=%d\n", __func__, tm->tm_sec,
index e7d19b6c265af6e5a285778309d2047f46c82585..910bc704939c2c77c7448149819d5284dfe64a67 100644 (file)
@@ -134,12 +134,12 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
                 rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
                 rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
 
-       BCD_TO_BIN(rtc_tm->tm_sec);
-       BCD_TO_BIN(rtc_tm->tm_min);
-       BCD_TO_BIN(rtc_tm->tm_hour);
-       BCD_TO_BIN(rtc_tm->tm_mday);
-       BCD_TO_BIN(rtc_tm->tm_mon);
-       BCD_TO_BIN(rtc_tm->tm_year);
+       rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
+       rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
+       rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
+       rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
+       rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
+       rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
 
        rtc_tm->tm_year += 100;
        rtc_tm->tm_mon -= 1;
@@ -163,12 +163,12 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
                return -EINVAL;
        }
 
-       writeb(BIN2BCD(tm->tm_sec),  base + S3C2410_RTCSEC);
-       writeb(BIN2BCD(tm->tm_min),  base + S3C2410_RTCMIN);
-       writeb(BIN2BCD(tm->tm_hour), base + S3C2410_RTCHOUR);
-       writeb(BIN2BCD(tm->tm_mday), base + S3C2410_RTCDATE);
-       writeb(BIN2BCD(tm->tm_mon + 1), base + S3C2410_RTCMON);
-       writeb(BIN2BCD(year), base + S3C2410_RTCYEAR);
+       writeb(bin2bcd(tm->tm_sec),  base + S3C2410_RTCSEC);
+       writeb(bin2bcd(tm->tm_min),  base + S3C2410_RTCMIN);
+       writeb(bin2bcd(tm->tm_hour), base + S3C2410_RTCHOUR);
+       writeb(bin2bcd(tm->tm_mday), base + S3C2410_RTCDATE);
+       writeb(bin2bcd(tm->tm_mon + 1), base + S3C2410_RTCMON);
+       writeb(bin2bcd(year), base + S3C2410_RTCYEAR);
 
        return 0;
 }
@@ -199,34 +199,34 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm)
        /* decode the alarm enable field */
 
        if (alm_en & S3C2410_RTCALM_SECEN)
-               BCD_TO_BIN(alm_tm->tm_sec);
+               alm_tm->tm_sec = bcd2bin(alm_tm->tm_sec);
        else
                alm_tm->tm_sec = 0xff;
 
        if (alm_en & S3C2410_RTCALM_MINEN)
-               BCD_TO_BIN(alm_tm->tm_min);
+               alm_tm->tm_min = bcd2bin(alm_tm->tm_min);
        else
                alm_tm->tm_min = 0xff;
 
        if (alm_en & S3C2410_RTCALM_HOUREN)
-               BCD_TO_BIN(alm_tm->tm_hour);
+               alm_tm->tm_hour = bcd2bin(alm_tm->tm_hour);
        else
                alm_tm->tm_hour = 0xff;
 
        if (alm_en & S3C2410_RTCALM_DAYEN)
-               BCD_TO_BIN(alm_tm->tm_mday);
+               alm_tm->tm_mday = bcd2bin(alm_tm->tm_mday);
        else
                alm_tm->tm_mday = 0xff;
 
        if (alm_en & S3C2410_RTCALM_MONEN) {
-               BCD_TO_BIN(alm_tm->tm_mon);
+               alm_tm->tm_mon = bcd2bin(alm_tm->tm_mon);
                alm_tm->tm_mon -= 1;
        } else {
                alm_tm->tm_mon = 0xff;
        }
 
        if (alm_en & S3C2410_RTCALM_YEAREN)
-               BCD_TO_BIN(alm_tm->tm_year);
+               alm_tm->tm_year = bcd2bin(alm_tm->tm_year);
        else
                alm_tm->tm_year = 0xffff;
 
@@ -250,17 +250,17 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 
        if (tm->tm_sec < 60 && tm->tm_sec >= 0) {
                alrm_en |= S3C2410_RTCALM_SECEN;
-               writeb(BIN2BCD(tm->tm_sec), base + S3C2410_ALMSEC);
+               writeb(bin2bcd(tm->tm_sec), base + S3C2410_ALMSEC);
        }
 
        if (tm->tm_min < 60 && tm->tm_min >= 0) {
                alrm_en |= S3C2410_RTCALM_MINEN;
-               writeb(BIN2BCD(tm->tm_min), base + S3C2410_ALMMIN);
+               writeb(bin2bcd(tm->tm_min), base + S3C2410_ALMMIN);
        }
 
        if (tm->tm_hour < 24 && tm->tm_hour >= 0) {
                alrm_en |= S3C2410_RTCALM_HOUREN;
-               writeb(BIN2BCD(tm->tm_hour), base + S3C2410_ALMHOUR);
+               writeb(bin2bcd(tm->tm_hour), base + S3C2410_ALMHOUR);
        }
 
        pr_debug("setting S3C2410_RTCALM to %08x\n", alrm_en);
index 3f393c82e32cba741c10f50cea4783fe536cceec..aaf9d6a337cc78181e89413353026e56b9bde272 100644 (file)
@@ -324,23 +324,23 @@ static int sh_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
                sec128 = readb(rtc->regbase + R64CNT);
 
-               tm->tm_sec      = BCD2BIN(readb(rtc->regbase + RSECCNT));
-               tm->tm_min      = BCD2BIN(readb(rtc->regbase + RMINCNT));
-               tm->tm_hour     = BCD2BIN(readb(rtc->regbase + RHRCNT));
-               tm->tm_wday     = BCD2BIN(readb(rtc->regbase + RWKCNT));
-               tm->tm_mday     = BCD2BIN(readb(rtc->regbase + RDAYCNT));
-               tm->tm_mon      = BCD2BIN(readb(rtc->regbase + RMONCNT)) - 1;
+               tm->tm_sec      = bcd2bin(readb(rtc->regbase + RSECCNT));
+               tm->tm_min      = bcd2bin(readb(rtc->regbase + RMINCNT));
+               tm->tm_hour     = bcd2bin(readb(rtc->regbase + RHRCNT));
+               tm->tm_wday     = bcd2bin(readb(rtc->regbase + RWKCNT));
+               tm->tm_mday     = bcd2bin(readb(rtc->regbase + RDAYCNT));
+               tm->tm_mon      = bcd2bin(readb(rtc->regbase + RMONCNT)) - 1;
 
                if (rtc->capabilities & RTC_CAP_4_DIGIT_YEAR) {
                        yr  = readw(rtc->regbase + RYRCNT);
-                       yr100 = BCD2BIN(yr >> 8);
+                       yr100 = bcd2bin(yr >> 8);
                        yr &= 0xff;
                } else {
                        yr  = readb(rtc->regbase + RYRCNT);
-                       yr100 = BCD2BIN((yr == 0x99) ? 0x19 : 0x20);
+                       yr100 = bcd2bin((yr == 0x99) ? 0x19 : 0x20);
                }
 
-               tm->tm_year = (yr100 * 100 + BCD2BIN(yr)) - 1900;
+               tm->tm_year = (yr100 * 100 + bcd2bin(yr)) - 1900;
 
                sec2 = readb(rtc->regbase + R64CNT);
                cf_bit = readb(rtc->regbase + RCR1) & RCR1_CF;
@@ -382,20 +382,20 @@ static int sh_rtc_set_time(struct device *dev, struct rtc_time *tm)
        tmp &= ~RCR2_START;
        writeb(tmp, rtc->regbase + RCR2);
 
-       writeb(BIN2BCD(tm->tm_sec),  rtc->regbase + RSECCNT);
-       writeb(BIN2BCD(tm->tm_min),  rtc->regbase + RMINCNT);
-       writeb(BIN2BCD(tm->tm_hour), rtc->regbase + RHRCNT);
-       writeb(BIN2BCD(tm->tm_wday), rtc->regbase + RWKCNT);
-       writeb(BIN2BCD(tm->tm_mday), rtc->regbase + RDAYCNT);
-       writeb(BIN2BCD(tm->tm_mon + 1), rtc->regbase + RMONCNT);
+       writeb(bin2bcd(tm->tm_sec),  rtc->regbase + RSECCNT);
+       writeb(bin2bcd(tm->tm_min),  rtc->regbase + RMINCNT);
+       writeb(bin2bcd(tm->tm_hour), rtc->regbase + RHRCNT);
+       writeb(bin2bcd(tm->tm_wday), rtc->regbase + RWKCNT);
+       writeb(bin2bcd(tm->tm_mday), rtc->regbase + RDAYCNT);
+       writeb(bin2bcd(tm->tm_mon + 1), rtc->regbase + RMONCNT);
 
        if (rtc->capabilities & RTC_CAP_4_DIGIT_YEAR) {
-               year = (BIN2BCD((tm->tm_year + 1900) / 100) << 8) |
-                       BIN2BCD(tm->tm_year % 100);
+               year = (bin2bcd((tm->tm_year + 1900) / 100) << 8) |
+                       bin2bcd(tm->tm_year % 100);
                writew(year, rtc->regbase + RYRCNT);
        } else {
                year = tm->tm_year % 100;
-               writeb(BIN2BCD(year), rtc->regbase + RYRCNT);
+               writeb(bin2bcd(year), rtc->regbase + RYRCNT);
        }
 
        /* Start RTC */
@@ -417,7 +417,7 @@ static inline int sh_rtc_read_alarm_value(struct sh_rtc *rtc, int reg_off)
        byte = readb(rtc->regbase + reg_off);
        if (byte & AR_ENB) {
                byte &= ~AR_ENB;        /* strip the enable bit */
-               value = BCD2BIN(byte);
+               value = bcd2bin(byte);
        }
 
        return value;
@@ -455,7 +455,7 @@ static inline void sh_rtc_write_alarm_value(struct sh_rtc *rtc,
        if (value < 0)
                writeb(0, rtc->regbase + reg_off);
        else
-               writeb(BIN2BCD(value) | AR_ENB,  rtc->regbase + reg_off);
+               writeb(bin2bcd(value) | AR_ENB,  rtc->regbase + reg_off);
 }
 
 static int sh_rtc_check_alarm(struct rtc_time *tm)
index 9a7e920315fa76c6af27d307c913df0b184dadad..f4cd46e15af98830ac3c580c52563119e7bc103e 100644 (file)
@@ -82,14 +82,14 @@ static int stk17ta8_rtc_set_time(struct device *dev, struct rtc_time *tm)
        flags = readb(pdata->ioaddr + RTC_FLAGS);
        writeb(flags | RTC_WRITE, pdata->ioaddr + RTC_FLAGS);
 
-       writeb(BIN2BCD(tm->tm_year % 100), ioaddr + RTC_YEAR);
-       writeb(BIN2BCD(tm->tm_mon + 1), ioaddr + RTC_MONTH);
-       writeb(BIN2BCD(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
-       writeb(BIN2BCD(tm->tm_mday), ioaddr + RTC_DATE);
-       writeb(BIN2BCD(tm->tm_hour), ioaddr + RTC_HOURS);
-       writeb(BIN2BCD(tm->tm_min), ioaddr + RTC_MINUTES);
-       writeb(BIN2BCD(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
-       writeb(BIN2BCD((tm->tm_year + 1900) / 100), ioaddr + RTC_CENTURY);
+       writeb(bin2bcd(tm->tm_year % 100), ioaddr + RTC_YEAR);
+       writeb(bin2bcd(tm->tm_mon + 1), ioaddr + RTC_MONTH);
+       writeb(bin2bcd(tm->tm_wday) & RTC_DAY_MASK, ioaddr + RTC_DAY);
+       writeb(bin2bcd(tm->tm_mday), ioaddr + RTC_DATE);
+       writeb(bin2bcd(tm->tm_hour), ioaddr + RTC_HOURS);
+       writeb(bin2bcd(tm->tm_min), ioaddr + RTC_MINUTES);
+       writeb(bin2bcd(tm->tm_sec) & RTC_SECONDS_MASK, ioaddr + RTC_SECONDS);
+       writeb(bin2bcd((tm->tm_year + 1900) / 100), ioaddr + RTC_CENTURY);
 
        writeb(flags & ~RTC_WRITE, pdata->ioaddr + RTC_FLAGS);
        return 0;
@@ -120,14 +120,14 @@ static int stk17ta8_rtc_read_time(struct device *dev, struct rtc_time *tm)
        year = readb(ioaddr + RTC_YEAR);
        century = readb(ioaddr + RTC_CENTURY);
        writeb(flags & ~RTC_READ, ioaddr + RTC_FLAGS);
-       tm->tm_sec = BCD2BIN(second);
-       tm->tm_min = BCD2BIN(minute);
-       tm->tm_hour = BCD2BIN(hour);
-       tm->tm_mday = BCD2BIN(day);
-       tm->tm_wday = BCD2BIN(week);
-       tm->tm_mon = BCD2BIN(month) - 1;
+       tm->tm_sec = bcd2bin(second);
+       tm->tm_min = bcd2bin(minute);
+       tm->tm_hour = bcd2bin(hour);
+       tm->tm_mday = bcd2bin(day);
+       tm->tm_wday = bcd2bin(week);
+       tm->tm_mon = bcd2bin(month) - 1;
        /* year is 1900 + tm->tm_year */
-       tm->tm_year = BCD2BIN(year) + BCD2BIN(century) * 100 - 1900;
+       tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
        if (rtc_valid_tm(tm) < 0) {
                dev_err(dev, "retrieved date/time is not valid.\n");
@@ -148,16 +148,16 @@ static void stk17ta8_rtc_update_alarm(struct rtc_plat_data *pdata)
        writeb(flags | RTC_WRITE, ioaddr + RTC_FLAGS);
 
        writeb(pdata->alrm_mday < 0 || (pdata->irqen & RTC_UF) ?
-              0x80 : BIN2BCD(pdata->alrm_mday),
+              0x80 : bin2bcd(pdata->alrm_mday),
               ioaddr + RTC_DATE_ALARM);
        writeb(pdata->alrm_hour < 0 || (pdata->irqen & RTC_UF) ?
-              0x80 : BIN2BCD(pdata->alrm_hour),
+              0x80 : bin2bcd(pdata->alrm_hour),
               ioaddr + RTC_HOURS_ALARM);
        writeb(pdata->alrm_min < 0 || (pdata->irqen & RTC_UF) ?
-              0x80 : BIN2BCD(pdata->alrm_min),
+              0x80 : bin2bcd(pdata->alrm_min),
               ioaddr + RTC_MINUTES_ALARM);
        writeb(pdata->alrm_sec < 0 || (pdata->irqen & RTC_UF) ?
-              0x80 : BIN2BCD(pdata->alrm_sec),
+              0x80 : bin2bcd(pdata->alrm_sec),
               ioaddr + RTC_SECONDS_ALARM);
        writeb(pdata->irqen ? RTC_INTS_AIE : 0, ioaddr + RTC_INTERRUPTS);
        readb(ioaddr + RTC_FLAGS);      /* clear interrupts */
@@ -280,7 +280,6 @@ static struct bin_attribute stk17ta8_nvram_attr = {
        .attr = {
                .name = "nvram",
                .mode = S_IRUGO | S_IWUSR,
-               .owner = THIS_MODULE,
        },
        .size = RTC_OFFSET,
        .read = stk17ta8_nvram_read,
index 10025d84026812e0eab2fe1f3fa5179e411b2aca..14d4f036a7686e7c637752c95ae620df99458f77 100644 (file)
@@ -92,19 +92,19 @@ static int v3020_read_time(struct device *dev, struct rtc_time *dt)
 
        /* ...and then read constant values. */
        tmp = v3020_get_reg(chip, V3020_SECONDS);
-       dt->tm_sec      = BCD2BIN(tmp);
+       dt->tm_sec      = bcd2bin(tmp);
        tmp = v3020_get_reg(chip, V3020_MINUTES);
-       dt->tm_min      = BCD2BIN(tmp);
+       dt->tm_min      = bcd2bin(tmp);
        tmp = v3020_get_reg(chip, V3020_HOURS);
-       dt->tm_hour     = BCD2BIN(tmp);
+       dt->tm_hour     = bcd2bin(tmp);
        tmp = v3020_get_reg(chip, V3020_MONTH_DAY);
-       dt->tm_mday     = BCD2BIN(tmp);
+       dt->tm_mday     = bcd2bin(tmp);
        tmp = v3020_get_reg(chip, V3020_MONTH);
-       dt->tm_mon    = BCD2BIN(tmp) - 1;
+       dt->tm_mon    = bcd2bin(tmp) - 1;
        tmp = v3020_get_reg(chip, V3020_WEEK_DAY);
-       dt->tm_wday     = BCD2BIN(tmp);
+       dt->tm_wday     = bcd2bin(tmp);
        tmp = v3020_get_reg(chip, V3020_YEAR);
-       dt->tm_year = BCD2BIN(tmp)+100;
+       dt->tm_year = bcd2bin(tmp)+100;
 
 #ifdef DEBUG
        printk("\n%s : Read RTC values\n",__func__);
@@ -136,13 +136,13 @@ static int v3020_set_time(struct device *dev, struct rtc_time *dt)
 #endif
 
        /* Write all the values to ram... */
-       v3020_set_reg(chip, V3020_SECONDS,      BIN2BCD(dt->tm_sec));
-       v3020_set_reg(chip, V3020_MINUTES,      BIN2BCD(dt->tm_min));
-       v3020_set_reg(chip, V3020_HOURS,        BIN2BCD(dt->tm_hour));
-       v3020_set_reg(chip, V3020_MONTH_DAY,    BIN2BCD(dt->tm_mday));
-       v3020_set_reg(chip, V3020_MONTH,     BIN2BCD(dt->tm_mon + 1));
-       v3020_set_reg(chip, V3020_WEEK_DAY,     BIN2BCD(dt->tm_wday));
-       v3020_set_reg(chip, V3020_YEAR,         BIN2BCD(dt->tm_year % 100));
+       v3020_set_reg(chip, V3020_SECONDS,      bin2bcd(dt->tm_sec));
+       v3020_set_reg(chip, V3020_MINUTES,      bin2bcd(dt->tm_min));
+       v3020_set_reg(chip, V3020_HOURS,        bin2bcd(dt->tm_hour));
+       v3020_set_reg(chip, V3020_MONTH_DAY,    bin2bcd(dt->tm_mday));
+       v3020_set_reg(chip, V3020_MONTH,     bin2bcd(dt->tm_mon + 1));
+       v3020_set_reg(chip, V3020_WEEK_DAY,     bin2bcd(dt->tm_wday));
+       v3020_set_reg(chip, V3020_YEAR,         bin2bcd(dt->tm_year % 100));
 
        /* ...and set the clock. */
        v3020_set_reg(chip, V3020_CMD_RAM2CLOCK, 0);
index 7dcfba1bbfe131539af2aba38d361e53d036280e..310c10795e9a81cf0c37960a1d40fb46a743369a 100644 (file)
@@ -118,13 +118,13 @@ static int x1205_get_datetime(struct i2c_client *client, struct rtc_time *tm,
                for (i = 0; i <= 4; i++)
                        buf[i] &= 0x7F;
 
-       tm->tm_sec = BCD2BIN(buf[CCR_SEC]);
-       tm->tm_min = BCD2BIN(buf[CCR_MIN]);
-       tm->tm_hour = BCD2BIN(buf[CCR_HOUR] & 0x3F); /* hr is 0-23 */
-       tm->tm_mday = BCD2BIN(buf[CCR_MDAY]);
-       tm->tm_mon = BCD2BIN(buf[CCR_MONTH]) - 1; /* mon is 0-11 */
-       tm->tm_year = BCD2BIN(buf[CCR_YEAR])
-                       + (BCD2BIN(buf[CCR_Y2K]) * 100) - 1900;
+       tm->tm_sec = bcd2bin(buf[CCR_SEC]);
+       tm->tm_min = bcd2bin(buf[CCR_MIN]);
+       tm->tm_hour = bcd2bin(buf[CCR_HOUR] & 0x3F); /* hr is 0-23 */
+       tm->tm_mday = bcd2bin(buf[CCR_MDAY]);
+       tm->tm_mon = bcd2bin(buf[CCR_MONTH]) - 1; /* mon is 0-11 */
+       tm->tm_year = bcd2bin(buf[CCR_YEAR])
+                       + (bcd2bin(buf[CCR_Y2K]) * 100) - 1900;
        tm->tm_wday = buf[CCR_WDAY];
 
        dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
@@ -174,11 +174,11 @@ static int x1205_set_datetime(struct i2c_client *client, struct rtc_time *tm,
                __func__,
                tm->tm_sec, tm->tm_min, tm->tm_hour);
 
-       buf[CCR_SEC] = BIN2BCD(tm->tm_sec);
-       buf[CCR_MIN] = BIN2BCD(tm->tm_min);
+       buf[CCR_SEC] = bin2bcd(tm->tm_sec);
+       buf[CCR_MIN] = bin2bcd(tm->tm_min);
 
        /* set hour and 24hr bit */
-       buf[CCR_HOUR] = BIN2BCD(tm->tm_hour) | X1205_HR_MIL;
+       buf[CCR_HOUR] = bin2bcd(tm->tm_hour) | X1205_HR_MIL;
 
        /* should we also set the date? */
        if (datetoo) {
@@ -187,15 +187,15 @@ static int x1205_set_datetime(struct i2c_client *client, struct rtc_time *tm,
                        __func__,
                        tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-               buf[CCR_MDAY] = BIN2BCD(tm->tm_mday);
+               buf[CCR_MDAY] = bin2bcd(tm->tm_mday);
 
                /* month, 1 - 12 */
-               buf[CCR_MONTH] = BIN2BCD(tm->tm_mon + 1);
+               buf[CCR_MONTH] = bin2bcd(tm->tm_mon + 1);
 
                /* year, since the rtc epoch*/
-               buf[CCR_YEAR] = BIN2BCD(tm->tm_year % 100);
+               buf[CCR_YEAR] = bin2bcd(tm->tm_year % 100);
                buf[CCR_WDAY] = tm->tm_wday & 0x07;
-               buf[CCR_Y2K] = BIN2BCD(tm->tm_year / 100);
+               buf[CCR_Y2K] = bin2bcd(tm->tm_year / 100);
        }
 
        /* If writing alarm registers, set compare bits on registers 0-4 */
@@ -437,7 +437,7 @@ static int x1205_validate_client(struct i2c_client *client)
                        return -EIO;
                }
 
-               value = BCD2BIN(reg & probe_limits_pattern[i].mask);
+               value = bcd2bin(reg & probe_limits_pattern[i].mask);
 
                if (value > probe_limits_pattern[i].max ||
                        value < probe_limits_pattern[i].min) {
index 69f8346aa288348a86a8d9885dbfd1759867c5ad..5877f29a600541d7acfaf44b8ea91fae8d74243c 100644 (file)
@@ -189,7 +189,6 @@ static struct bin_attribute arcmsr_sysfs_message_read_attr = {
        .attr = {
                .name = "mu_read",
                .mode = S_IRUSR ,
-               .owner = THIS_MODULE,
        },
        .size = 1032,
        .read = arcmsr_sysfs_iop_message_read,
@@ -199,7 +198,6 @@ static struct bin_attribute arcmsr_sysfs_message_write_attr = {
        .attr = {
                .name = "mu_write",
                .mode = S_IWUSR,
-               .owner = THIS_MODULE,
        },
        .size = 1032,
        .write = arcmsr_sysfs_iop_message_write,
@@ -209,7 +207,6 @@ static struct bin_attribute arcmsr_sysfs_message_clear_attr = {
        .attr = {
                .name = "mu_clear",
                .mode = S_IWUSR,
-               .owner = THIS_MODULE,
        },
        .size = 1,
        .write = arcmsr_sysfs_iop_message_clear,
index 4eb3da996b369518edb0a1efa924f5c97657a614..4ad3e017213f868ed85af9a7032912194836abea 100644 (file)
@@ -223,9 +223,9 @@ int sr_cd_check(struct cdrom_device_info *cdi)
                                no_multi = 1;
                                break;
                        }
-                       min = BCD2BIN(buffer[15]);
-                       sec = BCD2BIN(buffer[16]);
-                       frame = BCD2BIN(buffer[17]);
+                       min = bcd2bin(buffer[15]);
+                       sec = bcd2bin(buffer[16]);
+                       frame = bcd2bin(buffer[17]);
                        sector = min * CD_SECS * CD_FRAMES + sec * CD_FRAMES + frame;
                        break;
                }
@@ -252,9 +252,9 @@ int sr_cd_check(struct cdrom_device_info *cdi)
                        }
                        if (rc != 0)
                                break;
-                       min = BCD2BIN(buffer[1]);
-                       sec = BCD2BIN(buffer[2]);
-                       frame = BCD2BIN(buffer[3]);
+                       min = bcd2bin(buffer[1]);
+                       sec = bcd2bin(buffer[2]);
+                       frame = bcd2bin(buffer[3]);
                        sector = min * CD_SECS * CD_FRAMES + sec * CD_FRAMES + frame;
                        if (sector)
                                sector -= CD_MSF_OFFSET;
index 0416ad3bc127bd6f45b3302319dd7f15aff7fafd..418b4fe9a0a1f40cbf3403974ebe2fab935802cf 100644 (file)
@@ -111,7 +111,7 @@ static struct parisc_driver serial_driver = {
        .probe          = serial_init_chip,
 };
 
-int __init probe_serial_gsc(void)
+static int __init probe_serial_gsc(void)
 {
        register_parisc_driver(&lasi_driver);
        register_parisc_driver(&serial_driver);
index 8fcb4c5b9a263b2db8897fe9360dec3943552569..7313c2edcb831b873d89156bb6200179dd642fb8 100644 (file)
@@ -1039,7 +1039,7 @@ static int __devinit serial_txx9_probe(struct platform_device *dev)
                ret = serial_txx9_register_port(&port);
                if (ret < 0) {
                        dev_err(&dev->dev, "unable to register port at index %d "
-                               "(IO%x MEM%llx IRQ%d): %d\n", i,
+                               "(IO%lx MEM%llx IRQ%d): %d\n", i,
                                p->iobase, (unsigned long long)p->mapbase,
                                p->irq, ret);
                }
index b73e3c0056cd77531248a89dc2925809e61faae6..d5276c012f78f529348af32a4d227c2d969ff879 100644 (file)
@@ -61,7 +61,7 @@
 #define SN_SAL_BUFFER_SIZE (64 * (1 << 10))
 
 #define SN_SAL_UART_FIFO_DEPTH 16
-#define SN_SAL_UART_FIFO_SPEED_CPS 9600/10
+#define SN_SAL_UART_FIFO_SPEED_CPS (9600/10)
 
 /* sn_transmit_chars() calling args */
 #define TRANSMIT_BUFFERED      0
index 57a121c338c44c61d0874a689802398afbedf65f..593fdb767aad2a6f87ddd3092d9170a8c9414c68 100644 (file)
@@ -1,10 +1,12 @@
 config VIDEO_GO7007
        tristate "Go 7007 support"
        depends on VIDEO_DEV && PCI && I2C && INPUT
+       depends on SND
        select VIDEOBUF_DMA_SG
        select VIDEO_IR
        select VIDEO_TUNER
        select VIDEO_TVEEPROM
+       select SND_PCM
        select CRC32
        default N
        ---help---
index 1ae3508066003f99fc94abb8d14423acf00ebc09..6e6cf0b9ef99100824cd802c1135188fb738dd50 100644 (file)
@@ -1,6 +1,7 @@
 config SXG
        tristate "Alacritech SLIC Technology Non-Accelerated 10Gbe support"
        depends on PCI && NETDEV_10000
+       depends on X86
        default n
        help
          This driver supports the Alacritech SLIC Technology Non-Accelerated
index 4d74ba36c3a12b0d8acd19cd7115b154522f862b..37caf4d690370489cd7dcbd880f035c67c0c4bd2 100644 (file)
@@ -54,7 +54,6 @@ static int phone_open(struct inode *inode, struct file *file)
        if (minor >= PHONE_NUM_DEVICES)
                return -ENODEV;
 
-       lock_kernel();
        mutex_lock(&phone_lock);
        p = phone_device[minor];
        if (p)
@@ -81,7 +80,6 @@ static int phone_open(struct inode *inode, struct file *file)
        fops_put(old_fops);
 end:
        mutex_unlock(&phone_lock);
-       unlock_kernel();
        return err;
 }
 
index 217c5118ae9e71818ea80014cb2b54cb8c63c707..cd5f20da738a0cb4e579a046c2a352a58791ea86 100644 (file)
@@ -1002,101 +1002,132 @@ fb_blank(struct fb_info *info, int blank)
        return ret;
 }
 
-static int 
-fb_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+static long
+fb_ioctl(struct file *file, unsigned int cmd,
         unsigned long arg)
 {
+       struct inode *inode = file->f_path.dentry->d_inode;
        int fbidx = iminor(inode);
-       struct fb_info *info = registered_fb[fbidx];
-       struct fb_ops *fb = info->fbops;
+       struct fb_info *info;
+       struct fb_ops *fb;
        struct fb_var_screeninfo var;
        struct fb_fix_screeninfo fix;
        struct fb_con2fbmap con2fb;
        struct fb_cmap_user cmap;
        struct fb_event event;
        void __user *argp = (void __user *)arg;
-       int i;
-       
-       if (!fb)
+       long ret = 0;
+
+       info = registered_fb[fbidx];
+       mutex_lock(&info->lock);
+       fb = info->fbops;
+
+       if (!fb) {
+               mutex_unlock(&info->lock);
                return -ENODEV;
+       }
        switch (cmd) {
        case FBIOGET_VSCREENINFO:
-               return copy_to_user(argp, &info->var,
+               ret = copy_to_user(argp, &info->var,
                                    sizeof(var)) ? -EFAULT : 0;
+               break;
        case FBIOPUT_VSCREENINFO:
-               if (copy_from_user(&var, argp, sizeof(var)))
-                       return -EFAULT;
+               if (copy_from_user(&var, argp, sizeof(var))) {
+                       ret =  -EFAULT;
+                       break;
+               }
                acquire_console_sem();
                info->flags |= FBINFO_MISC_USEREVENT;
-               i = fb_set_var(info, &var);
+               ret = fb_set_var(info, &var);
                info->flags &= ~FBINFO_MISC_USEREVENT;
                release_console_sem();
-               if (i) return i;
-               if (copy_to_user(argp, &var, sizeof(var)))
-                       return -EFAULT;
-               return 0;
+               if (ret == 0 && copy_to_user(argp, &var, sizeof(var)))
+                       ret = -EFAULT;
+               break;
        case FBIOGET_FSCREENINFO:
-               return copy_to_user(argp, &info->fix,
+               ret = copy_to_user(argp, &info->fix,
                                    sizeof(fix)) ? -EFAULT : 0;
+               break;
        case FBIOPUTCMAP:
                if (copy_from_user(&cmap, argp, sizeof(cmap)))
-                       return -EFAULT;
-               return (fb_set_user_cmap(&cmap, info));
+                       ret = -EFAULT;
+               else
+                       ret = fb_set_user_cmap(&cmap, info);
+               break;
        case FBIOGETCMAP:
                if (copy_from_user(&cmap, argp, sizeof(cmap)))
-                       return -EFAULT;
-               return fb_cmap_to_user(&info->cmap, &cmap);
+                       ret = -EFAULT;
+               else
+                       ret = fb_cmap_to_user(&info->cmap, &cmap);
+               break;
        case FBIOPAN_DISPLAY:
-               if (copy_from_user(&var, argp, sizeof(var)))
-                       return -EFAULT;
+               if (copy_from_user(&var, argp, sizeof(var))) {
+                       ret = -EFAULT;
+                       break;
+               }
                acquire_console_sem();
-               i = fb_pan_display(info, &var);
+               ret = fb_pan_display(info, &var);
                release_console_sem();
-               if (i)
-                       return i;
-               if (copy_to_user(argp, &var, sizeof(var)))
-                       return -EFAULT;
-               return 0;
+               if (ret == 0 && copy_to_user(argp, &var, sizeof(var)))
+                       ret = -EFAULT;
+               break;
        case FBIO_CURSOR:
-               return -EINVAL;
+               ret = -EINVAL;
+               break;
        case FBIOGET_CON2FBMAP:
                if (copy_from_user(&con2fb, argp, sizeof(con2fb)))
-                       return -EFAULT;
-               if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES)
-                   return -EINVAL;
-               con2fb.framebuffer = -1;
-               event.info = info;
-               event.data = &con2fb;
-               fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP, &event);
-               return copy_to_user(argp, &con2fb,
+                       ret = -EFAULT;
+               else if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES)
+                       ret = -EINVAL;
+               else {
+                       con2fb.framebuffer = -1;
+                       event.info = info;
+                       event.data = &con2fb;
+                       fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP,
+                                                               &event);
+                       ret = copy_to_user(argp, &con2fb,
                                    sizeof(con2fb)) ? -EFAULT : 0;
+               }
+               break;
        case FBIOPUT_CON2FBMAP:
-               if (copy_from_user(&con2fb, argp, sizeof(con2fb)))
-                       return - EFAULT;
-               if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES)
-                   return -EINVAL;
-               if (con2fb.framebuffer < 0 || con2fb.framebuffer >= FB_MAX)
-                   return -EINVAL;
-               if (!registered_fb[con2fb.framebuffer])
-                   request_module("fb%d", con2fb.framebuffer);
+               if (copy_from_user(&con2fb, argp, sizeof(con2fb))) {
+                       ret = -EFAULT;
+                       break;
+               }
+               if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) {
+                       ret = -EINVAL;
+                       break;
+               }
+               if (con2fb.framebuffer < 0 || con2fb.framebuffer >= FB_MAX) {
+                       ret = -EINVAL;
+                       break;
+               }
                if (!registered_fb[con2fb.framebuffer])
-                   return -EINVAL;
+                       request_module("fb%d", con2fb.framebuffer);
+               if (!registered_fb[con2fb.framebuffer]) {
+                       ret = -EINVAL;
+                       break;
+               }
                event.info = info;
                event.data = &con2fb;
-               return fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP,
+               ret = fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP,
                                              &event);
+               break;
        case FBIOBLANK:
                acquire_console_sem();
                info->flags |= FBINFO_MISC_USEREVENT;
-               i = fb_blank(info, arg);
+               ret = fb_blank(info, arg);
                info->flags &= ~FBINFO_MISC_USEREVENT;
                release_console_sem();
-               return i;
+               break;;
        default:
                if (fb->fb_ioctl == NULL)
-                       return -EINVAL;
-               return fb->fb_ioctl(info, cmd, arg);
+                       ret = -ENOTTY;
+               else
+                       ret = fb->fb_ioctl(info, cmd, arg);
        }
+       mutex_unlock(&info->lock);
+       return ret;
 }
 
 #ifdef CONFIG_COMPAT
@@ -1150,7 +1181,7 @@ static int fb_getput_cmap(struct inode *inode, struct file *file,
            put_user(compat_ptr(data), &cmap->transp))
                return -EFAULT;
 
-       err = fb_ioctl(inode, file, cmd, (unsigned long) cmap);
+       err = fb_ioctl(file, cmd, (unsigned long) cmap);
 
        if (!err) {
                if (copy_in_user(&cmap32->start,
@@ -1204,7 +1235,7 @@ static int fb_get_fscreeninfo(struct inode *inode, struct file *file,
 
        old_fs = get_fs();
        set_fs(KERNEL_DS);
-       err = fb_ioctl(inode, file, cmd, (unsigned long) &fix);
+       err = fb_ioctl(file, cmd, (unsigned long) &fix);
        set_fs(old_fs);
 
        if (!err)
@@ -1222,7 +1253,7 @@ fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        struct fb_ops *fb = info->fbops;
        long ret = -ENOIOCTLCMD;
 
-       lock_kernel();
+       mutex_lock(&info->lock);
        switch(cmd) {
        case FBIOGET_VSCREENINFO:
        case FBIOPUT_VSCREENINFO:
@@ -1231,7 +1262,7 @@ fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        case FBIOPUT_CON2FBMAP:
                arg = (unsigned long) compat_ptr(arg);
        case FBIOBLANK:
-               ret = fb_ioctl(inode, file, cmd, arg);
+               ret = fb_ioctl(file, cmd, arg);
                break;
 
        case FBIOGET_FSCREENINFO:
@@ -1248,7 +1279,7 @@ fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                        ret = fb->fb_compat_ioctl(info, cmd, arg);
                break;
        }
-       unlock_kernel();
+       mutex_unlock(&info->lock);
        return ret;
 }
 #endif
@@ -1270,13 +1301,13 @@ fb_mmap(struct file *file, struct vm_area_struct * vma)
                return -ENODEV;
        if (fb->fb_mmap) {
                int res;
-               lock_kernel();
+               mutex_lock(&info->lock);
                res = fb->fb_mmap(info, vma);
-               unlock_kernel();
+               mutex_unlock(&info->lock);
                return res;
        }
 
-       lock_kernel();
+       mutex_lock(&info->lock);
 
        /* frame buffer memory */
        start = info->fix.smem_start;
@@ -1285,13 +1316,13 @@ fb_mmap(struct file *file, struct vm_area_struct * vma)
                /* memory mapped io */
                off -= len;
                if (info->var.accel_flags) {
-                       unlock_kernel();
+                       mutex_unlock(&info->lock);
                        return -EINVAL;
                }
                start = info->fix.mmio_start;
                len = PAGE_ALIGN((start & ~PAGE_MASK) + info->fix.mmio_len);
        }
-       unlock_kernel();
+       mutex_unlock(&info->lock);
        start &= PAGE_MASK;
        if ((vma->vm_end - vma->vm_start + off) > len)
                return -EINVAL;
@@ -1315,13 +1346,13 @@ fb_open(struct inode *inode, struct file *file)
 
        if (fbidx >= FB_MAX)
                return -ENODEV;
-       lock_kernel();
-       if (!(info = registered_fb[fbidx]))
+       info = registered_fb[fbidx];
+       if (!info)
                request_module("fb%d", fbidx);
-       if (!(info = registered_fb[fbidx])) {
-               res = -ENODEV;
-               goto out;
-       }
+       info = registered_fb[fbidx];
+       if (!info)
+               return -ENODEV;
+       mutex_lock(&info->lock);
        if (!try_module_get(info->fbops->owner)) {
                res = -ENODEV;
                goto out;
@@ -1337,7 +1368,7 @@ fb_open(struct inode *inode, struct file *file)
                fb_deferred_io_open(info, inode, file);
 #endif
 out:
-       unlock_kernel();
+       mutex_unlock(&info->lock);
        return res;
 }
 
@@ -1346,11 +1377,11 @@ fb_release(struct inode *inode, struct file *file)
 {
        struct fb_info * const info = file->private_data;
 
-       lock_kernel();
+       mutex_lock(&info->lock);
        if (info->fbops->fb_release)
                info->fbops->fb_release(info,1);
        module_put(info->fbops->owner);
-       unlock_kernel();
+       mutex_unlock(&info->lock);
        return 0;
 }
 
@@ -1358,7 +1389,7 @@ static const struct file_operations fb_fops = {
        .owner =        THIS_MODULE,
        .read =         fb_read,
        .write =        fb_write,
-       .ioctl =        fb_ioctl,
+       .unlocked_ioctl = fb_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl = fb_compat_ioctl,
 #endif
@@ -1429,6 +1460,7 @@ register_framebuffer(struct fb_info *fb_info)
                if (!registered_fb[i])
                        break;
        fb_info->node = i;
+       mutex_init(&fb_info->lock);
 
        fb_info->dev = device_create(fb_class, fb_info->device,
                                     MKDEV(FB_MAJOR, i), NULL, "fb%d", i);
index ed6b0576208c27ca237670999ba328a267b350f9..1f09d4e4144c23598f3cf7560a28630fa9f6a6d3 100644 (file)
@@ -80,7 +80,6 @@ static struct bin_attribute w1_ds2760_bin_attr = {
        .attr = {
                .name = "w1_slave",
                .mode = S_IRUGO,
-               .owner = THIS_MODULE,
        },
        .size = DS2760_DATA_SIZE,
        .read = w1_ds2760_read_bin,
index d0a1174fb516c14f63862e42919d74c044df7882..4eca61c201f01a0fc81f6596fa6cd6a48417a41d 100644 (file)
@@ -1168,195 +1168,7 @@ config EFS_FS
          To compile the EFS file system support as a module, choose M here: the
          module will be called efs.
 
-config JFFS2_FS
-       tristate "Journalling Flash File System v2 (JFFS2) support"
-       select CRC32
-       depends on MTD
-       help
-         JFFS2 is the second generation of the Journalling Flash File System
-         for use on diskless embedded devices. It provides improved wear
-         levelling, compression and support for hard links. You cannot use
-         this on normal block devices, only on 'MTD' devices.
-
-         Further information on the design and implementation of JFFS2 is
-         available at <http://sources.redhat.com/jffs2/>.
-
-config JFFS2_FS_DEBUG
-       int "JFFS2 debugging verbosity (0 = quiet, 2 = noisy)"
-       depends on JFFS2_FS
-       default "0"
-       help
-         This controls the amount of debugging messages produced by the JFFS2
-         code. Set it to zero for use in production systems. For evaluation,
-         testing and debugging, it's advisable to set it to one. This will
-         enable a few assertions and will print debugging messages at the
-         KERN_DEBUG loglevel, where they won't normally be visible. Level 2
-         is unlikely to be useful - it enables extra debugging in certain
-         areas which at one point needed debugging, but when the bugs were
-         located and fixed, the detailed messages were relegated to level 2.
-
-         If reporting bugs, please try to have available a full dump of the
-         messages at debug level 1 while the misbehaviour was occurring.
-
-config JFFS2_FS_WRITEBUFFER
-       bool "JFFS2 write-buffering support"
-       depends on JFFS2_FS
-       default y
-       help
-         This enables the write-buffering support in JFFS2.
-
-         This functionality is required to support JFFS2 on the following
-         types of flash devices:
-           - NAND flash
-           - NOR flash with transparent ECC
-           - DataFlash
-
-config JFFS2_FS_WBUF_VERIFY
-       bool "Verify JFFS2 write-buffer reads"
-       depends on JFFS2_FS_WRITEBUFFER
-       default n
-       help
-         This causes JFFS2 to read back every page written through the
-         write-buffer, and check for errors.
-
-config JFFS2_SUMMARY
-       bool "JFFS2 summary support (EXPERIMENTAL)"
-       depends on JFFS2_FS && EXPERIMENTAL
-       default n
-       help
-         This feature makes it possible to use summary information
-         for faster filesystem mount.
-
-         The summary information can be inserted into a filesystem image
-         by the utility 'sumtool'.
-
-         If unsure, say 'N'.
-
-config JFFS2_FS_XATTR
-       bool "JFFS2 XATTR support (EXPERIMENTAL)"
-       depends on JFFS2_FS && EXPERIMENTAL
-       default n
-       help
-         Extended attributes are name:value pairs associated with inodes by
-         the kernel or by users (see the attr(5) manual page, or visit
-         <http://acl.bestbits.at/> for details).
-
-         If unsure, say N.
-
-config JFFS2_FS_POSIX_ACL
-       bool "JFFS2 POSIX Access Control Lists"
-       depends on JFFS2_FS_XATTR
-       default y
-       select FS_POSIX_ACL
-       help
-         Posix Access Control Lists (ACLs) support permissions for users and
-         groups beyond the owner/group/world scheme.
-
-         To learn more about Access Control Lists, visit the Posix ACLs for
-         Linux website <http://acl.bestbits.at/>.
-
-         If you don't know what Access Control Lists are, say N
-
-config JFFS2_FS_SECURITY
-       bool "JFFS2 Security Labels"
-       depends on JFFS2_FS_XATTR
-       default y
-       help
-         Security labels support alternative access control models
-         implemented by security modules like SELinux.  This option
-         enables an extended attribute handler for file security
-         labels in the jffs2 filesystem.
-
-         If you are not using a security module that requires using
-         extended attributes for file security labels, say N.
-
-config JFFS2_COMPRESSION_OPTIONS
-       bool "Advanced compression options for JFFS2"
-       depends on JFFS2_FS
-       default n
-       help
-         Enabling this option allows you to explicitly choose which
-         compression modules, if any, are enabled in JFFS2. Removing
-         compressors can mean you cannot read existing file systems,
-         and enabling experimental compressors can mean that you
-         write a file system which cannot be read by a standard kernel.
-
-         If unsure, you should _definitely_ say 'N'.
-
-config JFFS2_ZLIB
-       bool "JFFS2 ZLIB compression support" if JFFS2_COMPRESSION_OPTIONS
-       select ZLIB_INFLATE
-       select ZLIB_DEFLATE
-       depends on JFFS2_FS
-       default y
-       help
-         Zlib is designed to be a free, general-purpose, legally unencumbered,
-         lossless data-compression library for use on virtually any computer
-         hardware and operating system. See <http://www.gzip.org/zlib/> for
-         further information.
-
-         Say 'Y' if unsure.
-
-config JFFS2_LZO
-       bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS
-       select LZO_COMPRESS
-       select LZO_DECOMPRESS
-       depends on JFFS2_FS
-       default n
-       help
-         minilzo-based compression. Generally works better than Zlib.
-
-         This feature was added in July, 2007. Say 'N' if you need
-         compatibility with older bootloaders or kernels.
-
-config JFFS2_RTIME
-       bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS
-       depends on JFFS2_FS
-       default y
-       help
-         Rtime does manage to recompress already-compressed data. Say 'Y' if unsure.
-
-config JFFS2_RUBIN
-       bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS
-       depends on JFFS2_FS
-       default n
-       help
-         RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure.
-
-choice
-       prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS
-       default JFFS2_CMODE_PRIORITY
-       depends on JFFS2_FS
-       help
-         You can set here the default compression mode of JFFS2 from
-         the available compression modes. Don't touch if unsure.
-
-config JFFS2_CMODE_NONE
-       bool "no compression"
-       help
-         Uses no compression.
-
-config JFFS2_CMODE_PRIORITY
-       bool "priority"
-       help
-         Tries the compressors in a predefined order and chooses the first
-         successful one.
-
-config JFFS2_CMODE_SIZE
-       bool "size (EXPERIMENTAL)"
-       help
-         Tries all compressors and chooses the one which has the smallest
-         result.
-
-config JFFS2_CMODE_FAVOURLZO
-       bool "Favour LZO"
-       help
-         Tries all compressors and chooses the one which has the smallest
-         result but gives some preference to LZO (which has faster
-         decompression) at the expense of size.
-
-endchoice
-
+source "fs/jffs2/Kconfig"
 # UBIFS File system configuration
 source "fs/ubifs/Kconfig"
 
@@ -1913,148 +1725,7 @@ config SMB_NLS_REMOTE
 
          smbmount from samba 2.2.0 or later supports this.
 
-config CIFS
-       tristate "CIFS support (advanced network filesystem, SMBFS successor)"
-       depends on INET
-       select NLS
-       help
-         This is the client VFS module for the Common Internet File System
-         (CIFS) protocol which is the successor to the Server Message Block 
-         (SMB) protocol, the native file sharing mechanism for most early
-         PC operating systems.  The CIFS protocol is fully supported by 
-         file servers such as Windows 2000 (including Windows 2003, NT 4  
-         and Windows XP) as well by Samba (which provides excellent CIFS
-         server support for Linux and many other operating systems). Limited
-         support for OS/2 and Windows ME and similar servers is provided as
-         well.
-
-         The cifs module provides an advanced network file system
-         client for mounting to CIFS compliant servers.  It includes
-         support for DFS (hierarchical name space), secure per-user
-         session establishment via Kerberos or NTLM or NTLMv2,
-         safe distributed caching (oplock), optional packet
-         signing, Unicode and other internationalization improvements.
-         If you need to mount to Samba or Windows from this machine, say Y.
-
-config CIFS_STATS
-        bool "CIFS statistics"
-        depends on CIFS
-        help
-          Enabling this option will cause statistics for each server share
-         mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
-
-config CIFS_STATS2
-       bool "Extended statistics"
-       depends on CIFS_STATS
-       help
-         Enabling this option will allow more detailed statistics on SMB
-         request timing to be displayed in /proc/fs/cifs/DebugData and also
-         allow optional logging of slow responses to dmesg (depending on the
-         value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
-         These additional statistics may have a minor effect on performance
-         and memory utilization.
-
-         Unless you are a developer or are doing network performance analysis
-         or tuning, say N.
-
-config CIFS_WEAK_PW_HASH
-       bool "Support legacy servers which use weaker LANMAN security"
-       depends on CIFS
-       help
-         Modern CIFS servers including Samba and most Windows versions
-         (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
-         security mechanisms. These hash the password more securely
-         than the mechanisms used in the older LANMAN version of the
-         SMB protocol but LANMAN based authentication is needed to
-         establish sessions with some old SMB servers.
-
-         Enabling this option allows the cifs module to mount to older
-         LANMAN based servers such as OS/2 and Windows 95, but such
-         mounts may be less secure than mounts using NTLM or more recent
-         security mechanisms if you are on a public network.  Unless you
-         have a need to access old SMB servers (and are on a private
-         network) you probably want to say N.  Even if this support
-         is enabled in the kernel build, LANMAN authentication will not be
-         used automatically. At runtime LANMAN mounts are disabled but
-         can be set to required (or optional) either in
-         /proc/fs/cifs (see fs/cifs/README for more detail) or via an
-         option on the mount command. This support is disabled by
-         default in order to reduce the possibility of a downgrade
-         attack.
-
-         If unsure, say N.
-
-config CIFS_UPCALL
-         bool "Kerberos/SPNEGO advanced session setup"
-         depends on CIFS && KEYS
-         help
-           Enables an upcall mechanism for CIFS which accesses
-           userspace helper utilities to provide SPNEGO packaged (RFC 4178)
-           Kerberos tickets which are needed to mount to certain secure servers
-           (for which more secure Kerberos authentication is required). If
-           unsure, say N.
-
-config CIFS_XATTR
-        bool "CIFS extended attributes"
-        depends on CIFS
-        help
-          Extended attributes are name:value pairs associated with inodes by
-          the kernel or by users (see the attr(5) manual page, or visit
-          <http://acl.bestbits.at/> for details).  CIFS maps the name of
-          extended attributes beginning with the user namespace prefix
-          to SMB/CIFS EAs. EAs are stored on Windows servers without the
-          user namespace prefix, but their names are seen by Linux cifs clients
-          prefaced by the user namespace prefix. The system namespace
-          (used by some filesystems to store ACLs) is not supported at
-          this time.
-
-          If unsure, say N.
-
-config CIFS_POSIX
-        bool "CIFS POSIX Extensions"
-        depends on CIFS_XATTR
-        help
-          Enabling this option will cause the cifs client to attempt to
-         negotiate a newer dialect with servers, such as Samba 3.0.5
-         or later, that optionally can handle more POSIX like (rather
-         than Windows like) file behavior.  It also enables
-         support for POSIX ACLs (getfacl and setfacl) to servers
-         (such as Samba 3.10 and later) which can negotiate
-         CIFS POSIX ACL support.  If unsure, say N.
-
-config CIFS_DEBUG2
-       bool "Enable additional CIFS debugging routines"
-       depends on CIFS
-       help
-          Enabling this option adds a few more debugging routines
-          to the cifs code which slightly increases the size of
-          the cifs module and can cause additional logging of debug
-          messages in some error paths, slowing performance. This
-          option can be turned off unless you are debugging
-          cifs problems.  If unsure, say N.
-
-config CIFS_EXPERIMENTAL
-         bool "CIFS Experimental Features (EXPERIMENTAL)"
-         depends on CIFS && EXPERIMENTAL
-         help
-           Enables cifs features under testing. These features are
-           experimental and currently include DFS support and directory 
-           change notification ie fcntl(F_DNOTIFY), as well as the upcall
-           mechanism which will be used for Kerberos session negotiation
-           and uid remapping.  Some of these features also may depend on 
-           setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
-           (which is disabled by default). See the file fs/cifs/README 
-           for more details.  If unsure, say N.
-
-config CIFS_DFS_UPCALL
-         bool "DFS feature support (EXPERIMENTAL)"
-         depends on CIFS_EXPERIMENTAL
-         depends on KEYS
-         help
-           Enables an upcall mechanism for CIFS which contacts userspace
-           helper utilities to provide server name resolution (host names to
-           IP addresses) which is needed for implicit mounts of DFS junction
-           points. If unsure, say N.
+source "fs/cifs/Kconfig"
 
 config NCP_FS
        tristate "NCP file system support (to mount NetWare volumes)"
index 801db1341811d6eace2732c7238bfd9190ffff1f..ce9fb3fbfae4c3f392b17a1b3bb792c81e318380 100644 (file)
@@ -40,6 +40,28 @@ config BINFMT_ELF_FDPIC
 
          It is also possible to run FDPIC ELF binaries on MMU linux also.
 
+config CORE_DUMP_DEFAULT_ELF_HEADERS
+       bool "Write ELF core dumps with partial segments"
+       default n
+       depends on BINFMT_ELF
+       help
+         ELF core dump files describe each memory mapping of the crashed
+         process, and can contain or omit the memory contents of each one.
+         The contents of an unmodified text mapping are omitted by default.
+
+         For an unmodified text mapping of an ELF object, including just
+         the first page of the file in a core dump makes it possible to
+         identify the build ID bits in the file, without paying the i/o
+         cost and disk space to dump all the text.  However, versions of
+         GDB before 6.7 are confused by ELF core dump files in this format.
+
+         The core dump behavior can be controlled per process using
+         the /proc/PID/coredump_filter pseudo-file; this setting is
+         inherited.  See Documentation/filesystems/proc.txt for details.
+
+         This config option changes the default setting of coredump_filter
+         seen at boot time.  If unsure, say N.
+
 config BINFMT_FLAT
        bool "Kernel support for flat binaries"
        depends on !MMU && (!FRV || BROKEN)
index c76afa26edf735c7644e32b931ac675cbbf08454..e2159063198a072ef75545130ced4eb21e56accf 100644 (file)
@@ -1156,16 +1156,24 @@ static int dump_seek(struct file *file, loff_t off)
 static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
 {
+#define FILTER(type)   (mm_flags & (1UL << MMF_DUMP_##type))
+
        /* The vma can be set up to tell us the answer directly.  */
        if (vma->vm_flags & VM_ALWAYSDUMP)
                goto whole;
 
+       /* Hugetlb memory check */
+       if (vma->vm_flags & VM_HUGETLB) {
+               if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
+                       goto whole;
+               if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
+                       goto whole;
+       }
+
        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & (VM_IO | VM_RESERVED))
                return 0;
 
-#define FILTER(type)   (mm_flags & (1UL << MMF_DUMP_##type))
-
        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
index ac78d4c19b3b3b27e812f13c0bd7669e962cea04..6569fda5cfed892c7e4ed53daa2e9677e60c36d5 100644 (file)
@@ -76,8 +76,7 @@ EXPORT_SYMBOL(__lock_buffer);
 
 void unlock_buffer(struct buffer_head *bh)
 {
-       smp_mb__before_clear_bit();
-       clear_buffer_locked(bh);
+       clear_bit_unlock(BH_Lock, &bh->b_state);
        smp_mb__after_clear_bit();
        wake_up_bit(&bh->b_state, BH_Lock);
 }
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
new file mode 100644 (file)
index 0000000..341a989
--- /dev/null
@@ -0,0 +1,142 @@
+config CIFS
+       tristate "CIFS support (advanced network filesystem, SMBFS successor)"
+       depends on INET
+       select NLS
+       help
+         This is the client VFS module for the Common Internet File System
+         (CIFS) protocol which is the successor to the Server Message Block
+         (SMB) protocol, the native file sharing mechanism for most early
+         PC operating systems.  The CIFS protocol is fully supported by
+         file servers such as Windows 2000 (including Windows 2003, NT 4
+         and Windows XP) as well by Samba (which provides excellent CIFS
+         server support for Linux and many other operating systems). Limited
+         support for OS/2 and Windows ME and similar servers is provided as
+         well.
+
+         The cifs module provides an advanced network file system
+         client for mounting to CIFS compliant servers.  It includes
+         support for DFS (hierarchical name space), secure per-user
+         session establishment via Kerberos or NTLM or NTLMv2,
+         safe distributed caching (oplock), optional packet
+         signing, Unicode and other internationalization improvements.
+         If you need to mount to Samba or Windows from this machine, say Y.
+
+config CIFS_STATS
+        bool "CIFS statistics"
+        depends on CIFS
+        help
+          Enabling this option will cause statistics for each server share
+         mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
+
+config CIFS_STATS2
+       bool "Extended statistics"
+       depends on CIFS_STATS
+       help
+         Enabling this option will allow more detailed statistics on SMB
+         request timing to be displayed in /proc/fs/cifs/DebugData and also
+         allow optional logging of slow responses to dmesg (depending on the
+         value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
+         These additional statistics may have a minor effect on performance
+         and memory utilization.
+
+         Unless you are a developer or are doing network performance analysis
+         or tuning, say N.
+
+config CIFS_WEAK_PW_HASH
+       bool "Support legacy servers which use weaker LANMAN security"
+       depends on CIFS
+       help
+         Modern CIFS servers including Samba and most Windows versions
+         (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
+         security mechanisms. These hash the password more securely
+         than the mechanisms used in the older LANMAN version of the
+         SMB protocol but LANMAN based authentication is needed to
+         establish sessions with some old SMB servers.
+
+         Enabling this option allows the cifs module to mount to older
+         LANMAN based servers such as OS/2 and Windows 95, but such
+         mounts may be less secure than mounts using NTLM or more recent
+         security mechanisms if you are on a public network.  Unless you
+         have a need to access old SMB servers (and are on a private
+         network) you probably want to say N.  Even if this support
+         is enabled in the kernel build, LANMAN authentication will not be
+         used automatically. At runtime LANMAN mounts are disabled but
+         can be set to required (or optional) either in
+         /proc/fs/cifs (see fs/cifs/README for more detail) or via an
+         option on the mount command. This support is disabled by
+         default in order to reduce the possibility of a downgrade
+         attack.
+
+         If unsure, say N.
+
+config CIFS_UPCALL
+         bool "Kerberos/SPNEGO advanced session setup"
+         depends on CIFS && KEYS
+         help
+           Enables an upcall mechanism for CIFS which accesses
+           userspace helper utilities to provide SPNEGO packaged (RFC 4178)
+           Kerberos tickets which are needed to mount to certain secure servers
+           (for which more secure Kerberos authentication is required). If
+           unsure, say N.
+
+config CIFS_XATTR
+        bool "CIFS extended attributes"
+        depends on CIFS
+        help
+          Extended attributes are name:value pairs associated with inodes by
+          the kernel or by users (see the attr(5) manual page, or visit
+          <http://acl.bestbits.at/> for details).  CIFS maps the name of
+          extended attributes beginning with the user namespace prefix
+          to SMB/CIFS EAs. EAs are stored on Windows servers without the
+          user namespace prefix, but their names are seen by Linux cifs clients
+          prefaced by the user namespace prefix. The system namespace
+          (used by some filesystems to store ACLs) is not supported at
+          this time.
+
+          If unsure, say N.
+
+config CIFS_POSIX
+        bool "CIFS POSIX Extensions"
+        depends on CIFS_XATTR
+        help
+          Enabling this option will cause the cifs client to attempt to
+         negotiate a newer dialect with servers, such as Samba 3.0.5
+         or later, that optionally can handle more POSIX like (rather
+         than Windows like) file behavior.  It also enables
+         support for POSIX ACLs (getfacl and setfacl) to servers
+         (such as Samba 3.10 and later) which can negotiate
+         CIFS POSIX ACL support.  If unsure, say N.
+
+config CIFS_DEBUG2
+       bool "Enable additional CIFS debugging routines"
+       depends on CIFS
+       help
+          Enabling this option adds a few more debugging routines
+          to the cifs code which slightly increases the size of
+          the cifs module and can cause additional logging of debug
+          messages in some error paths, slowing performance. This
+          option can be turned off unless you are debugging
+          cifs problems.  If unsure, say N.
+
+config CIFS_EXPERIMENTAL
+         bool "CIFS Experimental Features (EXPERIMENTAL)"
+         depends on CIFS && EXPERIMENTAL
+         help
+           Enables cifs features under testing. These features are
+           experimental and currently include DFS support and directory
+           change notification ie fcntl(F_DNOTIFY), as well as the upcall
+           mechanism which will be used for Kerberos session negotiation
+           and uid remapping.  Some of these features also may depend on
+           setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
+           (which is disabled by default). See the file fs/cifs/README
+           for more details.  If unsure, say N.
+
+config CIFS_DFS_UPCALL
+         bool "DFS feature support (EXPERIMENTAL)"
+         depends on CIFS_EXPERIMENTAL
+         depends on KEYS
+         help
+           Enables an upcall mechanism for CIFS which contacts userspace
+           helper utilities to provide server name resolution (host names to
+           IP addresses) which is needed for implicit mounts of DFS junction
+           points. If unsure, say N.
index c4a8a0605125aa3c2f93359ac98b43de5060bed9..62d8bd8f14c086f1c9f53640b9144f1cca349289 100644 (file)
@@ -1791,7 +1791,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
                SetPageUptodate(page);
                unlock_page(page);
                if (!pagevec_add(plru_pvec, page))
-                       __pagevec_lru_add(plru_pvec);
+                       __pagevec_lru_add_file(plru_pvec);
                data += PAGE_CACHE_SIZE;
        }
        return;
@@ -1925,7 +1925,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
                bytes_read = 0;
        }
 
-       pagevec_lru_add(&lru_pvec);
+       pagevec_lru_add_file(&lru_pvec);
 
 /* need to free smb_read_data buf before exit */
        if (smb_read_data) {
index a41e7902ed0bf9ef9e2cd600c31b1acda0576ed4..4e834f16d9da7d49c0f232b9fd46a7db2c680070 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1386,7 +1386,7 @@ EXPORT_SYMBOL(set_binfmt);
  * name into corename, which must have space for at least
  * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
  */
-static int format_corename(char *corename, int nr_threads, long signr)
+static int format_corename(char *corename, long signr)
 {
        const char *pat_ptr = core_pattern;
        int ispipe = (*pat_ptr == '|');
@@ -1493,8 +1493,7 @@ static int format_corename(char *corename, int nr_threads, long signr)
         * If core_pattern does not include a %p (as is the default)
         * and core_uses_pid is set, then .%pid will be appended to
         * the filename. Do not do this for piped commands. */
-       if (!ispipe && !pid_in_pattern
-           && (core_uses_pid || nr_threads)) {
+       if (!ispipe && !pid_in_pattern && core_uses_pid) {
                rc = snprintf(out_ptr, out_end - out_ptr,
                              ".%d", task_tgid_vnr(current));
                if (rc > out_end - out_ptr)
@@ -1757,7 +1756,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
         * uses lock_kernel()
         */
        lock_kernel();
-       ispipe = format_corename(corename, retval, signr);
+       ispipe = format_corename(corename, signr);
        unlock_kernel();
        /*
         * Don't bother to check the RLIMIT_CORE value if core_pattern points
index 92fd0338a6ebe3df16739b65feec4e4ef034206f..f5b57a2ca35a5c4cb10a369933bec40291c78109 100644 (file)
@@ -1547,6 +1547,7 @@ retry_alloc:
         * turn off reservation for this allocation
         */
        if (my_rsv && (free_blocks < windowsz)
+               && (free_blocks > 0)
                && (rsv_is_empty(&my_rsv->rsv_window)))
                my_rsv = NULL;
 
@@ -1585,7 +1586,7 @@ retry_alloc:
                 * free blocks is less than half of the reservation
                 * window size.
                 */
-               if (free_blocks <= (windowsz/2))
+               if (my_rsv && (free_blocks <= (windowsz/2)))
                        continue;
 
                brelse(bitmap_bh);
index 2eea96ec78ed218f4fbe14b8e4c1b169e1dfe1ac..4c82531ea0a8b0d13ce61b63eb7daeecbe5d1423 100644 (file)
@@ -102,6 +102,7 @@ static int ext3_readdir(struct file * filp,
        int err;
        struct inode *inode = filp->f_path.dentry->d_inode;
        int ret = 0;
+       int dir_has_error = 0;
 
        sb = inode->i_sb;
 
@@ -148,9 +149,12 @@ static int ext3_readdir(struct file * filp,
                 * of recovering data when there's a bad sector
                 */
                if (!bh) {
-                       ext3_error (sb, "ext3_readdir",
-                               "directory #%lu contains a hole at offset %lu",
-                               inode->i_ino, (unsigned long)filp->f_pos);
+                       if (!dir_has_error) {
+                               ext3_error(sb, __func__, "directory #%lu "
+                                       "contains a hole at offset %lld",
+                                       inode->i_ino, filp->f_pos);
+                               dir_has_error = 1;
+                       }
                        /* corrupt size?  Maybe no more blocks to read */
                        if (filp->f_pos > inode->i_blocks << 9)
                                break;
@@ -410,7 +414,7 @@ static int call_filldir(struct file * filp, void * dirent,
                                get_dtype(sb, fname->file_type));
                if (error) {
                        filp->f_pos = curr_pos;
-                       info->extra_fname = fname->next;
+                       info->extra_fname = fname;
                        return error;
                }
                fname = fname->next;
@@ -449,11 +453,21 @@ static int ext3_dx_readdir(struct file * filp,
         * If there are any leftover names on the hash collision
         * chain, return them first.
         */
-       if (info->extra_fname &&
-           call_filldir(filp, dirent, filldir, info->extra_fname))
-               goto finished;
+       if (info->extra_fname) {
+               if (call_filldir(filp, dirent, filldir, info->extra_fname))
+                       goto finished;
 
-       if (!info->curr_node)
+               info->extra_fname = NULL;
+               info->curr_node = rb_next(info->curr_node);
+               if (!info->curr_node) {
+                       if (info->next_hash == ~0) {
+                               filp->f_pos = EXT3_HTREE_EOF;
+                               goto finished;
+                       }
+                       info->curr_hash = info->next_hash;
+                       info->curr_minor_hash = 0;
+               }
+       } else if (!info->curr_node)
                info->curr_node = rb_first(&info->root);
 
        while (1) {
index ebfec4d0148e9e272e2a25efd0e3b551381408c6..f8424ad8997195f0cdd2d2c1c53196f16651d7c8 100644 (file)
@@ -1186,6 +1186,13 @@ write_begin_failed:
                ext3_journal_stop(handle);
                unlock_page(page);
                page_cache_release(page);
+               /*
+                * block_write_begin may have instantiated a few blocks
+                * outside i_size.  Trim these off again. Don't need
+                * i_size_read because we hold i_mutex.
+                */
+               if (pos + len > inode->i_size)
+                       vmtruncate(inode, inode->i_size);
        }
        if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
                goto retry;
index 77278e947e9416d8b1165cfab6d7554f21131a67..78fdf383637022566f62267759629d250e04712c 100644 (file)
@@ -790,7 +790,8 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 
        if (reserved_gdb || gdb_off == 0) {
                if (!EXT3_HAS_COMPAT_FEATURE(sb,
-                                            EXT3_FEATURE_COMPAT_RESIZE_INODE)){
+                                            EXT3_FEATURE_COMPAT_RESIZE_INODE)
+                   || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
                        ext3_warning(sb, __func__,
                                     "No reserved GDT blocks, can't resize");
                        return -EPERM;
index 399a96a6c5561666f61c7a8f7cae3a71dbc5b52f..3a260af5544d24dfc85f0dffc03b42771c322295 100644 (file)
@@ -625,6 +625,9 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
        else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
                seq_puts(seq, ",data=writeback");
 
+       if (test_opt(sb, DATA_ERR_ABORT))
+               seq_puts(seq, ",data_err=abort");
+
        ext3_show_quota_options(seq, sb);
 
        return 0;
@@ -754,6 +757,7 @@ enum {
        Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
        Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+       Opt_data_err_abort, Opt_data_err_ignore,
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
        Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
@@ -796,6 +800,8 @@ static const match_table_t tokens = {
        {Opt_data_journal, "data=journal"},
        {Opt_data_ordered, "data=ordered"},
        {Opt_data_writeback, "data=writeback"},
+       {Opt_data_err_abort, "data_err=abort"},
+       {Opt_data_err_ignore, "data_err=ignore"},
        {Opt_offusrjquota, "usrjquota="},
        {Opt_usrjquota, "usrjquota=%s"},
        {Opt_offgrpjquota, "grpjquota="},
@@ -1011,6 +1017,12 @@ static int parse_options (char *options, struct super_block *sb,
                                sbi->s_mount_opt |= data_opt;
                        }
                        break;
+               case Opt_data_err_abort:
+                       set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+                       break;
+               case Opt_data_err_ignore:
+                       clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
+                       break;
 #ifdef CONFIG_QUOTA
                case Opt_usrjquota:
                        qtype = USRQUOTA;
@@ -1986,6 +1998,10 @@ static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
                journal->j_flags |= JFS_BARRIER;
        else
                journal->j_flags &= ~JFS_BARRIER;
+       if (test_opt(sb, DATA_ERR_ABORT))
+               journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR;
+       else
+               journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR;
        spin_unlock(&journal->j_state_lock);
 }
 
index fec8f61227ffb28792a7b94f30417f003d786882..0022eec63cdacd97c2a438b8d9f623ff6be88dd4 100644 (file)
@@ -199,6 +199,9 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
                goto done;
        }
 
+       if (inode->i_ino == HFSPLUS_EXT_CNID)
+               return -EIO;
+
        mutex_lock(&HFSPLUS_I(inode).extents_lock);
        res = hfsplus_ext_read_extent(inode, ablock);
        if (!res) {
index b085d64a2b672580e328be86af33475e87544f6e..963be644297aeb45c2d376721c5cc70ee8ce28f5 100644 (file)
@@ -254,6 +254,8 @@ static int hfsplus_file_open(struct inode *inode, struct file *file)
 {
        if (HFSPLUS_IS_RSRC(inode))
                inode = HFSPLUS_I(inode).rsrc_inode;
+       if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
+               return -EOVERFLOW;
        atomic_inc(&HFSPLUS_I(inode).opencnt);
        return 0;
 }
index ae08c057e751454ad2f0d922faa9be08d3c6d80d..25719d902c5116a6ff50248b272e5603c196f0f7 100644 (file)
@@ -482,6 +482,8 @@ void journal_commit_transaction(journal_t *journal)
                printk(KERN_WARNING
                        "JBD: Detected IO errors while flushing file data "
                        "on %s\n", bdevname(journal->j_fs_dev, b));
+               if (journal->j_flags & JFS_ABORT_ON_SYNCDATA_ERR)
+                       journal_abort(journal, err);
                err = 0;
        }
 
@@ -518,9 +520,10 @@ void journal_commit_transaction(journal_t *journal)
                jh = commit_transaction->t_buffers;
 
                /* If we're in abort mode, we just un-journal the buffer and
-                  release it for background writing. */
+                  release it. */
 
                if (is_journal_aborted(journal)) {
+                       clear_buffer_jbddirty(jh2bh(jh));
                        JBUFFER_TRACE(jh, "journal is aborting: refile");
                        journal_refile_buffer(journal, jh);
                        /* If that was the last one, we need to clean up
@@ -762,6 +765,9 @@ wait_for_iobuf:
                /* AKPM: bforget here */
        }
 
+       if (err)
+               journal_abort(journal, err);
+
        jbd_debug(3, "JBD: commit phase 6\n");
 
        if (journal_write_commit_record(journal, commit_transaction))
@@ -852,6 +858,8 @@ restart_loop:
                if (buffer_jbddirty(bh)) {
                        JBUFFER_TRACE(jh, "add to new checkpointing trans");
                        __journal_insert_checkpoint(jh, commit_transaction);
+                       if (is_journal_aborted(journal))
+                               clear_buffer_jbddirty(bh);
                        JBUFFER_TRACE(jh, "refile for checkpoint writeback");
                        __journal_refile_buffer(jh);
                        jbd_unlock_bh_state(bh);
index 0540ca27a4464060e9c230095f664cd9937b21a1..d15cd6e7251ee03347f25f14f53e371ff7936d09 100644 (file)
@@ -954,9 +954,10 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
        journal_t *journal = handle->h_transaction->t_journal;
        int need_brelse = 0;
        struct journal_head *jh;
+       int ret = 0;
 
        if (is_handle_aborted(handle))
-               return 0;
+               return ret;
 
        jh = journal_add_journal_head(bh);
        JBUFFER_TRACE(jh, "entry");
@@ -1067,7 +1068,16 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
                                   time if it is redirtied */
                        }
 
-                       /* journal_clean_data_list() may have got there first */
+                       /*
+                        * We cannot remove the buffer with io error from the
+                        * committing transaction, because otherwise it would
+                        * miss the error and the commit would not abort.
+                        */
+                       if (unlikely(!buffer_uptodate(bh))) {
+                               ret = -EIO;
+                               goto no_journal;
+                       }
+
                        if (jh->b_transaction != NULL) {
                                JBUFFER_TRACE(jh, "unfile from commit");
                                __journal_temp_unlink_buffer(jh);
@@ -1108,7 +1118,7 @@ no_journal:
        }
        JBUFFER_TRACE(jh, "exit");
        journal_put_journal_head(jh);
-       return 0;
+       return ret;
 }
 
 /**
diff --git a/fs/jffs2/Kconfig b/fs/jffs2/Kconfig
new file mode 100644 (file)
index 0000000..6ae169c
--- /dev/null
@@ -0,0 +1,188 @@
+config JFFS2_FS
+       tristate "Journalling Flash File System v2 (JFFS2) support"
+       select CRC32
+       depends on MTD
+       help
+         JFFS2 is the second generation of the Journalling Flash File System
+         for use on diskless embedded devices. It provides improved wear
+         levelling, compression and support for hard links. You cannot use
+         this on normal block devices, only on 'MTD' devices.
+
+         Further information on the design and implementation of JFFS2 is
+         available at <http://sources.redhat.com/jffs2/>.
+
+config JFFS2_FS_DEBUG
+       int "JFFS2 debugging verbosity (0 = quiet, 2 = noisy)"
+       depends on JFFS2_FS
+       default "0"
+       help
+         This controls the amount of debugging messages produced by the JFFS2
+         code. Set it to zero for use in production systems. For evaluation,
+         testing and debugging, it's advisable to set it to one. This will
+         enable a few assertions and will print debugging messages at the
+         KERN_DEBUG loglevel, where they won't normally be visible. Level 2
+         is unlikely to be useful - it enables extra debugging in certain
+         areas which at one point needed debugging, but when the bugs were
+         located and fixed, the detailed messages were relegated to level 2.
+
+         If reporting bugs, please try to have available a full dump of the
+         messages at debug level 1 while the misbehaviour was occurring.
+
+config JFFS2_FS_WRITEBUFFER
+       bool "JFFS2 write-buffering support"
+       depends on JFFS2_FS
+       default y
+       help
+         This enables the write-buffering support in JFFS2.
+
+         This functionality is required to support JFFS2 on the following
+         types of flash devices:
+           - NAND flash
+           - NOR flash with transparent ECC
+           - DataFlash
+
+config JFFS2_FS_WBUF_VERIFY
+       bool "Verify JFFS2 write-buffer reads"
+       depends on JFFS2_FS_WRITEBUFFER
+       default n
+       help
+         This causes JFFS2 to read back every page written through the
+         write-buffer, and check for errors.
+
+config JFFS2_SUMMARY
+       bool "JFFS2 summary support (EXPERIMENTAL)"
+       depends on JFFS2_FS && EXPERIMENTAL
+       default n
+       help
+         This feature makes it possible to use summary information
+         for faster filesystem mount.
+
+         The summary information can be inserted into a filesystem image
+         by the utility 'sumtool'.
+
+         If unsure, say 'N'.
+
+config JFFS2_FS_XATTR
+       bool "JFFS2 XATTR support (EXPERIMENTAL)"
+       depends on JFFS2_FS && EXPERIMENTAL
+       default n
+       help
+         Extended attributes are name:value pairs associated with inodes by
+         the kernel or by users (see the attr(5) manual page, or visit
+         <http://acl.bestbits.at/> for details).
+
+         If unsure, say N.
+
+config JFFS2_FS_POSIX_ACL
+       bool "JFFS2 POSIX Access Control Lists"
+       depends on JFFS2_FS_XATTR
+       default y
+       select FS_POSIX_ACL
+       help
+         Posix Access Control Lists (ACLs) support permissions for users and
+         groups beyond the owner/group/world scheme.
+
+         To learn more about Access Control Lists, visit the Posix ACLs for
+         Linux website <http://acl.bestbits.at/>.
+
+         If you don't know what Access Control Lists are, say N
+
+config JFFS2_FS_SECURITY
+       bool "JFFS2 Security Labels"
+       depends on JFFS2_FS_XATTR
+       default y
+       help
+         Security labels support alternative access control models
+         implemented by security modules like SELinux.  This option
+         enables an extended attribute handler for file security
+         labels in the jffs2 filesystem.
+
+         If you are not using a security module that requires using
+         extended attributes for file security labels, say N.
+
+config JFFS2_COMPRESSION_OPTIONS
+       bool "Advanced compression options for JFFS2"
+       depends on JFFS2_FS
+       default n
+       help
+         Enabling this option allows you to explicitly choose which
+         compression modules, if any, are enabled in JFFS2. Removing
+         compressors can mean you cannot read existing file systems,
+         and enabling experimental compressors can mean that you
+         write a file system which cannot be read by a standard kernel.
+
+         If unsure, you should _definitely_ say 'N'.
+
+config JFFS2_ZLIB
+       bool "JFFS2 ZLIB compression support" if JFFS2_COMPRESSION_OPTIONS
+       select ZLIB_INFLATE
+       select ZLIB_DEFLATE
+       depends on JFFS2_FS
+       default y
+       help
+         Zlib is designed to be a free, general-purpose, legally unencumbered,
+         lossless data-compression library for use on virtually any computer
+         hardware and operating system. See <http://www.gzip.org/zlib/> for
+         further information.
+
+         Say 'Y' if unsure.
+
+config JFFS2_LZO
+       bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS
+       select LZO_COMPRESS
+       select LZO_DECOMPRESS
+       depends on JFFS2_FS
+       default n
+       help
+         minilzo-based compression. Generally works better than Zlib.
+
+         This feature was added in July, 2007. Say 'N' if you need
+         compatibility with older bootloaders or kernels.
+
+config JFFS2_RTIME
+       bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS
+       depends on JFFS2_FS
+       default y
+       help
+         Rtime does manage to recompress already-compressed data. Say 'Y' if unsure.
+
+config JFFS2_RUBIN
+       bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS
+       depends on JFFS2_FS
+       default n
+       help
+         RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure.
+
+choice
+       prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS
+       default JFFS2_CMODE_PRIORITY
+       depends on JFFS2_FS
+       help
+         You can set here the default compression mode of JFFS2 from
+         the available compression modes. Don't touch if unsure.
+
+config JFFS2_CMODE_NONE
+       bool "no compression"
+       help
+         Uses no compression.
+
+config JFFS2_CMODE_PRIORITY
+       bool "priority"
+       help
+         Tries the compressors in a predefined order and chooses the first
+         successful one.
+
+config JFFS2_CMODE_SIZE
+       bool "size (EXPERIMENTAL)"
+       help
+         Tries all compressors and chooses the one which has the smallest
+         result.
+
+config JFFS2_CMODE_FAVOURLZO
+       bool "Favour LZO"
+       help
+         Tries all compressors and chooses the one which has the smallest
+         result but gives some preference to LZO (which has faster
+         decompression) at the expense of size.
+
+endchoice
index 86739ee53b37336c15cbc341180c47d195d9081a..f25e70c1b51c5854d987558ec5c84c0c94410b73 100644 (file)
@@ -53,8 +53,8 @@ static int jffs2_is_best_compression(struct jffs2_compressor *this,
 }
 
 /* jffs2_compress:
- * @data: Pointer to uncompressed data
- * @cdata: Pointer to returned pointer to buffer for compressed data
+ * @data_in: Pointer to uncompressed data
+ * @cpage_out: Pointer to returned pointer to buffer for compressed data
  * @datalen: On entry, holds the amount of data available for compression.
  *     On exit, expected to hold the amount of data actually compressed.
  * @cdatalen: On entry, holds the amount of space available for compressed
index cd219ef552543cdc98d0a392f95cd6dc1c3ec4fd..b1aaae823a5227de63ebb0175e904f435ab27772 100644 (file)
@@ -311,7 +311,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
        /* FIXME: If you care. We'd need to use frags for the target
           if it grows much more than this */
        if (targetlen > 254)
-               return -EINVAL;
+               return -ENAMETOOLONG;
 
        ri = jffs2_alloc_raw_inode();
 
index dddb2a6c9e2cfc087b6becc7f88d9e85ab1d02b4..259461b910afec4542ea5f13af00bf0c63b3bf42 100644 (file)
@@ -68,7 +68,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
        instr->len = c->sector_size;
        instr->callback = jffs2_erase_callback;
        instr->priv = (unsigned long)(&instr[1]);
-       instr->fail_addr = 0xffffffff;
+       instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
 
        ((struct erase_priv_struct *)instr->priv)->jeb = jeb;
        ((struct erase_priv_struct *)instr->priv)->c = c;
@@ -175,7 +175,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock
 {
        /* For NAND, if the failure did not occur at the device level for a
           specific physical page, don't bother updating the bad block table. */
-       if (jffs2_cleanmarker_oob(c) && (bad_offset != 0xffffffff)) {
+       if (jffs2_cleanmarker_oob(c) && (bad_offset != MTD_FAIL_ADDR_UNKNOWN)) {
                /* We had a device-level failure to erase.  Let's see if we've
                   failed too many times. */
                if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) {
index 086c4383022181f3624f333143daf9d5ddf99081..249305d65d5bd663017fac0b6c637faeb7eebfa2 100644 (file)
@@ -207,6 +207,8 @@ int jffs2_statfs(struct dentry *dentry, struct kstatfs *buf)
        buf->f_files = 0;
        buf->f_ffree = 0;
        buf->f_namelen = JFFS2_MAX_NAME_LEN;
+       buf->f_fsid.val[0] = JFFS2_SUPER_MAGIC;
+       buf->f_fsid.val[1] = c->mtd->index;
 
        spin_lock(&c->erase_completion_lock);
        avail = c->dirty_size + c->free_size;
@@ -440,14 +442,14 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
 
        memset(ri, 0, sizeof(*ri));
        /* Set OS-specific defaults for new inodes */
-       ri->uid = cpu_to_je16(current->fsuid);
+       ri->uid = cpu_to_je16(current_fsuid());
 
        if (dir_i->i_mode & S_ISGID) {
                ri->gid = cpu_to_je16(dir_i->i_gid);
                if (S_ISDIR(mode))
                        mode |= S_ISGID;
        } else {
-               ri->gid = cpu_to_je16(current->fsgid);
+               ri->gid = cpu_to_je16(current_fsgid());
        }
 
        /* POSIX ACLs have to be processed now, at least partly.
index a9bf9603c1ba0cca4fac8d56ea2dae4bbfe33b91..0875b60b4bf700cc8fda2fad76437d0dc9ea7420 100644 (file)
@@ -261,6 +261,10 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c)
 
        jffs2_sum_reset_collected(c->summary); /* reset collected summary */
 
+       /* adjust write buffer offset, else we get a non contiguous write bug */
+       if (!(c->wbuf_ofs % c->sector_size) && !c->wbuf_len)
+               c->wbuf_ofs = 0xffffffff;
+
        D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset));
 
        return 0;
index 0e78b00035e47a313ba43bafba87dfffbe9cf1b4..d9a721e6db70139073ca055cbea471769f6a83dd 100644 (file)
@@ -679,10 +679,7 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
 
        memset(c->wbuf,0xff,c->wbuf_pagesize);
        /* adjust write buffer offset, else we get a non contiguous write bug */
-       if (SECTOR_ADDR(c->wbuf_ofs) == SECTOR_ADDR(c->wbuf_ofs+c->wbuf_pagesize))
-               c->wbuf_ofs += c->wbuf_pagesize;
-       else
-               c->wbuf_ofs = 0xffffffff;
+       c->wbuf_ofs += c->wbuf_pagesize;
        c->wbuf_len = 0;
        return 0;
 }
index 2ab70d46ecbc7ac43a0585183adcfe248ac5b127..efdba2e802d78bcdf520794e964e873fce793280 100644 (file)
@@ -1517,7 +1517,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
        if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
                                                        GFP_KERNEL)) {
                pagevec_add(&lru_pvec, page);
-               pagevec_lru_add(&lru_pvec);
+               pagevec_lru_add_file(&lru_pvec);
                SetPageUptodate(page);
                unlock_page(page);
        } else
index d020866d42320dddd0ed6a8b9a9d31e572a42cab..3140a4429af12febede2a87e204086b7dd310d22 100644 (file)
@@ -439,7 +439,7 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
                        pages[nr] = *cached_page;
                        page_cache_get(*cached_page);
                        if (unlikely(!pagevec_add(lru_pvec, *cached_page)))
-                               __pagevec_lru_add(lru_pvec);
+                               __pagevec_lru_add_file(lru_pvec);
                        *cached_page = NULL;
                }
                index++;
@@ -2084,7 +2084,7 @@ err_out:
                                                OSYNC_METADATA|OSYNC_DATA);
                }
        }
-       pagevec_lru_add(&lru_pvec);
+       pagevec_lru_add_file(&lru_pvec);
        ntfs_debug("Done.  Returning %s (written 0x%lx, status %li).",
                        written ? "written" : "status", (unsigned long)written,
                        (long)status);
index 59ea42e1ef03a0a4acdf3f73283d913b740415fd..61b25f4eabe6635bdf013e4879dd8e725f10f01d 100644 (file)
@@ -136,6 +136,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
        unsigned long allowed;
        struct vmalloc_info vmi;
        long cached;
+       unsigned long pages[NR_LRU_LISTS];
+       int lru;
 
 /*
  * display in kilobytes.
@@ -154,51 +156,70 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 
        get_vmalloc_info(&vmi);
 
+       for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
+               pages[lru] = global_page_state(NR_LRU_BASE + lru);
+
        /*
         * Tagged format, for easy grepping and expansion.
         */
        len = sprintf(page,
-               "MemTotal:     %8lu kB\n"
-               "MemFree:      %8lu kB\n"
-               "Buffers:      %8lu kB\n"
-               "Cached:       %8lu kB\n"
-               "SwapCached:   %8lu kB\n"
-               "Active:       %8lu kB\n"
-               "Inactive:     %8lu kB\n"
+               "MemTotal:       %8lu kB\n"
+               "MemFree:        %8lu kB\n"
+               "Buffers:        %8lu kB\n"
+               "Cached:         %8lu kB\n"
+               "SwapCached:     %8lu kB\n"
+               "Active:         %8lu kB\n"
+               "Inactive:       %8lu kB\n"
+               "Active(anon):   %8lu kB\n"
+               "Inactive(anon): %8lu kB\n"
+               "Active(file):   %8lu kB\n"
+               "Inactive(file): %8lu kB\n"
+#ifdef CONFIG_UNEVICTABLE_LRU
+               "Unevictable:    %8lu kB\n"
+               "Mlocked:        %8lu kB\n"
+#endif
 #ifdef CONFIG_HIGHMEM
-               "HighTotal:    %8lu kB\n"
-               "HighFree:     %8lu kB\n"
-               "LowTotal:     %8lu kB\n"
-               "LowFree:      %8lu kB\n"
+               "HighTotal:      %8lu kB\n"
+               "HighFree:       %8lu kB\n"
+               "LowTotal:       %8lu kB\n"
+               "LowFree:        %8lu kB\n"
 #endif
-               "SwapTotal:    %8lu kB\n"
-               "SwapFree:     %8lu kB\n"
-               "Dirty:        %8lu kB\n"
-               "Writeback:    %8lu kB\n"
-               "AnonPages:    %8lu kB\n"
-               "Mapped:       %8lu kB\n"
-               "Slab:         %8lu kB\n"
-               "SReclaimable: %8lu kB\n"
-               "SUnreclaim:   %8lu kB\n"
-               "PageTables:   %8lu kB\n"
+               "SwapTotal:      %8lu kB\n"
+               "SwapFree:       %8lu kB\n"
+               "Dirty:          %8lu kB\n"
+               "Writeback:      %8lu kB\n"
+               "AnonPages:      %8lu kB\n"
+               "Mapped:         %8lu kB\n"
+               "Slab:           %8lu kB\n"
+               "SReclaimable:   %8lu kB\n"
+               "SUnreclaim:     %8lu kB\n"
+               "PageTables:     %8lu kB\n"
 #ifdef CONFIG_QUICKLIST
-               "Quicklists:   %8lu kB\n"
+               "Quicklists:     %8lu kB\n"
 #endif
-               "NFS_Unstable: %8lu kB\n"
-               "Bounce:       %8lu kB\n"
-               "WritebackTmp: %8lu kB\n"
-               "CommitLimit:  %8lu kB\n"
-               "Committed_AS: %8lu kB\n"
-               "VmallocTotal: %8lu kB\n"
-               "VmallocUsed:  %8lu kB\n"
-               "VmallocChunk: %8lu kB\n",
+               "NFS_Unstable:   %8lu kB\n"
+               "Bounce:         %8lu kB\n"
+               "WritebackTmp:   %8lu kB\n"
+               "CommitLimit:    %8lu kB\n"
+               "Committed_AS:   %8lu kB\n"
+               "VmallocTotal:   %8lu kB\n"
+               "VmallocUsed:    %8lu kB\n"
+               "VmallocChunk:   %8lu kB\n",
                K(i.totalram),
                K(i.freeram),
                K(i.bufferram),
                K(cached),
                K(total_swapcache_pages),
-               K(global_page_state(NR_ACTIVE)),
-               K(global_page_state(NR_INACTIVE)),
+               K(pages[LRU_ACTIVE_ANON]   + pages[LRU_ACTIVE_FILE]),
+               K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
+               K(pages[LRU_ACTIVE_ANON]),
+               K(pages[LRU_INACTIVE_ANON]),
+               K(pages[LRU_ACTIVE_FILE]),
+               K(pages[LRU_INACTIVE_FILE]),
+#ifdef CONFIG_UNEVICTABLE_LRU
+               K(pages[LRU_UNEVICTABLE]),
+               K(global_page_state(NR_MLOCK)),
+#endif
 #ifdef CONFIG_HIGHMEM
                K(i.totalhigh),
                K(i.freehigh),
index 841368b87a29dbb89c68c4539b6203a7e0501905..cd9ca67f841bd38d6256f14eccfd3fd6d8174522 100644 (file)
@@ -32,9 +32,6 @@ static size_t elfcorebuf_sz;
 /* Total size of vmcore file. */
 static u64 vmcore_size;
 
-/* Stores the physical address of elf header of crash image. */
-unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
-
 struct proc_dir_entry *proc_vmcore = NULL;
 
 /* Reads a page from the oldmem device from given offset. */
@@ -647,7 +644,7 @@ static int __init vmcore_init(void)
        int rc = 0;
 
        /* If elfcorehdr= has been passed in cmdline, then capture the dump.*/
-       if (!(elfcorehdr_addr < ELFCORE_ADDR_MAX))
+       if (!(is_vmcore_usable()))
                return rc;
        rc = parse_crash_elf_headers();
        if (rc) {
index 5145cb9125af8c62a574b0dde476a4f7e8f8f814..76acdbc3461144512208755d4b69946d7eda76e3 100644 (file)
@@ -112,12 +112,12 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
                        goto add_error;
 
                if (!pagevec_add(&lru_pvec, page))
-                       __pagevec_lru_add(&lru_pvec);
+                       __pagevec_lru_add_file(&lru_pvec);
 
                unlock_page(page);
        }
 
-       pagevec_lru_add(&lru_pvec);
+       pagevec_lru_add_file(&lru_pvec);
        return 0;
 
  fsize_exceeded:
index b13123424e49ce169da5c729c0e327fdf2259bf8..f031d1c925f0b72ff562a5b839674eb94b4ecf94 100644 (file)
@@ -61,6 +61,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev)
                inode->i_mapping->a_ops = &ramfs_aops;
                inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
                mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
+               mapping_set_unevictable(inode->i_mapping);
                inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
                switch (mode & S_IFMT) {
                default:
index bd20f7f5a93342b57a4802010c788683deb63247..eba2eabcd2b86a40495845053d9d647e228540f9 100644 (file)
@@ -452,17 +452,34 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
 
 int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits)
 {
-       size_t len = bitmap_scnprintf_len(nr_bits);
+       if (m->count < m->size) {
+               int len = bitmap_scnprintf(m->buf + m->count,
+                               m->size - m->count, bits, nr_bits);
+               if (m->count + len < m->size) {
+                       m->count += len;
+                       return 0;
+               }
+       }
+       m->count = m->size;
+       return -1;
+}
+EXPORT_SYMBOL(seq_bitmap);
 
-       if (m->count + len < m->size) {
-               bitmap_scnprintf(m->buf + m->count, m->size - m->count,
-                                bits, nr_bits);
-               m->count += len;
-               return 0;
+int seq_bitmap_list(struct seq_file *m, unsigned long *bits,
+               unsigned int nr_bits)
+{
+       if (m->count < m->size) {
+               int len = bitmap_scnlistprintf(m->buf + m->count,
+                               m->size - m->count, bits, nr_bits);
+               if (m->count + len < m->size) {
+                       m->count += len;
+                       return 0;
+               }
        }
        m->count = m->size;
        return -1;
 }
+EXPORT_SYMBOL(seq_bitmap_list);
 
 static void *single_start(struct seq_file *p, loff_t *pos)
 {
index 7efe1000f99d23e18b0825d7b8f4c7ebc52df53d..cee97f14af3bf4896bedcbc48aec2dfee97c89f2 100644 (file)
@@ -88,6 +88,7 @@ struct thread_info {
 #define TIF_RESTORE_SIGMASK    9       /* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG     16      /* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE             17
+#define TIF_FREEZE             18      /* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE     (1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME     (1<<TIF_NOTIFY_RESUME)
@@ -95,6 +96,7 @@ struct thread_info {
 #define _TIF_NEED_RESCHED      (1<<TIF_NEED_RESCHED)
 #define _TIF_RESTORE_SIGMASK   (1<<TIF_RESTORE_SIGMASK)
 #define _TIF_POLLING_NRFLAG    (1<<TIF_POLLING_NRFLAG)
+#define _TIF_FREEZE            (1<<TIF_FREEZE)
 
 #define _TIF_WORK_MASK         0x0000FFFE      /* work to do on interrupt/exception return */
 #define _TIF_ALLWORK_MASK      0x0000FFFF      /* work to do on any return to u-space */
index 71ef3f0b9685f8adbfcc74a7717b8f7baf2cd496..89061c1a67d473f42c039780e0dcfa1506bec640 100644 (file)
@@ -84,12 +84,12 @@ static inline unsigned int get_rtc_time(struct rtc_time *time)
 
        if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
        {
-               BCD_TO_BIN(time->tm_sec);
-               BCD_TO_BIN(time->tm_min);
-               BCD_TO_BIN(time->tm_hour);
-               BCD_TO_BIN(time->tm_mday);
-               BCD_TO_BIN(time->tm_mon);
-               BCD_TO_BIN(time->tm_year);
+               time->tm_sec = bcd2bin(time->tm_sec);
+               time->tm_min = bcd2bin(time->tm_min);
+               time->tm_hour = bcd2bin(time->tm_hour);
+               time->tm_mday = bcd2bin(time->tm_mday);
+               time->tm_mon = bcd2bin(time->tm_mon);
+               time->tm_year = bcd2bin(time->tm_year);
        }
 
 #ifdef CONFIG_MACH_DECSTATION
@@ -159,12 +159,12 @@ static inline int set_rtc_time(struct rtc_time *time)
 
        if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)
            || RTC_ALWAYS_BCD) {
-               BIN_TO_BCD(sec);
-               BIN_TO_BCD(min);
-               BIN_TO_BCD(hrs);
-               BIN_TO_BCD(day);
-               BIN_TO_BCD(mon);
-               BIN_TO_BCD(yrs);
+               sec = bin2bcd(sec);
+               min = bin2bcd(min);
+               hrs = bin2bcd(hrs);
+               day = bin2bcd(day);
+               mon = bin2bcd(mon);
+               yrs = bin2bcd(yrs);
        }
 
        save_control = CMOS_READ(RTC_CONTROL);
index abc002798a2b07a9fcb502ab0f93c1c7f502ef50..af0fda46e94bc969978a9572221ac2843f23bae2 100644 (file)
@@ -52,5 +52,6 @@ struct thread_info {
 #define TIF_DELAYED_TRACE      14      /* single step a syscall */
 #define TIF_SYSCALL_TRACE      15      /* syscall trace active */
 #define TIF_MEMDIE             16
+#define TIF_FREEZE             17      /* thread is freezing for suspend */
 
 #endif /* _ASM_M68K_THREAD_INFO_H */
index 9f812741c3553d41a45e8dada845c7bb57fef86a..0407959da489d54a72b4ccab371cc21e996e79a3 100644 (file)
@@ -58,6 +58,7 @@ struct thread_info {
 #define TIF_32BIT               4       /* 32 bit binary */
 #define TIF_MEMDIE             5
 #define TIF_RESTORE_SIGMASK    6       /* restore saved signal mask */
+#define TIF_FREEZE             7       /* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE     (1 << TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING                (1 << TIF_SIGPENDING)
@@ -65,6 +66,7 @@ struct thread_info {
 #define _TIF_POLLING_NRFLAG    (1 << TIF_POLLING_NRFLAG)
 #define _TIF_32BIT             (1 << TIF_32BIT)
 #define _TIF_RESTORE_SIGMASK   (1 << TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE            (1 << TIF_FREEZE)
 
 #define _TIF_USER_WORK_MASK     (_TIF_SIGPENDING | \
                                  _TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK)
index e07e72846c7a339544e859d4295eba4452891197..62274ab9471fe6f8eb0de14368acf05b65e17691 100644 (file)
@@ -69,6 +69,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_MEMDIE             5
 #define TIF_SYSCALL_AUDIT      6
 #define TIF_RESTORE_SIGMASK    7
+#define TIF_FREEZE             16      /* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE     (1 << TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING                (1 << TIF_SIGPENDING)
@@ -77,5 +78,6 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_MEMDIE            (1 << TIF_MEMDIE)
 #define _TIF_SYSCALL_AUDIT     (1 << TIF_SYSCALL_AUDIT)
 #define _TIF_RESTORE_SIGMASK   (1 << TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE            (1 << TIF_FREEZE)
 
 #endif
index 7e4131dd546c5d62655b12fc89460a5e44b31a82..0f4fe1faf9ba46fed7f9c689f2bb2652fe25923b 100644 (file)
@@ -134,6 +134,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_MEMDIE             5
 #define TIF_RESTORE_SIGMASK    6       /* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG     16      /* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_FREEZE             17      /* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE     (1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING                (1<<TIF_SIGPENDING)
@@ -142,6 +143,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_IRET              (1<<TIF_IRET)
 #define _TIF_POLLING_NRFLAG    (1<<TIF_POLLING_NRFLAG)
 #define _TIF_RESTORE_SIGMASK   (1<<TIF_RESTORE_SIGMASK)
+#define _TIF_FREEZE            (1<<TIF_FREEZE)
 
 #define _TIF_WORK_MASK         0x0000FFFE      /* work to do on interrupt/exception return */
 #define _TIF_ALLWORK_MASK      0x0000FFFF      /* work to do on any return to u-space */
index bf9aca548f14a931f33e35bfed8a8efb4036e8e8..e531783e5d78db542fb4e86c602698bf5824d72b 100644 (file)
@@ -183,6 +183,7 @@ unifdef-y += auto_fs.h
 unifdef-y += auxvec.h
 unifdef-y += binfmts.h
 unifdef-y += blktrace_api.h
+unifdef-y += byteorder.h
 unifdef-y += capability.h
 unifdef-y += capi.h
 unifdef-y += cciss_ioctl.h
@@ -340,6 +341,7 @@ unifdef-y += soundcard.h
 unifdef-y += stat.h
 unifdef-y += stddef.h
 unifdef-y += string.h
+unifdef-y += swab.h
 unifdef-y += synclink.h
 unifdef-y += sysctl.h
 unifdef-y += tcp.h
index 0a24d5550eb3a082725a150856af3055b77f4ae2..bee52abb8a4dbfd46e53f650d7d7dbca881a9169 100644 (file)
@@ -175,6 +175,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
  * BDI_CAP_READ_MAP:       Can be mapped for reading
  * BDI_CAP_WRITE_MAP:      Can be mapped for writing
  * BDI_CAP_EXEC_MAP:       Can be mapped for execution
+ *
+ * BDI_CAP_SWAP_BACKED:    Count shmem/tmpfs objects as swap-backed.
  */
 #define BDI_CAP_NO_ACCT_DIRTY  0x00000001
 #define BDI_CAP_NO_WRITEBACK   0x00000002
@@ -184,6 +186,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
 #define BDI_CAP_WRITE_MAP      0x00000020
 #define BDI_CAP_EXEC_MAP       0x00000040
 #define BDI_CAP_NO_ACCT_WB     0x00000080
+#define BDI_CAP_SWAP_BACKED    0x00000100
 
 #define BDI_CAP_VMFLAGS \
        (BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP)
@@ -248,6 +251,11 @@ static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi)
                                      BDI_CAP_NO_WRITEBACK));
 }
 
+static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi)
+{
+       return bdi->capabilities & BDI_CAP_SWAP_BACKED;
+}
+
 static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
 {
        return bdi_cap_writeback_dirty(mapping->backing_dev_info);
@@ -258,4 +266,9 @@ static inline bool mapping_cap_account_dirty(struct address_space *mapping)
        return bdi_cap_account_dirty(mapping->backing_dev_info);
 }
 
+static inline bool mapping_cap_swap_backed(struct address_space *mapping)
+{
+       return bdi_cap_swap_backed(mapping->backing_dev_info);
+}
+
 #endif         /* _LINUX_BACKING_DEV_H */
index 7ac518e3c152d9da46f53ed097adb3019245dd5f..22ea563ba3eb169f9e04aac1233b404923c42d6e 100644 (file)
@@ -1,12 +1,3 @@
-/* Permission is hereby granted to copy, modify and redistribute this code
- * in terms of the GNU Library General Public License, Version 2 or later,
- * at your option.
- */
-
-/* macros to translate to/from binary and binary-coded decimal (frequently
- * found in RTC chips).
- */
-
 #ifndef _BCD_H
 #define _BCD_H
 
@@ -15,11 +6,4 @@
 unsigned bcd2bin(unsigned char val) __attribute_const__;
 unsigned char bin2bcd(unsigned val) __attribute_const__;
 
-#define BCD2BIN(val)   bcd2bin(val)
-#define BIN2BCD(val)   bin2bcd(val)
-
-/* backwards compat */
-#define BCD_TO_BIN(val) ((val)=BCD2BIN(val))
-#define BIN_TO_BCD(val) ((val)=BIN2BCD(val))
-
 #endif /* _BCD_H */
index 89781fd48859bb957bbaafabce4c87ad0affd2a0..1abfe664c4446a9a64ea182ce680e6c08d1a3086 100644 (file)
@@ -110,7 +110,6 @@ extern int __bitmap_weight(const unsigned long *bitmap, int bits);
 
 extern int bitmap_scnprintf(char *buf, unsigned int len,
                        const unsigned long *src, int nbits);
-extern int bitmap_scnprintf_len(unsigned int nr_bits);
 extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user,
                        unsigned long *dst, int nbits);
 extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen,
index eadaab44015f62e4fa04ab003828945d6c6bf503..3ce64b90118c20346a704d1c727ec9450d7ccb95 100644 (file)
@@ -322,7 +322,7 @@ static inline void wait_on_buffer(struct buffer_head *bh)
 
 static inline int trylock_buffer(struct buffer_head *bh)
 {
-       return likely(!test_and_set_bit(BH_Lock, &bh->b_state));
+       return likely(!test_and_set_bit_lock(BH_Lock, &bh->b_state));
 }
 
 static inline void lock_buffer(struct buffer_head *bh)
index 1133d5f9d8181c4811362c6992b29b157a0dbb4e..fbaa7f9cee32253c38212318561bd9754a6674d1 100644 (file)
@@ -1,3 +1,4 @@
 unifdef-y += big_endian.h
 unifdef-y += little_endian.h
 unifdef-y += swab.h
+unifdef-y += swabb.h
index 44f95b92393b89b19186166232efa4a797f7314a..1cba3f3efe5f7aaa2ee0ef66afaffbdf3b5fab64 100644 (file)
@@ -10,6 +10,7 @@
 
 #include <linux/types.h>
 #include <linux/byteorder/swab.h>
+#include <linux/byteorder/swabb.h>
 
 #define __constant_htonl(x) ((__force __be32)(__u32)(x))
 #define __constant_ntohl(x) ((__force __u32)(__be32)(x))
index 4cc170a3176219b69ab73bec0035c0868a0362a2..cedc1b5a289c63a4c3102d34db02260b37c1223b 100644 (file)
@@ -10,6 +10,7 @@
 
 #include <linux/types.h>
 #include <linux/byteorder/swab.h>
+#include <linux/byteorder/swabb.h>
 
 #define __constant_htonl(x) ((__force __be32)___constant_swab32((x)))
 #define __constant_ntohl(x) ___constant_swab32((__force __be32)(x))
index 30934e4bfaab7b9e3197e0d7410579c0737bf2c7..8b00f6643e934444f323c7aa3aeac0aff4b2af43 100644 (file)
@@ -9,12 +9,12 @@
  */
 
 #include <linux/sched.h>
-#include <linux/kref.h>
 #include <linux/cpumask.h>
 #include <linux/nodemask.h>
 #include <linux/rcupdate.h>
 #include <linux/cgroupstats.h>
 #include <linux/prio_heap.h>
+#include <linux/rwsem.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -137,6 +137,15 @@ struct cgroup {
         * release_list_lock
         */
        struct list_head release_list;
+
+       /* pids_mutex protects the fields below */
+       struct rw_semaphore pids_mutex;
+       /* Array of process ids in the cgroup */
+       pid_t *tasks_pids;
+       /* How many files are using the current tasks_pids array */
+       int pids_use_count;
+       /* Length of the current tasks_pids array */
+       int pids_length;
 };
 
 /* A css_set is a structure holding pointers to a set of
@@ -149,7 +158,7 @@ struct cgroup {
 struct css_set {
 
        /* Reference count */
-       struct kref ref;
+       atomic_t refcount;
 
        /*
         * List running through all cgroup groups in the same hash
@@ -394,6 +403,9 @@ void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
 int cgroup_scan_tasks(struct cgroup_scanner *scan);
 int cgroup_attach_task(struct cgroup *, struct task_struct *);
 
+void cgroup_mm_owner_callbacks(struct task_struct *old,
+                              struct task_struct *new);
+
 #else /* !CONFIG_CGROUPS */
 
 static inline int cgroup_init_early(void) { return 0; }
@@ -412,15 +424,9 @@ static inline int cgroupstats_build(struct cgroupstats *stats,
        return -EINVAL;
 }
 
+static inline void cgroup_mm_owner_callbacks(struct task_struct *old,
+                                            struct task_struct *new) {}
+
 #endif /* !CONFIG_CGROUPS */
 
-#ifdef CONFIG_MM_OWNER
-extern void
-cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new);
-#else /* !CONFIG_MM_OWNER */
-static inline void
-cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
-{
-}
-#endif /* CONFIG_MM_OWNER */
 #endif /* _LINUX_CGROUP_H */
index e2877454ec82df3205c0251f9c9759db9334dedb..9c22396e8b50355e3058e93807f293112622079f 100644 (file)
@@ -48,3 +48,9 @@ SUBSYS(devices)
 #endif
 
 /* */
+
+#ifdef CONFIG_CGROUP_FREEZER
+SUBSYS(freezer)
+#endif
+
+/* */
index 025e4f5751034ac15139a704a63941a0f1d6dbce..0acf3b737e2ea179c30d9c9fa69076fbdf488a1c 100644 (file)
@@ -8,12 +8,9 @@
 #include <linux/proc_fs.h>
 
 #define ELFCORE_ADDR_MAX       (-1ULL)
+#define ELFCORE_ADDR_ERR       (-2ULL)
 
-#ifdef CONFIG_PROC_VMCORE
 extern unsigned long long elfcorehdr_addr;
-#else
-static const unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
-#endif
 
 extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
                                                unsigned long, int);
@@ -28,10 +25,43 @@ extern struct proc_dir_entry *proc_vmcore;
 
 #define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x))
 
+/*
+ * is_kdump_kernel() checks whether this kernel is booting after a panic of
+ * previous kernel or not. This is determined by checking if previous kernel
+ * has passed the elf core header address on command line.
+ *
+ * This is not just a test if CONFIG_CRASH_DUMP is enabled or not. It will
+ * return 1 if CONFIG_CRASH_DUMP=y and if kernel is booting after a panic of
+ * previous kernel.
+ */
+
 static inline int is_kdump_kernel(void)
 {
        return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0;
 }
+
+/* is_vmcore_usable() checks if the kernel is booting after a panic and
+ * the vmcore region is usable.
+ *
+ * This makes use of the fact that due to alignment -2ULL is not
+ * a valid pointer, much in the vain of IS_ERR(), except
+ * dealing directly with an unsigned long long rather than a pointer.
+ */
+
+static inline int is_vmcore_usable(void)
+{
+       return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0;
+}
+
+/* vmcore_unusable() marks the vmcore as unusable,
+ * without disturbing the logic of is_kdump_kernel()
+ */
+
+static inline void vmcore_unusable(void)
+{
+       if (is_kdump_kernel())
+               elfcorehdr_addr = ELFCORE_ADDR_ERR;
+}
 #else /* !CONFIG_CRASH_DUMP */
 static inline int is_kdump_kernel(void) { return 0; }
 #endif /* CONFIG_CRASH_DUMP */
index 159d9b476cd7f404c0ae33facd7189e7efe477ad..d14f029184832bd300ad7856c7cacbe467101c5d 100644 (file)
@@ -380,6 +380,8 @@ struct ext3_inode {
 #define EXT3_MOUNT_QUOTA               0x80000 /* Some quota option set */
 #define EXT3_MOUNT_USRQUOTA            0x100000 /* "old" user quota */
 #define EXT3_MOUNT_GRPQUOTA            0x200000 /* "old" group quota */
+#define EXT3_MOUNT_DATA_ERR_ABORT      0x400000 /* Abort on file data write
+                                                 * error in ordered mode */
 
 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H
index 531ccd5f5960b0fe5818fdb768d3beb6bd3f3ea7..75a81eaf34309c43ba4da319babecd5e3bd00ca0 100644 (file)
@@ -808,6 +808,7 @@ struct fb_tile_ops {
 struct fb_info {
        int node;
        int flags;
+       struct mutex lock;              /* Lock for open/release/ioctl funcs */
        struct fb_var_screeninfo var;   /* Current var */
        struct fb_fix_screeninfo fix;   /* Current fix */
        struct fb_monspecs monspecs;    /* Current Monitor specs */
index deddeedf32571f12015de116f729ae666f74ede4..8f225339eee9e49329b842803d89c2b4c90f9d38 100644 (file)
@@ -6,7 +6,7 @@
 #include <linux/sched.h>
 #include <linux/wait.h>
 
-#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_FREEZER
 /*
  * Check if a process has been frozen
  */
@@ -39,28 +39,18 @@ static inline void clear_freeze_flag(struct task_struct *p)
        clear_tsk_thread_flag(p, TIF_FREEZE);
 }
 
+static inline bool should_send_signal(struct task_struct *p)
+{
+       return !(p->flags & PF_FREEZER_NOSIG);
+}
+
 /*
  * Wake up a frozen process
- *
- * task_lock() is taken to prevent the race with refrigerator() which may
- * occur if the freezing of tasks fails.  Namely, without the lock, if the
- * freezing of tasks failed, thaw_tasks() might have run before a task in
- * refrigerator() could call frozen_process(), in which case the task would be
- * frozen and no one would thaw it.
  */
-static inline int thaw_process(struct task_struct *p)
-{
-       task_lock(p);
-       if (frozen(p)) {
-               p->flags &= ~PF_FROZEN;
-               task_unlock(p);
-               wake_up_process(p);
-               return 1;
-       }
-       clear_freeze_flag(p);
-       task_unlock(p);
-       return 0;
-}
+extern int __thaw_process(struct task_struct *p);
+
+/* Takes and releases task alloc lock using task_lock() */
+extern int thaw_process(struct task_struct *p);
 
 extern void refrigerator(void);
 extern int freeze_processes(void);
@@ -75,6 +65,15 @@ static inline int try_to_freeze(void)
                return 0;
 }
 
+extern bool freeze_task(struct task_struct *p, bool sig_only);
+extern void cancel_freezing(struct task_struct *p);
+
+#ifdef CONFIG_CGROUP_FREEZER
+extern int cgroup_frozen(struct task_struct *task);
+#else /* !CONFIG_CGROUP_FREEZER */
+static inline int cgroup_frozen(struct task_struct *task) { return 0; }
+#endif /* !CONFIG_CGROUP_FREEZER */
+
 /*
  * The PF_FREEZER_SKIP flag should be set by a vfork parent right before it
  * calls wait_for_completion(&vfork) and reset right after it returns from this
@@ -166,7 +165,7 @@ static inline void set_freezable_with_signal(void)
        } while (try_to_freeze());                                      \
        __retval;                                                       \
 })
-#else /* !CONFIG_PM_SLEEP */
+#else /* !CONFIG_FREEZER */
 static inline int frozen(struct task_struct *p) { return 0; }
 static inline int freezing(struct task_struct *p) { return 0; }
 static inline void set_freeze_flag(struct task_struct *p) {}
@@ -191,6 +190,6 @@ static inline void set_freezable_with_signal(void) {}
 #define wait_event_freezable_timeout(wq, condition, timeout)           \
                wait_event_interruptible_timeout(wq, condition, timeout)
 
-#endif /* !CONFIG_PM_SLEEP */
+#endif /* !CONFIG_FREEZER */
 
 #endif /* FREEZER_H_INCLUDED */
index 7ebbcb1c9ba4e5d6a5a72290c002f37c09617fce..35d4f6342fac82f3158b88e23790aa03a2140c79 100644 (file)
@@ -816,6 +816,9 @@ struct journal_s
 #define JFS_FLUSHED    0x008   /* The journal superblock has been flushed */
 #define JFS_LOADED     0x010   /* The journal superblock has been loaded */
 #define JFS_BARRIER    0x020   /* Use IDE barriers */
+#define JFS_ABORT_ON_SYNCDATA_ERR      0x040  /* Abort the journal on file
+                                               * data write error in ordered
+                                               * mode */
 
 /*
  * Function declarations for the journaling transaction and buffer
index fdf3967e13975a4dc24ec7c37026295ea4fa8f0e..1fbe14d39521d728dd877d263b278210bd9d3ed5 100644 (file)
@@ -27,16 +27,13 @@ struct mm_struct;
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 
-#define page_reset_bad_cgroup(page)    ((page)->page_cgroup = 0)
-
-extern struct page_cgroup *page_get_page_cgroup(struct page *page);
 extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
                                gfp_t gfp_mask);
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                                        gfp_t gfp_mask);
+extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru);
 extern void mem_cgroup_uncharge_page(struct page *page);
 extern void mem_cgroup_uncharge_cache_page(struct page *page);
-extern void mem_cgroup_move_lists(struct page *page, bool active);
 extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask);
 
 extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
@@ -44,7 +41,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
                                        unsigned long *scanned, int order,
                                        int mode, struct zone *z,
                                        struct mem_cgroup *mem_cont,
-                                       int active);
+                                       int active, int file);
 extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
 
@@ -69,21 +66,11 @@ extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
 extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
                                                        int priority);
 
-extern long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
-                               struct zone *zone, int priority);
-extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
-                               struct zone *zone, int priority);
-
-#else /* CONFIG_CGROUP_MEM_RES_CTLR */
-static inline void page_reset_bad_cgroup(struct page *page)
-{
-}
+extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
+                                       int priority, enum lru_list lru);
 
-static inline struct page_cgroup *page_get_page_cgroup(struct page *page)
-{
-       return NULL;
-}
 
+#else /* CONFIG_CGROUP_MEM_RES_CTLR */
 static inline int mem_cgroup_charge(struct page *page,
                                        struct mm_struct *mm, gfp_t gfp_mask)
 {
@@ -159,14 +146,9 @@ static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 {
 }
 
-static inline long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
-                                       struct zone *zone, int priority)
-{
-       return 0;
-}
-
-static inline long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
-                                       struct zone *zone, int priority)
+static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem,
+                                       struct zone *zone, int priority,
+                                       enum lru_list lru)
 {
        return 0;
 }
index 03aea612d28450f24f96274c922a8da987bb918b..3f34005068d420b2b1766afdec804529dc2772d9 100644 (file)
@@ -7,7 +7,6 @@
 typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
 #ifdef CONFIG_MIGRATION
-extern int isolate_lru_page(struct page *p, struct list_head *pagelist);
 extern int putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
                        struct page *, struct page *);
@@ -21,8 +20,6 @@ extern int migrate_vmas(struct mm_struct *mm,
                const nodemask_t *from, const nodemask_t *to,
                unsigned long flags);
 #else
-static inline int isolate_lru_page(struct page *p, struct list_head *list)
-                                       { return -ENOSYS; }
 static inline int putback_lru_pages(struct list_head *l) { return 0; }
 static inline int migrate_pages(struct list_head *l, new_page_t x,
                unsigned long private) { return -ENOSYS; }
index c61ba10768ea48d6691cd58678476f35c1c878f1..ffee2f74341856275ead62f71c6093c8de457199 100644 (file)
@@ -131,6 +131,11 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_SequentialReadHint(v)       ((v)->vm_flags & VM_SEQ_READ)
 #define VM_RandomReadHint(v)           ((v)->vm_flags & VM_RAND_READ)
 
+/*
+ * special vmas that are non-mergable, non-mlock()able
+ */
+#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
+
 /*
  * mapping from the currently active vm_flags protection bits (the
  * low four bits) to a page protection mask..
@@ -700,10 +705,10 @@ static inline int page_mapped(struct page *page)
 extern void show_free_areas(void);
 
 #ifdef CONFIG_SHMEM
-int shmem_lock(struct file *file, int lock, struct user_struct *user);
+extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
 #else
 static inline int shmem_lock(struct file *file, int lock,
-                            struct user_struct *user)
+                           struct user_struct *user)
 {
        return 0;
 }
index 895bc4e93039cdba7f6585b484abed3b9eb4a920..c948350c378e93cb9144e9c17c52f3445e8f4a80 100644 (file)
-static inline void
-add_page_to_active_list(struct zone *zone, struct page *page)
-{
-       list_add(&page->lru, &zone->active_list);
-       __inc_zone_state(zone, NR_ACTIVE);
-}
+#ifndef LINUX_MM_INLINE_H
+#define LINUX_MM_INLINE_H
 
-static inline void
-add_page_to_inactive_list(struct zone *zone, struct page *page)
+/**
+ * page_is_file_cache - should the page be on a file LRU or anon LRU?
+ * @page: the page to test
+ *
+ * Returns LRU_FILE if @page is page cache page backed by a regular filesystem,
+ * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed.
+ * Used by functions that manipulate the LRU lists, to sort a page
+ * onto the right LRU list.
+ *
+ * We would like to get this info without a page flag, but the state
+ * needs to survive until the page is last deleted from the LRU, which
+ * could be as far down as __page_cache_release.
+ */
+static inline int page_is_file_cache(struct page *page)
 {
-       list_add(&page->lru, &zone->inactive_list);
-       __inc_zone_state(zone, NR_INACTIVE);
+       if (PageSwapBacked(page))
+               return 0;
+
+       /* The page is page cache backed by a normal filesystem. */
+       return LRU_FILE;
 }
 
 static inline void
-del_page_from_active_list(struct zone *zone, struct page *page)
+add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
-       list_del(&page->lru);
-       __dec_zone_state(zone, NR_ACTIVE);
+       list_add(&page->lru, &zone->lru[l].list);
+       __inc_zone_state(zone, NR_LRU_BASE + l);
 }
 
 static inline void
-del_page_from_inactive_list(struct zone *zone, struct page *page)
+del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
        list_del(&page->lru);
-       __dec_zone_state(zone, NR_INACTIVE);
+       __dec_zone_state(zone, NR_LRU_BASE + l);
 }
 
 static inline void
 del_page_from_lru(struct zone *zone, struct page *page)
 {
+       enum lru_list l = LRU_BASE;
+
        list_del(&page->lru);
-       if (PageActive(page)) {
-               __ClearPageActive(page);
-               __dec_zone_state(zone, NR_ACTIVE);
+       if (PageUnevictable(page)) {
+               __ClearPageUnevictable(page);
+               l = LRU_UNEVICTABLE;
        } else {
-               __dec_zone_state(zone, NR_INACTIVE);
+               if (PageActive(page)) {
+                       __ClearPageActive(page);
+                       l += LRU_ACTIVE;
+               }
+               l += page_is_file_cache(page);
+       }
+       __dec_zone_state(zone, NR_LRU_BASE + l);
+}
+
+/**
+ * page_lru - which LRU list should a page be on?
+ * @page: the page to test
+ *
+ * Returns the LRU list a page should be on, as an index
+ * into the array of LRU lists.
+ */
+static inline enum lru_list page_lru(struct page *page)
+{
+       enum lru_list lru = LRU_BASE;
+
+       if (PageUnevictable(page))
+               lru = LRU_UNEVICTABLE;
+       else {
+               if (PageActive(page))
+                       lru += LRU_ACTIVE;
+               lru += page_is_file_cache(page);
        }
+
+       return lru;
 }
 
+/**
+ * inactive_anon_is_low - check if anonymous pages need to be deactivated
+ * @zone: zone to check
+ *
+ * Returns true if the zone does not have enough inactive anon pages,
+ * meaning some active anon pages need to be deactivated.
+ */
+static inline int inactive_anon_is_low(struct zone *zone)
+{
+       unsigned long active, inactive;
+
+       active = zone_page_state(zone, NR_ACTIVE_ANON);
+       inactive = zone_page_state(zone, NR_INACTIVE_ANON);
+
+       if (inactive * zone->inactive_ratio < active)
+               return 1;
+
+       return 0;
+}
+#endif
index 9d49fa36bbeff3237e2e0f4925a0ed3ac1ec1b29..fe825471d5aaf9d59440a7f322989004736ea71f 100644 (file)
@@ -94,9 +94,6 @@ struct page {
        void *virtual;                  /* Kernel virtual address (NULL if
                                           not kmapped, ie. highmem) */
 #endif /* WANT_PAGE_VIRTUAL */
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-       unsigned long page_cgroup;
-#endif
 };
 
 /*
index 428328a05fa123779a832b4b51731bee45984ea6..35a7b5e19465fc89154dd2097f0bd4913e7b2e21 100644 (file)
@@ -81,21 +81,31 @@ struct zone_padding {
 enum zone_stat_item {
        /* First 128 byte cacheline (assuming 64 bit words) */
        NR_FREE_PAGES,
-       NR_INACTIVE,
-       NR_ACTIVE,
+       NR_LRU_BASE,
+       NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
+       NR_ACTIVE_ANON,         /*  "     "     "   "       "         */
+       NR_INACTIVE_FILE,       /*  "     "     "   "       "         */
+       NR_ACTIVE_FILE,         /*  "     "     "   "       "         */
+#ifdef CONFIG_UNEVICTABLE_LRU
+       NR_UNEVICTABLE,         /*  "     "     "   "       "         */
+       NR_MLOCK,               /* mlock()ed pages found and moved off LRU */
+#else
+       NR_UNEVICTABLE = NR_ACTIVE_FILE, /* avoid compiler errors in dead code */
+       NR_MLOCK = NR_ACTIVE_FILE,
+#endif
        NR_ANON_PAGES,  /* Mapped anonymous pages */
        NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
                           only modified from process context */
        NR_FILE_PAGES,
        NR_FILE_DIRTY,
        NR_WRITEBACK,
-       /* Second 128 byte cacheline */
        NR_SLAB_RECLAIMABLE,
        NR_SLAB_UNRECLAIMABLE,
        NR_PAGETABLE,           /* used for pagetables */
        NR_UNSTABLE_NFS,        /* NFS unstable pages */
        NR_BOUNCE,
        NR_VMSCAN_WRITE,
+       /* Second 128 byte cacheline */
        NR_WRITEBACK_TEMP,      /* Writeback using temporary buffers */
 #ifdef CONFIG_NUMA
        NUMA_HIT,               /* allocated in intended node */
@@ -107,6 +117,55 @@ enum zone_stat_item {
 #endif
        NR_VM_ZONE_STAT_ITEMS };
 
+/*
+ * We do arithmetic on the LRU lists in various places in the code,
+ * so it is important to keep the active lists LRU_ACTIVE higher in
+ * the array than the corresponding inactive lists, and to keep
+ * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists.
+ *
+ * This has to be kept in sync with the statistics in zone_stat_item
+ * above and the descriptions in vmstat_text in mm/vmstat.c
+ */
+#define LRU_BASE 0
+#define LRU_ACTIVE 1
+#define LRU_FILE 2
+
+enum lru_list {
+       LRU_INACTIVE_ANON = LRU_BASE,
+       LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
+       LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
+       LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
+#ifdef CONFIG_UNEVICTABLE_LRU
+       LRU_UNEVICTABLE,
+#else
+       LRU_UNEVICTABLE = LRU_ACTIVE_FILE, /* avoid compiler errors in dead code */
+#endif
+       NR_LRU_LISTS
+};
+
+#define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++)
+
+#define for_each_evictable_lru(l) for (l = 0; l <= LRU_ACTIVE_FILE; l++)
+
+static inline int is_file_lru(enum lru_list l)
+{
+       return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE);
+}
+
+static inline int is_active_lru(enum lru_list l)
+{
+       return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE);
+}
+
+static inline int is_unevictable_lru(enum lru_list l)
+{
+#ifdef CONFIG_UNEVICTABLE_LRU
+       return (l == LRU_UNEVICTABLE);
+#else
+       return 0;
+#endif
+}
+
 struct per_cpu_pages {
        int count;              /* number of pages in the list */
        int high;               /* high watermark, emptying needed */
@@ -251,10 +310,22 @@ struct zone {
 
        /* Fields commonly accessed by the page reclaim scanner */
        spinlock_t              lru_lock;       
-       struct list_head        active_list;
-       struct list_head        inactive_list;
-       unsigned long           nr_scan_active;
-       unsigned long           nr_scan_inactive;
+       struct {
+               struct list_head list;
+               unsigned long nr_scan;
+       } lru[NR_LRU_LISTS];
+
+       /*
+        * The pageout code in vmscan.c keeps track of how many of the
+        * mem/swap backed and file backed pages are refeferenced.
+        * The higher the rotated/scanned ratio, the more valuable
+        * that cache is.
+        *
+        * The anon LRU stats live in [0], file LRU stats in [1]
+        */
+       unsigned long           recent_rotated[2];
+       unsigned long           recent_scanned[2];
+
        unsigned long           pages_scanned;     /* since last reclaim */
        unsigned long           flags;             /* zone flags, see below */
 
@@ -276,6 +347,12 @@ struct zone {
         */
        int prev_priority;
 
+       /*
+        * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
+        * this zone's LRU.  Maintained by the pageout code.
+        */
+       unsigned int inactive_ratio;
+
 
        ZONE_PADDING(_pad2_)
        /* Rarely used or read-mostly fields */
@@ -524,8 +601,11 @@ typedef struct pglist_data {
        struct zone node_zones[MAX_NR_ZONES];
        struct zonelist node_zonelists[MAX_ZONELISTS];
        int nr_zones;
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
+#ifdef CONFIG_FLAT_NODE_MEM_MAP        /* means !SPARSEMEM */
        struct page *node_mem_map;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+       struct page_cgroup *node_page_cgroup;
+#endif
 #endif
        struct bootmem_data *bdata;
 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -854,6 +934,7 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
 #endif
 
 struct page;
+struct page_cgroup;
 struct mem_section {
        /*
         * This is, logically, a pointer to an array of struct
@@ -871,6 +952,14 @@ struct mem_section {
 
        /* See declaration of similar field in struct zone */
        unsigned long *pageblock_flags;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+       /*
+        * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use
+        * section. (see memcontrol.h/page_cgroup.h about this.)
+        */
+       struct page_cgroup *page_cgroup;
+       unsigned long pad;
+#endif
 };
 
 #ifdef CONFIG_SPARSEMEM_EXTREME
index d6fb115f5a0714794c077ea6ff98ea18243d537e..ee5124ec319e4e036f1f139dff39c428c6732190 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/mtd/flashchip.h>
 #include <linux/mtd/map.h>
 #include <linux/mtd/cfi_endian.h>
+#include <linux/mtd/xip.h>
 
 #ifdef CONFIG_MTD_CFI_I1
 #define cfi_interleave(cfi) 1
@@ -430,7 +431,6 @@ static inline uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t
 {
        map_word val;
        uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, cfi_interleave(cfi), type);
-
        val = cfi_build_cmd(cmd, map, cfi);
 
        if (prev_val)
@@ -483,6 +483,13 @@ static inline void cfi_udelay(int us)
        }
 }
 
+int __xipram cfi_qry_present(struct map_info *map, __u32 base,
+                            struct cfi_private *cfi);
+int __xipram cfi_qry_mode_on(uint32_t base, struct map_info *map,
+                            struct cfi_private *cfi);
+void __xipram cfi_qry_mode_off(uint32_t base, struct map_info *map,
+                              struct cfi_private *cfi);
+
 struct cfi_extquery *cfi_read_pri(struct map_info *map, uint16_t adr, uint16_t size,
                             const char* name);
 struct cfi_fixup {
index 08dd131301c1fe98e87d900f9173cc054f953a84..d4f38c5fd44ec1d1affcc5876e23dfaadc727fd8 100644 (file)
@@ -73,6 +73,10 @@ struct flchip {
        int buffer_write_time;
        int erase_time;
 
+       int word_write_time_max;
+       int buffer_write_time_max;
+       int erase_time_max;
+
        void *priv;
 };
 
index 922636548558943d7af9007074cb8d2e7e91fcb0..eae26bb6430ae0e8db2c70385d1b1c72c3e32d5b 100644 (file)
 #define MTD_ERASE_DONE          0x08
 #define MTD_ERASE_FAILED        0x10
 
+#define MTD_FAIL_ADDR_UNKNOWN 0xffffffff
+
 /* If the erase fails, fail_addr might indicate exactly which block failed.  If
-   fail_addr = 0xffffffff, the failure was not at the device level or was not
+   fail_addr = MTD_FAIL_ADDR_UNKNOWN, the failure was not at the device level or was not
    specific to any particular block. */
 struct erase_info {
        struct mtd_info *mtd;
diff --git a/include/linux/mtd/nand-gpio.h b/include/linux/mtd/nand-gpio.h
new file mode 100644 (file)
index 0000000..51534e5
--- /dev/null
@@ -0,0 +1,19 @@
+#ifndef __LINUX_MTD_NAND_GPIO_H
+#define __LINUX_MTD_NAND_GPIO_H
+
+#include <linux/mtd/nand.h>
+
+struct gpio_nand_platdata {
+       int     gpio_nce;
+       int     gpio_nwp;
+       int     gpio_cle;
+       int     gpio_ale;
+       int     gpio_rdy;
+       void    (*adjust_parts)(struct gpio_nand_platdata *, size_t);
+       struct mtd_partition *parts;
+       unsigned int num_parts;
+       unsigned int options;
+       int     chip_delay;
+};
+
+#endif
index 81774e5facf4e74a2d5dd742bae6bb81a1922b17..733d3f3b4eb8bbda4101a6a1a37eba84d424b9f0 100644 (file)
@@ -248,6 +248,7 @@ struct nand_hw_control {
  * @read_page_raw:     function to read a raw page without ECC
  * @write_page_raw:    function to write a raw page without ECC
  * @read_page: function to read a page according to the ecc generator requirements
+ * @read_subpage:      function to read parts of the page covered by ECC.
  * @write_page:        function to write a page according to the ecc generator requirements
  * @read_oob:  function to read chip OOB data
  * @write_oob: function to write chip OOB data
index d1b310c92eb45a9272c8ba2ac30c26becb856a5e..0c6bbe28f38ce91e01151faa517c75688300488b 100644 (file)
 #define ONENAND_SYS_CFG1_INT           (1 << 6)
 #define ONENAND_SYS_CFG1_IOBE          (1 << 5)
 #define ONENAND_SYS_CFG1_RDY_CONF      (1 << 4)
+#define ONENAND_SYS_CFG1_HF            (1 << 2)
+#define ONENAND_SYS_CFG1_SYNC_WRITE    (1 << 1)
 
 /*
  * Controller Status Register F240h (R)
index 5014f7a9f5dfe57a8df546d4cee4edb2e6a065df..c92b4d439609feec0640b953fedc502fd9a8890d 100644 (file)
@@ -73,7 +73,6 @@ struct device;
 struct device_node;
 
 int __devinit of_mtd_parse_partitions(struct device *dev,
-                                      struct mtd_info *mtd,
                                       struct device_node *node,
                                       struct mtd_partition **pparts);
 
diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h
new file mode 100644 (file)
index 0000000..e77c1ce
--- /dev/null
@@ -0,0 +1,125 @@
+/*
+ * SuperH FLCTL nand controller
+ *
+ * Copyright © 2008 Renesas Solutions Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef __SH_FLCTL_H__
+#define __SH_FLCTL_H__
+
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+
+/* FLCTL registers */
+#define FLCMNCR(f)             (f->reg + 0x0)
+#define FLCMDCR(f)             (f->reg + 0x4)
+#define FLCMCDR(f)             (f->reg + 0x8)
+#define FLADR(f)               (f->reg + 0xC)
+#define FLADR2(f)              (f->reg + 0x3C)
+#define FLDATAR(f)             (f->reg + 0x10)
+#define FLDTCNTR(f)            (f->reg + 0x14)
+#define FLINTDMACR(f)          (f->reg + 0x18)
+#define FLBSYTMR(f)            (f->reg + 0x1C)
+#define FLBSYCNT(f)            (f->reg + 0x20)
+#define FLDTFIFO(f)            (f->reg + 0x24)
+#define FLECFIFO(f)            (f->reg + 0x28)
+#define FLTRCR(f)              (f->reg + 0x2C)
+#define        FL4ECCRESULT0(f)        (f->reg + 0x80)
+#define        FL4ECCRESULT1(f)        (f->reg + 0x84)
+#define        FL4ECCRESULT2(f)        (f->reg + 0x88)
+#define        FL4ECCRESULT3(f)        (f->reg + 0x8C)
+#define        FL4ECCCR(f)             (f->reg + 0x90)
+#define        FL4ECCCNT(f)            (f->reg + 0x94)
+#define        FLERRADR(f)             (f->reg + 0x98)
+
+/* FLCMNCR control bits */
+#define ECCPOS2                (0x1 << 25)
+#define _4ECCCNTEN     (0x1 << 24)
+#define _4ECCEN                (0x1 << 23)
+#define _4ECCCORRECT   (0x1 << 22)
+#define SNAND_E                (0x1 << 18)     /* SNAND (0=512 1=2048)*/
+#define QTSEL_E                (0x1 << 17)
+#define ENDIAN         (0x1 << 16)     /* 1 = little endian */
+#define FCKSEL_E       (0x1 << 15)
+#define ECCPOS_00      (0x00 << 12)
+#define ECCPOS_01      (0x01 << 12)
+#define ECCPOS_02      (0x02 << 12)
+#define ACM_SACCES_MODE        (0x01 << 10)
+#define NANWF_E                (0x1 << 9)
+#define SE_D           (0x1 << 8)      /* Spare area disable */
+#define        CE1_ENABLE      (0x1 << 4)      /* Chip Enable 1 */
+#define        CE0_ENABLE      (0x1 << 3)      /* Chip Enable 0 */
+#define        TYPESEL_SET     (0x1 << 0)
+
+/* FLCMDCR control bits */
+#define ADRCNT2_E      (0x1 << 31)     /* 5byte address enable */
+#define ADRMD_E                (0x1 << 26)     /* Sector address access */
+#define CDSRC_E                (0x1 << 25)     /* Data buffer selection */
+#define DOSR_E         (0x1 << 24)     /* Status read check */
+#define SELRW          (0x1 << 21)     /*  0:read 1:write */
+#define DOADR_E                (0x1 << 20)     /* Address stage execute */
+#define ADRCNT_1       (0x00 << 18)    /* Address data bytes: 1byte */
+#define ADRCNT_2       (0x01 << 18)    /* Address data bytes: 2byte */
+#define ADRCNT_3       (0x02 << 18)    /* Address data bytes: 3byte */
+#define ADRCNT_4       (0x03 << 18)    /* Address data bytes: 4byte */
+#define DOCMD2_E       (0x1 << 17)     /* 2nd cmd stage execute */
+#define DOCMD1_E       (0x1 << 16)     /* 1st cmd stage execute */
+
+/* FLTRCR control bits */
+#define TRSTRT         (0x1 << 0)      /* translation start */
+#define TREND          (0x1 << 1)      /* translation end */
+
+/* FL4ECCCR control bits */
+#define        _4ECCFA         (0x1 << 2)      /* 4 symbols correct fault */
+#define        _4ECCEND        (0x1 << 1)      /* 4 symbols end */
+#define        _4ECCEXST       (0x1 << 0)      /* 4 symbols exist */
+
+#define INIT_FL4ECCRESULT_VAL  0x03FF03FF
+#define LOOP_TIMEOUT_MAX       0x00010000
+
+#define mtd_to_flctl(mtd)      container_of(mtd, struct sh_flctl, mtd)
+
+struct sh_flctl {
+       struct mtd_info         mtd;
+       struct nand_chip        chip;
+       void __iomem            *reg;
+
+       uint8_t done_buff[2048 + 64];   /* max size 2048 + 64 */
+       int     read_bytes;
+       int     index;
+       int     seqin_column;           /* column in SEQIN cmd */
+       int     seqin_page_addr;        /* page_addr in SEQIN cmd */
+       uint32_t seqin_read_cmd;                /* read cmd in SEQIN cmd */
+       int     erase1_page_addr;       /* page_addr in ERASE1 cmd */
+       uint32_t erase_ADRCNT;          /* bits of FLCMDCR in ERASE1 cmd */
+       uint32_t rw_ADRCNT;     /* bits of FLCMDCR in READ WRITE cmd */
+
+       int     hwecc_cant_correct[4];
+
+       unsigned page_size:1;   /* NAND page size (0 = 512, 1 = 2048) */
+       unsigned hwecc:1;       /* Hardware ECC (0 = disabled, 1 = enabled) */
+};
+
+struct sh_flctl_platform_data {
+       struct mtd_partition    *parts;
+       int                     nr_parts;
+       unsigned long           flcmncr_val;
+
+       unsigned has_hwecc:1;
+};
+
+#endif /* __SH_FLCTL_H__ */
index c74d3e87531420faf4e6a65b075ae078ff6277c3..b12f93a3c345f781b3edd26716f193efb1fdf0d4 100644 (file)
@@ -93,6 +93,11 @@ enum pageflags {
        PG_mappedtodisk,        /* Has blocks allocated on-disk */
        PG_reclaim,             /* To be reclaimed asap */
        PG_buddy,               /* Page is free, on buddy lists */
+       PG_swapbacked,          /* Page is backed by RAM/swap */
+#ifdef CONFIG_UNEVICTABLE_LRU
+       PG_unevictable,         /* Page is "unevictable"  */
+       PG_mlocked,             /* Page is vma mlocked */
+#endif
 #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
        PG_uncached,            /* Page has been mapped as uncached */
 #endif
@@ -161,6 +166,18 @@ static inline int Page##uname(struct page *page)                   \
 #define TESTSCFLAG(uname, lname)                                       \
        TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname)
 
+#define SETPAGEFLAG_NOOP(uname)                                                \
+static inline void SetPage##uname(struct page *page) {  }
+
+#define CLEARPAGEFLAG_NOOP(uname)                                      \
+static inline void ClearPage##uname(struct page *page) {  }
+
+#define __CLEARPAGEFLAG_NOOP(uname)                                    \
+static inline void __ClearPage##uname(struct page *page) {  }
+
+#define TESTCLEARFLAG_FALSE(uname)                                     \
+static inline int TestClearPage##uname(struct page *page) { return 0; }
+
 struct page;   /* forward declaration */
 
 TESTPAGEFLAG(Locked, locked)
@@ -169,6 +186,7 @@ PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
 PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
 PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
 PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
+       TESTCLEARFLAG(Active, active)
 __PAGEFLAG(Slab, slab)
 PAGEFLAG(Checked, checked)             /* Used by some filesystems */
 PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned)    /* Xen */
@@ -176,6 +194,7 @@ PAGEFLAG(SavePinned, savepinned);                   /* Xen */
 PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
        __SETPAGEFLAG(Private, private)
+PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
 
 __PAGEFLAG(SlobPage, slob_page)
 __PAGEFLAG(SlobFree, slob_free)
@@ -211,6 +230,25 @@ PAGEFLAG(SwapCache, swapcache)
 PAGEFLAG_FALSE(SwapCache)
 #endif
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
+       TESTCLEARFLAG(Unevictable, unevictable)
+
+#define MLOCK_PAGES 1
+PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
+       TESTSCFLAG(Mlocked, mlocked)
+
+#else
+
+#define MLOCK_PAGES 0
+PAGEFLAG_FALSE(Mlocked)
+       SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked)
+
+PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable)
+       SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable)
+       __CLEARPAGEFLAG_NOOP(Unevictable)
+#endif
+
 #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
 PAGEFLAG(Uncached, uncached)
 #else
@@ -326,15 +364,25 @@ static inline void __ClearPageTail(struct page *page)
 
 #endif /* !PAGEFLAGS_EXTENDED */
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+#define __PG_UNEVICTABLE       (1 << PG_unevictable)
+#define __PG_MLOCKED           (1 << PG_mlocked)
+#else
+#define __PG_UNEVICTABLE       0
+#define __PG_MLOCKED           0
+#endif
+
 #define PAGE_FLAGS     (1 << PG_lru   | 1 << PG_private   | 1 << PG_locked | \
                         1 << PG_buddy | 1 << PG_writeback | \
-                        1 << PG_slab  | 1 << PG_swapcache | 1 << PG_active)
+                        1 << PG_slab  | 1 << PG_swapcache | 1 << PG_active | \
+                        __PG_UNEVICTABLE | __PG_MLOCKED)
 
 /*
  * Flags checked in bad_page().  Pages on the free list should not have
  * these flags set.  It they are, there is a problem.
  */
-#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | 1 << PG_reclaim | 1 << PG_dirty)
+#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | \
+               1 << PG_reclaim | 1 << PG_dirty | 1 << PG_swapbacked)
 
 /*
  * Flags checked when a page is freed.  Pages being freed should not have
@@ -347,7 +395,8 @@ static inline void __ClearPageTail(struct page *page)
  * Pages being prepped should not have these flags set.  It they are, there
  * is a problem.
  */
-#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | 1 << PG_reserved | 1 << PG_dirty)
+#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | \
+               1 << PG_reserved | 1 << PG_dirty | 1 << PG_swapbacked)
 
 #endif /* !__GENERATING_BOUNDS_H */
 #endif /* PAGE_FLAGS_H */
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
new file mode 100644 (file)
index 0000000..0fd39f2
--- /dev/null
@@ -0,0 +1,103 @@
+#ifndef __LINUX_PAGE_CGROUP_H
+#define __LINUX_PAGE_CGROUP_H
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#include <linux/bit_spinlock.h>
+/*
+ * Page Cgroup can be considered as an extended mem_map.
+ * A page_cgroup page is associated with every page descriptor. The
+ * page_cgroup helps us identify information about the cgroup
+ * All page cgroups are allocated at boot or memory hotplug event,
+ * then the page cgroup for pfn always exists.
+ */
+struct page_cgroup {
+       unsigned long flags;
+       struct mem_cgroup *mem_cgroup;
+       struct page *page;
+       struct list_head lru;           /* per cgroup LRU list */
+};
+
+void __init pgdat_page_cgroup_init(struct pglist_data *pgdat);
+void __init page_cgroup_init(void);
+struct page_cgroup *lookup_page_cgroup(struct page *page);
+
+enum {
+       /* flags for mem_cgroup */
+       PCG_LOCK,  /* page cgroup is locked */
+       PCG_CACHE, /* charged as cache */
+       PCG_USED, /* this object is in use. */
+       /* flags for LRU placement */
+       PCG_ACTIVE, /* page is active in this cgroup */
+       PCG_FILE, /* page is file system backed */
+       PCG_UNEVICTABLE, /* page is unevictable */
+};
+
+#define TESTPCGFLAG(uname, lname)                      \
+static inline int PageCgroup##uname(struct page_cgroup *pc)    \
+       { return test_bit(PCG_##lname, &pc->flags); }
+
+#define SETPCGFLAG(uname, lname)                       \
+static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
+       { set_bit(PCG_##lname, &pc->flags);  }
+
+#define CLEARPCGFLAG(uname, lname)                     \
+static inline void ClearPageCgroup##uname(struct page_cgroup *pc)      \
+       { clear_bit(PCG_##lname, &pc->flags);  }
+
+/* Cache flag is set only once (at allocation) */
+TESTPCGFLAG(Cache, CACHE)
+
+TESTPCGFLAG(Used, USED)
+CLEARPCGFLAG(Used, USED)
+
+/* LRU management flags (from global-lru definition) */
+TESTPCGFLAG(File, FILE)
+SETPCGFLAG(File, FILE)
+CLEARPCGFLAG(File, FILE)
+
+TESTPCGFLAG(Active, ACTIVE)
+SETPCGFLAG(Active, ACTIVE)
+CLEARPCGFLAG(Active, ACTIVE)
+
+TESTPCGFLAG(Unevictable, UNEVICTABLE)
+SETPCGFLAG(Unevictable, UNEVICTABLE)
+CLEARPCGFLAG(Unevictable, UNEVICTABLE)
+
+static inline int page_cgroup_nid(struct page_cgroup *pc)
+{
+       return page_to_nid(pc->page);
+}
+
+static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc)
+{
+       return page_zonenum(pc->page);
+}
+
+static inline void lock_page_cgroup(struct page_cgroup *pc)
+{
+       bit_spin_lock(PCG_LOCK, &pc->flags);
+}
+
+static inline int trylock_page_cgroup(struct page_cgroup *pc)
+{
+       return bit_spin_trylock(PCG_LOCK, &pc->flags);
+}
+
+static inline void unlock_page_cgroup(struct page_cgroup *pc)
+{
+       bit_spin_unlock(PCG_LOCK, &pc->flags);
+}
+
+#else /* CONFIG_CGROUP_MEM_RES_CTLR */
+struct page_cgroup;
+
+static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat)
+{
+}
+
+static inline struct page_cgroup *lookup_page_cgroup(struct page *page)
+{
+       return NULL;
+}
+#endif
+#endif
index 5da31c12101c2e459524f0073a0af4f2f5dd7357..709742be02f0a1bac5336d5e38b172a2b7d9f2b7 100644 (file)
@@ -32,6 +32,34 @@ static inline void mapping_set_error(struct address_space *mapping, int error)
        }
 }
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+#define AS_UNEVICTABLE (__GFP_BITS_SHIFT + 2)  /* e.g., ramdisk, SHM_LOCK */
+
+static inline void mapping_set_unevictable(struct address_space *mapping)
+{
+       set_bit(AS_UNEVICTABLE, &mapping->flags);
+}
+
+static inline void mapping_clear_unevictable(struct address_space *mapping)
+{
+       clear_bit(AS_UNEVICTABLE, &mapping->flags);
+}
+
+static inline int mapping_unevictable(struct address_space *mapping)
+{
+       if (likely(mapping))
+               return test_bit(AS_UNEVICTABLE, &mapping->flags);
+       return !!mapping;
+}
+#else
+static inline void mapping_set_unevictable(struct address_space *mapping) { }
+static inline void mapping_clear_unevictable(struct address_space *mapping) { }
+static inline int mapping_unevictable(struct address_space *mapping)
+{
+       return 0;
+}
+#endif
+
 static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
 {
        return (__force gfp_t)mapping->flags & __GFP_BITS_MASK;
@@ -271,19 +299,19 @@ extern int __lock_page_killable(struct page *page);
 extern void __lock_page_nosync(struct page *page);
 extern void unlock_page(struct page *page);
 
-static inline void set_page_locked(struct page *page)
+static inline void __set_page_locked(struct page *page)
 {
-       set_bit(PG_locked, &page->flags);
+       __set_bit(PG_locked, &page->flags);
 }
 
-static inline void clear_page_locked(struct page *page)
+static inline void __clear_page_locked(struct page *page)
 {
-       clear_bit(PG_locked, &page->flags);
+       __clear_bit(PG_locked, &page->flags);
 }
 
 static inline int trylock_page(struct page *page)
 {
-       return !test_and_set_bit(PG_locked, &page->flags);
+       return (likely(!test_and_set_bit_lock(PG_locked, &page->flags)));
 }
 
 /*
@@ -410,17 +438,17 @@ extern void __remove_from_page_cache(struct page *page);
 
 /*
  * Like add_to_page_cache_locked, but used to add newly allocated pages:
- * the page is new, so we can just run set_page_locked() against it.
+ * the page is new, so we can just run __set_page_locked() against it.
  */
 static inline int add_to_page_cache(struct page *page,
                struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
 {
        int error;
 
-       set_page_locked(page);
+       __set_page_locked(page);
        error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
        if (unlikely(error))
-               clear_page_locked(page);
+               __clear_page_locked(page);
        return error;
 }
 
index 8eb7fa76c1d025055683c6428388213e9ae2af2c..e90a2cb0291530db1e80ad64bc553da45855d984 100644 (file)
@@ -23,9 +23,9 @@ struct pagevec {
 void __pagevec_release(struct pagevec *pvec);
 void __pagevec_release_nonlru(struct pagevec *pvec);
 void __pagevec_free(struct pagevec *pvec);
-void __pagevec_lru_add(struct pagevec *pvec);
-void __pagevec_lru_add_active(struct pagevec *pvec);
+void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
 void pagevec_strip(struct pagevec *pvec);
+void pagevec_swap_free(struct pagevec *pvec);
 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
                pgoff_t start, unsigned nr_pages);
 unsigned pagevec_lookup_tag(struct pagevec *pvec,
@@ -81,10 +81,36 @@ static inline void pagevec_free(struct pagevec *pvec)
                __pagevec_free(pvec);
 }
 
-static inline void pagevec_lru_add(struct pagevec *pvec)
+static inline void __pagevec_lru_add_anon(struct pagevec *pvec)
+{
+       ____pagevec_lru_add(pvec, LRU_INACTIVE_ANON);
+}
+
+static inline void __pagevec_lru_add_active_anon(struct pagevec *pvec)
+{
+       ____pagevec_lru_add(pvec, LRU_ACTIVE_ANON);
+}
+
+static inline void __pagevec_lru_add_file(struct pagevec *pvec)
+{
+       ____pagevec_lru_add(pvec, LRU_INACTIVE_FILE);
+}
+
+static inline void __pagevec_lru_add_active_file(struct pagevec *pvec)
+{
+       ____pagevec_lru_add(pvec, LRU_ACTIVE_FILE);
+}
+
+static inline void pagevec_lru_add_file(struct pagevec *pvec)
+{
+       if (pagevec_count(pvec))
+               __pagevec_lru_add_file(pvec);
+}
+
+static inline void pagevec_lru_add_anon(struct pagevec *pvec)
 {
        if (pagevec_count(pvec))
-               __pagevec_lru_add(pvec);
+               __pagevec_lru_add_anon(pvec);
 }
 
 #endif /* _LINUX_PAGEVEC_H */
index 98dc6243a706424ae099853320ef34edab5a8bc8..acf8f24037cd0cd261772e23cbf8d86386318f2d 100644 (file)
@@ -631,6 +631,8 @@ int __must_check pci_assign_resource(struct pci_dev *dev, int i);
 int pci_select_bars(struct pci_dev *dev, unsigned long flags);
 
 /* ROM control related routines */
+int pci_enable_rom(struct pci_dev *pdev);
+void pci_disable_rom(struct pci_dev *pdev);
 void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size);
 void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom);
 size_t pci_get_rom_size(void __iomem *rom, size_t size);
index ea7416c901d1ef8923adb2417b236cf8633a09f0..22641d5d45df7d29d43bde4b60b4f19b825e30a6 100644 (file)
@@ -94,7 +94,6 @@ extern void ptrace_notify(int exit_code);
 extern void __ptrace_link(struct task_struct *child,
                          struct task_struct *new_parent);
 extern void __ptrace_unlink(struct task_struct *child);
-extern void ptrace_untrace(struct task_struct *child);
 #define PTRACE_MODE_READ   1
 #define PTRACE_MODE_ATTACH 2
 /* Returns 0 on success, -errno on denial. */
index fed6f5e0b41139070dd9c99b84ee324e7254f63f..89f0564b10c8f7c81951b571e98dbd5ef88a40a4 100644 (file)
@@ -39,18 +39,6 @@ struct anon_vma {
 
 #ifdef CONFIG_MMU
 
-extern struct kmem_cache *anon_vma_cachep;
-
-static inline struct anon_vma *anon_vma_alloc(void)
-{
-       return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
-}
-
-static inline void anon_vma_free(struct anon_vma *anon_vma)
-{
-       kmem_cache_free(anon_vma_cachep, anon_vma);
-}
-
 static inline void anon_vma_lock(struct vm_area_struct *vma)
 {
        struct anon_vma *anon_vma = vma->anon_vma;
@@ -75,6 +63,9 @@ void anon_vma_unlink(struct vm_area_struct *);
 void anon_vma_link(struct vm_area_struct *);
 void __anon_vma_link(struct vm_area_struct *);
 
+extern struct anon_vma *page_lock_anon_vma(struct page *page);
+extern void page_unlock_anon_vma(struct anon_vma *anon_vma);
+
 /*
  * rmap interfaces called when adding or removing pte of page
  */
@@ -117,6 +108,19 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
  */
 int page_mkclean(struct page *);
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * called in munlock()/munmap() path to check for other vmas holding
+ * the page mlocked.
+ */
+int try_to_munlock(struct page *);
+#else
+static inline int try_to_munlock(struct page *page)
+{
+       return 0;       /* a.k.a. SWAP_SUCCESS */
+}
+#endif
+
 #else  /* !CONFIG_MMU */
 
 #define anon_vma_init()                do {} while (0)
@@ -140,5 +144,6 @@ static inline int page_mkclean(struct page *page)
 #define SWAP_SUCCESS   0
 #define SWAP_AGAIN     1
 #define SWAP_FAIL      2
+#define SWAP_MLOCK     3
 
 #endif /* _LINUX_RMAP_H */
index c226c7b82946ce1d830853c4fd3b9bad3d92fa0d..f52dbd3587a78c4c031759f6934b5d03685b666a 100644 (file)
@@ -403,12 +403,21 @@ extern int get_dumpable(struct mm_struct *mm);
 #define MMF_DUMP_MAPPED_PRIVATE        4
 #define MMF_DUMP_MAPPED_SHARED 5
 #define MMF_DUMP_ELF_HEADERS   6
+#define MMF_DUMP_HUGETLB_PRIVATE 7
+#define MMF_DUMP_HUGETLB_SHARED  8
 #define MMF_DUMP_FILTER_SHIFT  MMF_DUMPABLE_BITS
-#define MMF_DUMP_FILTER_BITS   5
+#define MMF_DUMP_FILTER_BITS   7
 #define MMF_DUMP_FILTER_MASK \
        (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
 #define MMF_DUMP_FILTER_DEFAULT \
-       ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED))
+       ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\
+        (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF)
+
+#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS
+# define MMF_DUMP_MASK_DEFAULT_ELF     (1 << MMF_DUMP_ELF_HEADERS)
+#else
+# define MMF_DUMP_MASK_DEFAULT_ELF     0
+#endif
 
 struct sighand_struct {
        atomic_t                count;
index a1783b229ef4c219081fc7200fcdc5dd63c267e4..dc50bcc282a888ba1611a5485c53467d4a2ab953 100644 (file)
@@ -60,6 +60,19 @@ static inline int seq_nodemask(struct seq_file *m, nodemask_t *mask)
        return seq_bitmap(m, mask->bits, MAX_NUMNODES);
 }
 
+int seq_bitmap_list(struct seq_file *m, unsigned long *bits,
+               unsigned int nr_bits);
+
+static inline int seq_cpumask_list(struct seq_file *m, cpumask_t *mask)
+{
+       return seq_bitmap_list(m, mask->bits, NR_CPUS);
+}
+
+static inline int seq_nodemask_list(struct seq_file *m, nodemask_t *mask)
+{
+       return seq_bitmap_list(m, mask->bits, MAX_NUMNODES);
+}
+
 int single_open(struct file *, int (*)(struct seq_file *, void *), void *);
 int single_release(struct inode *, struct file *);
 void *__seq_open_private(struct file *, const struct seq_operations *, int);
index 270d5c208a899aaeee19148a8a39109d59e384a8..bbed279f3b32b53ff90dde829a8380fa0f8f768f 100644 (file)
@@ -47,8 +47,6 @@ static inline __attribute_const__ __u16 ___swab16(__u16 val)
 {
 #ifdef __arch_swab16
        return __arch_swab16(val);
-#elif defined(__arch_swab16p)
-       return __arch_swab16p(&val);
 #else
        return __const_swab16(val);
 #endif
@@ -58,8 +56,6 @@ static inline __attribute_const__ __u32 ___swab32(__u32 val)
 {
 #ifdef __arch_swab32
        return __arch_swab32(val);
-#elif defined(__arch_swab32p)
-       return __arch_swab32p(&val);
 #else
        return __const_swab32(val);
 #endif
@@ -69,8 +65,6 @@ static inline __attribute_const__ __u64 ___swab64(__u64 val)
 {
 #ifdef __arch_swab64
        return __arch_swab64(val);
-#elif defined(__arch_swab64p)
-       return __arch_swab64p(&val);
 #elif defined(__SWAB_64_THRU_32__)
        __u32 h = val >> 32;
        __u32 l = val & ((1ULL << 32) - 1);
@@ -84,8 +78,6 @@ static inline __attribute_const__ __u32 ___swahw32(__u32 val)
 {
 #ifdef __arch_swahw32
        return __arch_swahw32(val);
-#elif defined(__arch_swahw32p)
-       return __arch_swahw32p(&val);
 #else
        return __const_swahw32(val);
 #endif
@@ -95,8 +87,6 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val)
 {
 #ifdef __arch_swahb32
        return __arch_swahb32(val);
-#elif defined(__arch_swahb32p)
-       return __arch_swahb32p(&val);
 #else
        return __const_swahb32(val);
 #endif
index de40f169a4e4134fa79c2ab0c360d057d2a1f62d..a3af95b2cb6dfcf24f5d278e43a29e62f23f452e 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/list.h>
 #include <linux/memcontrol.h>
 #include <linux/sched.h>
+#include <linux/node.h>
 
 #include <asm/atomic.h>
 #include <asm/page.h>
@@ -171,8 +172,10 @@ extern unsigned int nr_free_pagecache_pages(void);
 
 
 /* linux/mm/swap.c */
-extern void lru_cache_add(struct page *);
-extern void lru_cache_add_active(struct page *);
+extern void __lru_cache_add(struct page *, enum lru_list lru);
+extern void lru_cache_add_lru(struct page *, enum lru_list lru);
+extern void lru_cache_add_active_or_unevictable(struct page *,
+                                       struct vm_area_struct *);
 extern void activate_page(struct page *);
 extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
@@ -180,12 +183,38 @@ extern int lru_add_drain_all(void);
 extern void rotate_reclaimable_page(struct page *page);
 extern void swap_setup(void);
 
+extern void add_page_to_unevictable_list(struct page *page);
+
+/**
+ * lru_cache_add_anon: add a page to the inactive anon LRU list
+ * @page: the page to add
+ */
+static inline void lru_cache_add_anon(struct page *page)
+{
+       __lru_cache_add(page, LRU_INACTIVE_ANON);
+}
+
+static inline void lru_cache_add_active_anon(struct page *page)
+{
+       __lru_cache_add(page, LRU_ACTIVE_ANON);
+}
+
+static inline void lru_cache_add_file(struct page *page)
+{
+       __lru_cache_add(page, LRU_INACTIVE_FILE);
+}
+
+static inline void lru_cache_add_active_file(struct page *page)
+{
+       __lru_cache_add(page, LRU_ACTIVE_FILE);
+}
+
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                                        gfp_t gfp_mask);
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
                                                        gfp_t gfp_mask);
-extern int __isolate_lru_page(struct page *page, int mode);
+extern int __isolate_lru_page(struct page *page, int mode, int file);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
@@ -204,6 +233,34 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
 }
 #endif
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+extern int page_evictable(struct page *page, struct vm_area_struct *vma);
+extern void scan_mapping_unevictable_pages(struct address_space *);
+
+extern unsigned long scan_unevictable_pages;
+extern int scan_unevictable_handler(struct ctl_table *, int, struct file *,
+                                       void __user *, size_t *, loff_t *);
+extern int scan_unevictable_register_node(struct node *node);
+extern void scan_unevictable_unregister_node(struct node *node);
+#else
+static inline int page_evictable(struct page *page,
+                                               struct vm_area_struct *vma)
+{
+       return 1;
+}
+
+static inline void scan_mapping_unevictable_pages(struct address_space *mapping)
+{
+}
+
+static inline int scan_unevictable_register_node(struct node *node)
+{
+       return 0;
+}
+
+static inline void scan_unevictable_unregister_node(struct node *node) { }
+#endif
+
 extern int kswapd_run(int nid);
 
 #ifdef CONFIG_MMU
@@ -251,6 +308,7 @@ extern sector_t swapdev_block(int, pgoff_t);
 extern struct swap_info_struct *get_swap_info_struct(unsigned);
 extern int can_share_swap_page(struct page *);
 extern int remove_exclusive_swap_page(struct page *);
+extern int remove_exclusive_swap_page_ref(struct page *);
 struct backing_dev_info;
 
 /* linux/mm/thrash.c */
@@ -339,6 +397,11 @@ static inline int remove_exclusive_swap_page(struct page *p)
        return 0;
 }
 
+static inline int remove_exclusive_swap_page_ref(struct page *page)
+{
+       return 0;
+}
+
 static inline swp_entry_t get_swap_page(void)
 {
        swp_entry_t entry;
index b330e289d71f280f7bb6ca3af431a733039ac420..9d68fed50f119423904db7d7f608fd4baede5a82 100644 (file)
@@ -21,8 +21,9 @@ struct kobject;
 struct module;
 
 /* FIXME
- * The *owner field is no longer used, but leave around
- * until the tree gets cleaned up fully.
+ * The *owner field is no longer used.
+ * x86 tree has been cleaned up. The owner
+ * attribute is still left for other arches.
  */
 struct attribute {
        const char              *name;
index 328eb40227277967c8a6179efb56ad14e03fc233..4c28c4d564e2da1e5e6112e0e5081d4f08ebd942 100644 (file)
@@ -2,6 +2,7 @@
 #define _LINUX_VMALLOC_H
 
 #include <linux/spinlock.h>
+#include <linux/init.h>
 #include <asm/page.h>          /* pgprot_t */
 
 struct vm_area_struct;         /* vma defining user mapping in mm_types.h */
@@ -23,7 +24,6 @@ struct vm_area_struct;                /* vma defining user mapping in mm_types.h */
 #endif
 
 struct vm_struct {
-       /* keep next,addr,size together to speedup lookups */
        struct vm_struct        *next;
        void                    *addr;
        unsigned long           size;
@@ -37,6 +37,19 @@ struct vm_struct {
 /*
  *     Highlevel APIs for driver use
  */
+extern void vm_unmap_ram(const void *mem, unsigned int count);
+extern void *vm_map_ram(struct page **pages, unsigned int count,
+                               int node, pgprot_t prot);
+extern void vm_unmap_aliases(void);
+
+#ifdef CONFIG_MMU
+extern void __init vmalloc_init(void);
+#else
+static inline void vmalloc_init(void)
+{
+}
+#endif
+
 extern void *vmalloc(unsigned long size);
 extern void *vmalloc_user(unsigned long size);
 extern void *vmalloc_node(unsigned long size, int node);
index 58334d4395167b9f3105cba86b0b68b60bca39d0..9cd3ab0f554d301224f518391b47fdd1752fc33d 100644 (file)
@@ -40,6 +40,16 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
                PAGEOUTRUN, ALLOCSTALL, PGROTATED,
 #ifdef CONFIG_HUGETLB_PAGE
                HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
+#endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+               UNEVICTABLE_PGCULLED,   /* culled to noreclaim list */
+               UNEVICTABLE_PGSCANNED,  /* scanned for reclaimability */
+               UNEVICTABLE_PGRESCUED,  /* rescued from noreclaim list */
+               UNEVICTABLE_PGMLOCKED,
+               UNEVICTABLE_PGMUNLOCKED,
+               UNEVICTABLE_PGCLEARED,  /* on COW, page truncate */
+               UNEVICTABLE_PGSTRANDED, /* unable to isolate on unlock */
+               UNEVICTABLE_MLOCKFREED,
 #endif
                NR_VM_EVENT_ITEMS
 };
@@ -159,6 +169,16 @@ static inline unsigned long zone_page_state(struct zone *zone,
        return x;
 }
 
+extern unsigned long global_lru_pages(void);
+
+static inline unsigned long zone_lru_pages(struct zone *zone)
+{
+       return (zone_page_state(zone, NR_ACTIVE_ANON)
+               + zone_page_state(zone, NR_ACTIVE_FILE)
+               + zone_page_state(zone, NR_INACTIVE_ANON)
+               + zone_page_state(zone, NR_INACTIVE_FILE));
+}
+
 #ifdef CONFIG_NUMA
 /*
  * Determine the per node value of a stat item. This function
index 0cb63ed2c1fc268843af16f911db33e571e24b38..b8093971ccb476e5288037a8c653cb06a8addf49 100644 (file)
@@ -2,9 +2,9 @@
 #define __NETNS_X_TABLES_H
 
 #include <linux/list.h>
-#include <linux/net.h>
+#include <linux/netfilter.h>
 
 struct netns_xt {
-       struct list_head tables[NPROTO];
+       struct list_head tables[NFPROTO_NUMPROTO];
 };
 #endif
index 5ceff3249a2d84cd23110b99fb3f8a076e8cf56c..8828ed0b2051f16df1d3a491419d3e868ac8d014 100644 (file)
@@ -299,6 +299,13 @@ config CGROUP_NS
           for instance virtual servers and checkpoint/restart
           jobs.
 
+config CGROUP_FREEZER
+        bool "control group freezer subsystem"
+        depends on CGROUPS
+        help
+          Provides a way to freeze and unfreeze all tasks in a
+         cgroup.
+
 config CGROUP_DEVICE
        bool "Device controller for cgroups"
        depends on CGROUPS && EXPERIMENTAL
index 27f6bf6108e96908b10df5b20e0011aaebb3266c..4371d11721f618f45ccfc65b799fbbdfebbf8140 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/gfp.h>
 #include <linux/percpu.h>
 #include <linux/kmod.h>
+#include <linux/vmalloc.h>
 #include <linux/kernel_stat.h>
 #include <linux/start_kernel.h>
 #include <linux/security.h>
@@ -642,6 +643,7 @@ asmlinkage void __init start_kernel(void)
                initrd_start = 0;
        }
 #endif
+       vmalloc_init();
        vfs_caches_init_early();
        cpuset_init_early();
        mem_init();
index 96fb36cd9874cb26b242933178426244d1ed7e13..68eb857cfdea2a1961760320a7e956608b8b432c 100644 (file)
 #define HARD_MSGMAX    (131072/sizeof(void*))
 #define DFLT_MSGSIZEMAX 8192   /* max message size */
 
+/*
+ * Define the ranges various user-specified maximum values can
+ * be set to.
+ */
+#define MIN_MSGMAX     1               /* min value for msg_max */
+#define MAX_MSGMAX     HARD_MSGMAX     /* max value for msg_max */
+#define MIN_MSGSIZEMAX 128             /* min value for msgsize_max */
+#define MAX_MSGSIZEMAX (8192*128)      /* max value for msgsize_max */
 
 struct ext_wait_queue {                /* queue of sleeping tasks */
        struct task_struct *task;
@@ -134,8 +142,8 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
                        info->qsize = 0;
                        info->user = NULL;      /* set when all is ok */
                        memset(&info->attr, 0, sizeof(info->attr));
-                       info->attr.mq_maxmsg = DFLT_MSGMAX;
-                       info->attr.mq_msgsize = DFLT_MSGSIZEMAX;
+                       info->attr.mq_maxmsg = msg_max;
+                       info->attr.mq_msgsize = msgsize_max;
                        if (attr) {
                                info->attr.mq_maxmsg = attr->mq_maxmsg;
                                info->attr.mq_msgsize = attr->mq_msgsize;
@@ -1191,11 +1199,11 @@ static struct file_system_type mqueue_fs_type = {
        .kill_sb = kill_litter_super,
 };
 
-static int msg_max_limit_min = DFLT_MSGMAX;
-static int msg_max_limit_max = HARD_MSGMAX;
+static int msg_max_limit_min = MIN_MSGMAX;
+static int msg_max_limit_max = MAX_MSGMAX;
 
-static int msg_maxsize_limit_min = DFLT_MSGSIZEMAX;
-static int msg_maxsize_limit_max = INT_MAX;
+static int msg_maxsize_limit_min = MIN_MSGSIZEMAX;
+static int msg_maxsize_limit_max = MAX_MSGSIZEMAX;
 
 static ctl_table mq_sysctls[] = {
        {
index e77ec698cf408c2344d302ac693814d44d411180..0add3fa5f54774d15eb81330a3f52ea9a5ddb084 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -737,6 +737,10 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
        case SHM_LOCK:
        case SHM_UNLOCK:
        {
+               struct file *uninitialized_var(shm_file);
+
+               lru_add_drain_all();  /* drain pagevecs to lru lists */
+
                shp = shm_lock_check(ns, shmid);
                if (IS_ERR(shp)) {
                        err = PTR_ERR(shp);
diff --git a/kernel/Kconfig.freezer b/kernel/Kconfig.freezer
new file mode 100644 (file)
index 0000000..a3bb4cb
--- /dev/null
@@ -0,0 +1,2 @@
+config FREEZER
+       def_bool PM_SLEEP || CGROUP_FREEZER
index 4e1d7df7c3e279af83ae70f2f70c9196044514f2..066550aa61c5018a32dff60fcd75e6ffdebbdedd 100644 (file)
@@ -24,6 +24,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg
 CFLAGS_REMOVE_sched.o = -mno-spe -pg
 endif
 
+obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
 obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
@@ -55,6 +56,7 @@ obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
 obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_CGROUPS) += cgroup.o
 obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
+obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
 obj-$(CONFIG_UTS_NS) += utsname.o
index 8c6e1c17e6d37faae0870c2d9ff5635c1e192037..046c1609606bc627059aa6cd5df687fa86f15d3b 100644 (file)
@@ -241,7 +241,6 @@ static void unlink_css_set(struct css_set *cg)
        struct cg_cgroup_link *link;
        struct cg_cgroup_link *saved_link;
 
-       write_lock(&css_set_lock);
        hlist_del(&cg->hlist);
        css_set_count--;
 
@@ -251,16 +250,25 @@ static void unlink_css_set(struct css_set *cg)
                list_del(&link->cgrp_link_list);
                kfree(link);
        }
-
-       write_unlock(&css_set_lock);
 }
 
-static void __release_css_set(struct kref *k, int taskexit)
+static void __put_css_set(struct css_set *cg, int taskexit)
 {
        int i;
-       struct css_set *cg = container_of(k, struct css_set, ref);
-
+       /*
+        * Ensure that the refcount doesn't hit zero while any readers
+        * can see it. Similar to atomic_dec_and_lock(), but for an
+        * rwlock
+        */
+       if (atomic_add_unless(&cg->refcount, -1, 1))
+               return;
+       write_lock(&css_set_lock);
+       if (!atomic_dec_and_test(&cg->refcount)) {
+               write_unlock(&css_set_lock);
+               return;
+       }
        unlink_css_set(cg);
+       write_unlock(&css_set_lock);
 
        rcu_read_lock();
        for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
@@ -276,32 +284,22 @@ static void __release_css_set(struct kref *k, int taskexit)
        kfree(cg);
 }
 
-static void release_css_set(struct kref *k)
-{
-       __release_css_set(k, 0);
-}
-
-static void release_css_set_taskexit(struct kref *k)
-{
-       __release_css_set(k, 1);
-}
-
 /*
  * refcounted get/put for css_set objects
  */
 static inline void get_css_set(struct css_set *cg)
 {
-       kref_get(&cg->ref);
+       atomic_inc(&cg->refcount);
 }
 
 static inline void put_css_set(struct css_set *cg)
 {
-       kref_put(&cg->ref, release_css_set);
+       __put_css_set(cg, 0);
 }
 
 static inline void put_css_set_taskexit(struct css_set *cg)
 {
-       kref_put(&cg->ref, release_css_set_taskexit);
+       __put_css_set(cg, 1);
 }
 
 /*
@@ -427,7 +425,7 @@ static struct css_set *find_css_set(
                return NULL;
        }
 
-       kref_init(&res->ref);
+       atomic_set(&res->refcount, 1);
        INIT_LIST_HEAD(&res->cg_links);
        INIT_LIST_HEAD(&res->tasks);
        INIT_HLIST_NODE(&res->hlist);
@@ -870,6 +868,14 @@ static struct super_operations cgroup_ops = {
        .remount_fs = cgroup_remount,
 };
 
+static void init_cgroup_housekeeping(struct cgroup *cgrp)
+{
+       INIT_LIST_HEAD(&cgrp->sibling);
+       INIT_LIST_HEAD(&cgrp->children);
+       INIT_LIST_HEAD(&cgrp->css_sets);
+       INIT_LIST_HEAD(&cgrp->release_list);
+       init_rwsem(&cgrp->pids_mutex);
+}
 static void init_cgroup_root(struct cgroupfs_root *root)
 {
        struct cgroup *cgrp = &root->top_cgroup;
@@ -878,10 +884,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
        root->number_of_cgroups = 1;
        cgrp->root = root;
        cgrp->top_cgroup = cgrp;
-       INIT_LIST_HEAD(&cgrp->sibling);
-       INIT_LIST_HEAD(&cgrp->children);
-       INIT_LIST_HEAD(&cgrp->css_sets);
-       INIT_LIST_HEAD(&cgrp->release_list);
+       init_cgroup_housekeeping(cgrp);
 }
 
 static int cgroup_test_super(struct super_block *sb, void *data)
@@ -1728,7 +1731,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
 
        read_lock(&css_set_lock);
        list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
-               count += atomic_read(&link->cg->ref.refcount);
+               count += atomic_read(&link->cg->refcount);
        }
        read_unlock(&css_set_lock);
        return count;
@@ -1997,16 +2000,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
  * but we cannot guarantee that the information we produce is correct
  * unless we produce it entirely atomically.
  *
- * Upon tasks file open(), a struct ctr_struct is allocated, that
- * will have a pointer to an array (also allocated here).  The struct
- * ctr_struct * is stored in file->private_data.  Its resources will
- * be freed by release() when the file is closed.  The array is used
- * to sprintf the PIDs and then used by read().
  */
-struct ctr_struct {
-       char *buf;
-       int bufsz;
-};
 
 /*
  * Load into 'pidarray' up to 'npids' of the tasks using cgroup
@@ -2088,42 +2082,132 @@ static int cmppid(const void *a, const void *b)
        return *(pid_t *)a - *(pid_t *)b;
 }
 
+
 /*
- * Convert array 'a' of 'npids' pid_t's to a string of newline separated
- * decimal pids in 'buf'.  Don't write more than 'sz' chars, but return
- * count 'cnt' of how many chars would be written if buf were large enough.
+ * seq_file methods for the "tasks" file. The seq_file position is the
+ * next pid to display; the seq_file iterator is a pointer to the pid
+ * in the cgroup->tasks_pids array.
  */
-static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
+
+static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
 {
-       int cnt = 0;
-       int i;
+       /*
+        * Initially we receive a position value that corresponds to
+        * one more than the last pid shown (or 0 on the first call or
+        * after a seek to the start). Use a binary-search to find the
+        * next pid to display, if any
+        */
+       struct cgroup *cgrp = s->private;
+       int index = 0, pid = *pos;
+       int *iter;
 
-       for (i = 0; i < npids; i++)
-               cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
-       return cnt;
+       down_read(&cgrp->pids_mutex);
+       if (pid) {
+               int end = cgrp->pids_length;
+               int i;
+               while (index < end) {
+                       int mid = (index + end) / 2;
+                       if (cgrp->tasks_pids[mid] == pid) {
+                               index = mid;
+                               break;
+                       } else if (cgrp->tasks_pids[mid] <= pid)
+                               index = mid + 1;
+                       else
+                               end = mid;
+               }
+       }
+       /* If we're off the end of the array, we're done */
+       if (index >= cgrp->pids_length)
+               return NULL;
+       /* Update the abstract position to be the actual pid that we found */
+       iter = cgrp->tasks_pids + index;
+       *pos = *iter;
+       return iter;
+}
+
+static void cgroup_tasks_stop(struct seq_file *s, void *v)
+{
+       struct cgroup *cgrp = s->private;
+       up_read(&cgrp->pids_mutex);
 }
 
+static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
+{
+       struct cgroup *cgrp = s->private;
+       int *p = v;
+       int *end = cgrp->tasks_pids + cgrp->pids_length;
+
+       /*
+        * Advance to the next pid in the array. If this goes off the
+        * end, we're done
+        */
+       p++;
+       if (p >= end) {
+               return NULL;
+       } else {
+               *pos = *p;
+               return p;
+       }
+}
+
+static int cgroup_tasks_show(struct seq_file *s, void *v)
+{
+       return seq_printf(s, "%d\n", *(int *)v);
+}
+
+static struct seq_operations cgroup_tasks_seq_operations = {
+       .start = cgroup_tasks_start,
+       .stop = cgroup_tasks_stop,
+       .next = cgroup_tasks_next,
+       .show = cgroup_tasks_show,
+};
+
+static void release_cgroup_pid_array(struct cgroup *cgrp)
+{
+       down_write(&cgrp->pids_mutex);
+       BUG_ON(!cgrp->pids_use_count);
+       if (!--cgrp->pids_use_count) {
+               kfree(cgrp->tasks_pids);
+               cgrp->tasks_pids = NULL;
+               cgrp->pids_length = 0;
+       }
+       up_write(&cgrp->pids_mutex);
+}
+
+static int cgroup_tasks_release(struct inode *inode, struct file *file)
+{
+       struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+
+       if (!(file->f_mode & FMODE_READ))
+               return 0;
+
+       release_cgroup_pid_array(cgrp);
+       return seq_release(inode, file);
+}
+
+static struct file_operations cgroup_tasks_operations = {
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .write = cgroup_file_write,
+       .release = cgroup_tasks_release,
+};
+
 /*
- * Handle an open on 'tasks' file.  Prepare a buffer listing the
+ * Handle an open on 'tasks' file.  Prepare an array containing the
  * process id's of tasks currently attached to the cgroup being opened.
- *
- * Does not require any specific cgroup mutexes, and does not take any.
  */
+
 static int cgroup_tasks_open(struct inode *unused, struct file *file)
 {
        struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
-       struct ctr_struct *ctr;
        pid_t *pidarray;
        int npids;
-       char c;
+       int retval;
 
+       /* Nothing to do for write-only files */
        if (!(file->f_mode & FMODE_READ))
                return 0;
 
-       ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
-       if (!ctr)
-               goto err0;
-
        /*
         * If cgroup gets more users after we read count, we won't have
         * enough space - tough.  This race is indistinguishable to the
@@ -2131,57 +2215,31 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file)
         * show up until sometime later on.
         */
        npids = cgroup_task_count(cgrp);
-       if (npids) {
-               pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
-               if (!pidarray)
-                       goto err1;
-
-               npids = pid_array_load(pidarray, npids, cgrp);
-               sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
-
-               /* Call pid_array_to_buf() twice, first just to get bufsz */
-               ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
-               ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
-               if (!ctr->buf)
-                       goto err2;
-               ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
-
-               kfree(pidarray);
-       } else {
-               ctr->buf = NULL;
-               ctr->bufsz = 0;
-       }
-       file->private_data = ctr;
-       return 0;
-
-err2:
-       kfree(pidarray);
-err1:
-       kfree(ctr);
-err0:
-       return -ENOMEM;
-}
+       pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
+       if (!pidarray)
+               return -ENOMEM;
+       npids = pid_array_load(pidarray, npids, cgrp);
+       sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
 
-static ssize_t cgroup_tasks_read(struct cgroup *cgrp,
-                                   struct cftype *cft,
-                                   struct file *file, char __user *buf,
-                                   size_t nbytes, loff_t *ppos)
-{
-       struct ctr_struct *ctr = file->private_data;
+       /*
+        * Store the array in the cgroup, freeing the old
+        * array if necessary
+        */
+       down_write(&cgrp->pids_mutex);
+       kfree(cgrp->tasks_pids);
+       cgrp->tasks_pids = pidarray;
+       cgrp->pids_length = npids;
+       cgrp->pids_use_count++;
+       up_write(&cgrp->pids_mutex);
 
-       return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
-}
+       file->f_op = &cgroup_tasks_operations;
 
-static int cgroup_tasks_release(struct inode *unused_inode,
-                                       struct file *file)
-{
-       struct ctr_struct *ctr;
-
-       if (file->f_mode & FMODE_READ) {
-               ctr = file->private_data;
-               kfree(ctr->buf);
-               kfree(ctr);
+       retval = seq_open(file, &cgroup_tasks_seq_operations);
+       if (retval) {
+               release_cgroup_pid_array(cgrp);
+               return retval;
        }
+       ((struct seq_file *)file->private_data)->private = cgrp;
        return 0;
 }
 
@@ -2210,7 +2268,6 @@ static struct cftype files[] = {
        {
                .name = "tasks",
                .open = cgroup_tasks_open,
-               .read = cgroup_tasks_read,
                .write_u64 = cgroup_tasks_write,
                .release = cgroup_tasks_release,
                .private = FILE_TASKLIST,
@@ -2300,10 +2357,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 
        mutex_lock(&cgroup_mutex);
 
-       INIT_LIST_HEAD(&cgrp->sibling);
-       INIT_LIST_HEAD(&cgrp->children);
-       INIT_LIST_HEAD(&cgrp->css_sets);
-       INIT_LIST_HEAD(&cgrp->release_list);
+       init_cgroup_housekeeping(cgrp);
 
        cgrp->parent = parent;
        cgrp->root = parent->root;
@@ -2495,8 +2549,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 int __init cgroup_init_early(void)
 {
        int i;
-       kref_init(&init_css_set.ref);
-       kref_get(&init_css_set.ref);
+       atomic_set(&init_css_set.refcount, 1);
        INIT_LIST_HEAD(&init_css_set.cg_links);
        INIT_LIST_HEAD(&init_css_set.tasks);
        INIT_HLIST_NODE(&init_css_set.hlist);
index c3dc3aba4c02a5b243b25b1ed7be971c5ec1ef17..daca6209202df4a84df7e0fc41025e33c7642131 100644 (file)
@@ -57,7 +57,7 @@ static u64 current_css_set_refcount_read(struct cgroup *cont,
        u64 count;
 
        rcu_read_lock();
-       count = atomic_read(&current->cgroups->ref.refcount);
+       count = atomic_read(&current->cgroups->refcount);
        rcu_read_unlock();
        return count;
 }
@@ -90,7 +90,7 @@ static struct cftype files[] =  {
        {
                .name = "releasable",
                .read_u64 = releasable_read,
-       }
+       },
 };
 
 static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
new file mode 100644 (file)
index 0000000..e950569
--- /dev/null
@@ -0,0 +1,379 @@
+/*
+ * cgroup_freezer.c -  control group freezer subsystem
+ *
+ * Copyright IBM Corporation, 2007
+ *
+ * Author : Cedric Le Goater <clg@fr.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/freezer.h>
+#include <linux/seq_file.h>
+
+enum freezer_state {
+       CGROUP_THAWED = 0,
+       CGROUP_FREEZING,
+       CGROUP_FROZEN,
+};
+
+struct freezer {
+       struct cgroup_subsys_state css;
+       enum freezer_state state;
+       spinlock_t lock; /* protects _writes_ to state */
+};
+
+static inline struct freezer *cgroup_freezer(
+               struct cgroup *cgroup)
+{
+       return container_of(
+               cgroup_subsys_state(cgroup, freezer_subsys_id),
+               struct freezer, css);
+}
+
+static inline struct freezer *task_freezer(struct task_struct *task)
+{
+       return container_of(task_subsys_state(task, freezer_subsys_id),
+                           struct freezer, css);
+}
+
+int cgroup_frozen(struct task_struct *task)
+{
+       struct freezer *freezer;
+       enum freezer_state state;
+
+       task_lock(task);
+       freezer = task_freezer(task);
+       state = freezer->state;
+       task_unlock(task);
+
+       return state == CGROUP_FROZEN;
+}
+
+/*
+ * cgroups_write_string() limits the size of freezer state strings to
+ * CGROUP_LOCAL_BUFFER_SIZE
+ */
+static const char *freezer_state_strs[] = {
+       "THAWED",
+       "FREEZING",
+       "FROZEN",
+};
+
+/*
+ * State diagram
+ * Transitions are caused by userspace writes to the freezer.state file.
+ * The values in parentheses are state labels. The rest are edge labels.
+ *
+ * (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN)
+ *    ^ ^                    |                     |
+ *    | \_______THAWED_______/                     |
+ *    \__________________________THAWED____________/
+ */
+
+struct cgroup_subsys freezer_subsys;
+
+/* Locks taken and their ordering
+ * ------------------------------
+ * css_set_lock
+ * cgroup_mutex (AKA cgroup_lock)
+ * task->alloc_lock (AKA task_lock)
+ * freezer->lock
+ * task->sighand->siglock
+ *
+ * cgroup code forces css_set_lock to be taken before task->alloc_lock
+ *
+ * freezer_create(), freezer_destroy():
+ * cgroup_mutex [ by cgroup core ]
+ *
+ * can_attach():
+ * cgroup_mutex
+ *
+ * cgroup_frozen():
+ * task->alloc_lock (to get task's cgroup)
+ *
+ * freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
+ * task->alloc_lock (to get task's cgroup)
+ * freezer->lock
+ *  sighand->siglock (if the cgroup is freezing)
+ *
+ * freezer_read():
+ * cgroup_mutex
+ *  freezer->lock
+ *   read_lock css_set_lock (cgroup iterator start)
+ *
+ * freezer_write() (freeze):
+ * cgroup_mutex
+ *  freezer->lock
+ *   read_lock css_set_lock (cgroup iterator start)
+ *    sighand->siglock
+ *
+ * freezer_write() (unfreeze):
+ * cgroup_mutex
+ *  freezer->lock
+ *   read_lock css_set_lock (cgroup iterator start)
+ *    task->alloc_lock (to prevent races with freeze_task())
+ *     sighand->siglock
+ */
+static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
+                                                 struct cgroup *cgroup)
+{
+       struct freezer *freezer;
+
+       freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
+       if (!freezer)
+               return ERR_PTR(-ENOMEM);
+
+       spin_lock_init(&freezer->lock);
+       freezer->state = CGROUP_THAWED;
+       return &freezer->css;
+}
+
+static void freezer_destroy(struct cgroup_subsys *ss,
+                           struct cgroup *cgroup)
+{
+       kfree(cgroup_freezer(cgroup));
+}
+
+/* Task is frozen or will freeze immediately when next it gets woken */
+static bool is_task_frozen_enough(struct task_struct *task)
+{
+       return frozen(task) ||
+               (task_is_stopped_or_traced(task) && freezing(task));
+}
+
+/*
+ * The call to cgroup_lock() in the freezer.state write method prevents
+ * a write to that file racing against an attach, and hence the
+ * can_attach() result will remain valid until the attach completes.
+ */
+static int freezer_can_attach(struct cgroup_subsys *ss,
+                             struct cgroup *new_cgroup,
+                             struct task_struct *task)
+{
+       struct freezer *freezer;
+       int retval;
+
+       /* Anything frozen can't move or be moved to/from */
+
+       if (is_task_frozen_enough(task))
+               return -EBUSY;
+
+       freezer = cgroup_freezer(new_cgroup);
+       if (freezer->state == CGROUP_FROZEN)
+               return -EBUSY;
+
+       retval = 0;
+       task_lock(task);
+       freezer = task_freezer(task);
+       if (freezer->state == CGROUP_FROZEN)
+               retval = -EBUSY;
+       task_unlock(task);
+       return retval;
+}
+
+static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
+{
+       struct freezer *freezer;
+
+       task_lock(task);
+       freezer = task_freezer(task);
+       task_unlock(task);
+
+       BUG_ON(freezer->state == CGROUP_FROZEN);
+       spin_lock_irq(&freezer->lock);
+       /* Locking avoids race with FREEZING -> THAWED transitions. */
+       if (freezer->state == CGROUP_FREEZING)
+               freeze_task(task, true);
+       spin_unlock_irq(&freezer->lock);
+}
+
+/*
+ * caller must hold freezer->lock
+ */
+static void update_freezer_state(struct cgroup *cgroup,
+                                struct freezer *freezer)
+{
+       struct cgroup_iter it;
+       struct task_struct *task;
+       unsigned int nfrozen = 0, ntotal = 0;
+
+       cgroup_iter_start(cgroup, &it);
+       while ((task = cgroup_iter_next(cgroup, &it))) {
+               ntotal++;
+               if (is_task_frozen_enough(task))
+                       nfrozen++;
+       }
+
+       /*
+        * Transition to FROZEN when no new tasks can be added ensures
+        * that we never exist in the FROZEN state while there are unfrozen
+        * tasks.
+        */
+       if (nfrozen == ntotal)
+               freezer->state = CGROUP_FROZEN;
+       else if (nfrozen > 0)
+               freezer->state = CGROUP_FREEZING;
+       else
+               freezer->state = CGROUP_THAWED;
+       cgroup_iter_end(cgroup, &it);
+}
+
+static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
+                       struct seq_file *m)
+{
+       struct freezer *freezer;
+       enum freezer_state state;
+
+       if (!cgroup_lock_live_group(cgroup))
+               return -ENODEV;
+
+       freezer = cgroup_freezer(cgroup);
+       spin_lock_irq(&freezer->lock);
+       state = freezer->state;
+       if (state == CGROUP_FREEZING) {
+               /* We change from FREEZING to FROZEN lazily if the cgroup was
+                * only partially frozen when we exited write. */
+               update_freezer_state(cgroup, freezer);
+               state = freezer->state;
+       }
+       spin_unlock_irq(&freezer->lock);
+       cgroup_unlock();
+
+       seq_puts(m, freezer_state_strs[state]);
+       seq_putc(m, '\n');
+       return 0;
+}
+
+static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
+{
+       struct cgroup_iter it;
+       struct task_struct *task;
+       unsigned int num_cant_freeze_now = 0;
+
+       freezer->state = CGROUP_FREEZING;
+       cgroup_iter_start(cgroup, &it);
+       while ((task = cgroup_iter_next(cgroup, &it))) {
+               if (!freeze_task(task, true))
+                       continue;
+               if (is_task_frozen_enough(task))
+                       continue;
+               if (!freezing(task) && !freezer_should_skip(task))
+                       num_cant_freeze_now++;
+       }
+       cgroup_iter_end(cgroup, &it);
+
+       return num_cant_freeze_now ? -EBUSY : 0;
+}
+
+static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
+{
+       struct cgroup_iter it;
+       struct task_struct *task;
+
+       cgroup_iter_start(cgroup, &it);
+       while ((task = cgroup_iter_next(cgroup, &it))) {
+               int do_wake;
+
+               task_lock(task);
+               do_wake = __thaw_process(task);
+               task_unlock(task);
+               if (do_wake)
+                       wake_up_process(task);
+       }
+       cgroup_iter_end(cgroup, &it);
+       freezer->state = CGROUP_THAWED;
+
+       return 0;
+}
+
+static int freezer_change_state(struct cgroup *cgroup,
+                               enum freezer_state goal_state)
+{
+       struct freezer *freezer;
+       int retval = 0;
+
+       freezer = cgroup_freezer(cgroup);
+       spin_lock_irq(&freezer->lock);
+       update_freezer_state(cgroup, freezer);
+       if (goal_state == freezer->state)
+               goto out;
+       switch (freezer->state) {
+       case CGROUP_THAWED:
+               retval = try_to_freeze_cgroup(cgroup, freezer);
+               break;
+       case CGROUP_FREEZING:
+               if (goal_state == CGROUP_FROZEN) {
+                       /* Userspace is retrying after
+                        * "/bin/echo FROZEN > freezer.state" returned -EBUSY */
+                       retval = try_to_freeze_cgroup(cgroup, freezer);
+                       break;
+               }
+               /* state == FREEZING and goal_state == THAWED, so unfreeze */
+       case CGROUP_FROZEN:
+               retval = unfreeze_cgroup(cgroup, freezer);
+               break;
+       default:
+               break;
+       }
+out:
+       spin_unlock_irq(&freezer->lock);
+
+       return retval;
+}
+
+static int freezer_write(struct cgroup *cgroup,
+                        struct cftype *cft,
+                        const char *buffer)
+{
+       int retval;
+       enum freezer_state goal_state;
+
+       if (strcmp(buffer, freezer_state_strs[CGROUP_THAWED]) == 0)
+               goal_state = CGROUP_THAWED;
+       else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0)
+               goal_state = CGROUP_FROZEN;
+       else
+               return -EIO;
+
+       if (!cgroup_lock_live_group(cgroup))
+               return -ENODEV;
+       retval = freezer_change_state(cgroup, goal_state);
+       cgroup_unlock();
+       return retval;
+}
+
+static struct cftype files[] = {
+       {
+               .name = "state",
+               .read_seq_string = freezer_read,
+               .write_string = freezer_write,
+       },
+};
+
+static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup)
+{
+       return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files));
+}
+
+struct cgroup_subsys freezer_subsys = {
+       .name           = "freezer",
+       .create         = freezer_create,
+       .destroy        = freezer_destroy,
+       .populate       = freezer_populate,
+       .subsys_id      = freezer_subsys_id,
+       .can_attach     = freezer_can_attach,
+       .attach         = NULL,
+       .fork           = freezer_fork,
+       .exit           = NULL,
+};
index 4c345210ed8cca3ffb1c64a9e4aef92cf97eedf8..abaee684ecbf95fe1d47426d7b47b77e6ed91fab 100644 (file)
@@ -54,9 +54,6 @@
 
 #ifdef CONFIG_IKCONFIG_PROC
 
-/**************************************************/
-/* globals and useful constants                   */
-
 static ssize_t
 ikconfig_read_current(struct file *file, char __user *buf,
                      size_t len, loff_t * offset)
@@ -71,9 +68,6 @@ static const struct file_operations ikconfig_file_ops = {
        .read = ikconfig_read_current,
 };
 
-/***************************************************/
-/* ikconfig_init: start up everything we need to */
-
 static int __init ikconfig_init(void)
 {
        struct proc_dir_entry *entry;
@@ -89,9 +83,6 @@ static int __init ikconfig_init(void)
        return 0;
 }
 
-/***************************************************/
-/* ikconfig_cleanup: clean up our mess           */
-
 static void __exit ikconfig_cleanup(void)
 {
        remove_proc_entry("config.gz", NULL);
index eab7bd6628e0ad48ef5f1d4fcea2af05f173bae5..3e00526f52ec14d56eb89ee1ab5c1a40fbed1331 100644 (file)
@@ -1172,7 +1172,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 {
        struct cpuset trialcs;
        int err;
-       int cpus_nonempty, balance_flag_changed;
+       int balance_flag_changed;
 
        trialcs = *cs;
        if (turning_on)
@@ -1184,7 +1184,6 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
        if (err < 0)
                return err;
 
-       cpus_nonempty = !cpus_empty(trialcs.cpus_allowed);
        balance_flag_changed = (is_sched_load_balance(cs) !=
                                        is_sched_load_balance(&trialcs));
 
@@ -1192,7 +1191,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
        cs->flags = trialcs.flags;
        mutex_unlock(&callback_mutex);
 
-       if (cpus_nonempty && balance_flag_changed)
+       if (!cpus_empty(trialcs.cpus_allowed) && balance_flag_changed)
                async_rebuild_sched_domains();
 
        return 0;
@@ -2437,19 +2436,15 @@ const struct file_operations proc_cpuset_operations = {
 void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
 {
        seq_printf(m, "Cpus_allowed:\t");
-       m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count,
-                                       task->cpus_allowed);
+       seq_cpumask(m, &task->cpus_allowed);
        seq_printf(m, "\n");
        seq_printf(m, "Cpus_allowed_list:\t");
-       m->count += cpulist_scnprintf(m->buf + m->count, m->size - m->count,
-                                       task->cpus_allowed);
+       seq_cpumask_list(m, &task->cpus_allowed);
        seq_printf(m, "\n");
        seq_printf(m, "Mems_allowed:\t");
-       m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count,
-                                       task->mems_allowed);
+       seq_nodemask(m, &task->mems_allowed);
        seq_printf(m, "\n");
        seq_printf(m, "Mems_allowed_list:\t");
-       m->count += nodelist_scnprintf(m->buf + m->count, m->size - m->count,
-                                       task->mems_allowed);
+       seq_nodemask_list(m, &task->mems_allowed);
        seq_printf(m, "\n");
 }
diff --git a/kernel/freezer.c b/kernel/freezer.c
new file mode 100644 (file)
index 0000000..ba6248b
--- /dev/null
@@ -0,0 +1,154 @@
+/*
+ * kernel/freezer.c - Function to freeze a process
+ *
+ * Originally from kernel/power/process.c
+ */
+
+#include <linux/interrupt.h>
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <linux/syscalls.h>
+#include <linux/freezer.h>
+
+/*
+ * freezing is complete, mark current process as frozen
+ */
+static inline void frozen_process(void)
+{
+       if (!unlikely(current->flags & PF_NOFREEZE)) {
+               current->flags |= PF_FROZEN;
+               wmb();
+       }
+       clear_freeze_flag(current);
+}
+
+/* Refrigerator is place where frozen processes are stored :-). */
+void refrigerator(void)
+{
+       /* Hmm, should we be allowed to suspend when there are realtime
+          processes around? */
+       long save;
+
+       task_lock(current);
+       if (freezing(current)) {
+               frozen_process();
+               task_unlock(current);
+       } else {
+               task_unlock(current);
+               return;
+       }
+       save = current->state;
+       pr_debug("%s entered refrigerator\n", current->comm);
+
+       spin_lock_irq(&current->sighand->siglock);
+       recalc_sigpending(); /* We sent fake signal, clean it up */
+       spin_unlock_irq(&current->sighand->siglock);
+
+       for (;;) {
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               if (!frozen(current))
+                       break;
+               schedule();
+       }
+       pr_debug("%s left refrigerator\n", current->comm);
+       __set_current_state(save);
+}
+EXPORT_SYMBOL(refrigerator);
+
+static void fake_signal_wake_up(struct task_struct *p)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&p->sighand->siglock, flags);
+       signal_wake_up(p, 0);
+       spin_unlock_irqrestore(&p->sighand->siglock, flags);
+}
+
+/**
+ *     freeze_task - send a freeze request to given task
+ *     @p: task to send the request to
+ *     @sig_only: if set, the request will only be sent if the task has the
+ *             PF_FREEZER_NOSIG flag unset
+ *     Return value: 'false', if @sig_only is set and the task has
+ *             PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
+ *
+ *     The freeze request is sent by setting the tasks's TIF_FREEZE flag and
+ *     either sending a fake signal to it or waking it up, depending on whether
+ *     or not it has PF_FREEZER_NOSIG set.  If @sig_only is set and the task
+ *     has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
+ *     TIF_FREEZE flag will not be set.
+ */
+bool freeze_task(struct task_struct *p, bool sig_only)
+{
+       /*
+        * We first check if the task is freezing and next if it has already
+        * been frozen to avoid the race with frozen_process() which first marks
+        * the task as frozen and next clears its TIF_FREEZE.
+        */
+       if (!freezing(p)) {
+               rmb();
+               if (frozen(p))
+                       return false;
+
+               if (!sig_only || should_send_signal(p))
+                       set_freeze_flag(p);
+               else
+                       return false;
+       }
+
+       if (should_send_signal(p)) {
+               if (!signal_pending(p))
+                       fake_signal_wake_up(p);
+       } else if (sig_only) {
+               return false;
+       } else {
+               wake_up_state(p, TASK_INTERRUPTIBLE);
+       }
+
+       return true;
+}
+
+void cancel_freezing(struct task_struct *p)
+{
+       unsigned long flags;
+
+       if (freezing(p)) {
+               pr_debug("  clean up: %s\n", p->comm);
+               clear_freeze_flag(p);
+               spin_lock_irqsave(&p->sighand->siglock, flags);
+               recalc_sigpending_and_wake(p);
+               spin_unlock_irqrestore(&p->sighand->siglock, flags);
+       }
+}
+
+/*
+ * Wake up a frozen process
+ *
+ * task_lock() is needed to prevent the race with refrigerator() which may
+ * occur if the freezing of tasks fails.  Namely, without the lock, if the
+ * freezing of tasks failed, thaw_tasks() might have run before a task in
+ * refrigerator() could call frozen_process(), in which case the task would be
+ * frozen and no one would thaw it.
+ */
+int __thaw_process(struct task_struct *p)
+{
+       if (frozen(p)) {
+               p->flags &= ~PF_FROZEN;
+               return 1;
+       }
+       clear_freeze_flag(p);
+       return 0;
+}
+
+int thaw_process(struct task_struct *p)
+{
+       task_lock(p);
+       if (__thaw_process(p) == 1) {
+               task_unlock(p);
+               wake_up_process(p);
+               return 1;
+       }
+       task_unlock(p);
+       return 0;
+}
+EXPORT_SYMBOL(thaw_process);
index aef265325cd3596db981ed2515efec42f255804e..777ac458ac993b8195a3c3cb7042a941ba611339 100644 (file)
@@ -1371,6 +1371,7 @@ static int __init crash_save_vmcoreinfo_init(void)
        VMCOREINFO_SYMBOL(node_online_map);
        VMCOREINFO_SYMBOL(swapper_pg_dir);
        VMCOREINFO_SYMBOL(_stext);
+       VMCOREINFO_SYMBOL(vmlist);
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
        VMCOREINFO_SYMBOL(mem_map);
@@ -1406,6 +1407,7 @@ static int __init crash_save_vmcoreinfo_init(void)
        VMCOREINFO_OFFSET(free_area, free_list);
        VMCOREINFO_OFFSET(list_head, next);
        VMCOREINFO_OFFSET(list_head, prev);
+       VMCOREINFO_OFFSET(vm_struct, addr);
        VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
        VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
        VMCOREINFO_NUMBER(NR_FREE_PAGES);
index 96cff2f8710baac71e7ce91ec96202665142b5e8..14ec64fe175ad5a742aac30718362a1f6875e9af 100644 (file)
@@ -171,12 +171,11 @@ EXPORT_SYMBOL(kthread_create);
  */
 void kthread_bind(struct task_struct *k, unsigned int cpu)
 {
-       if (k->state != TASK_UNINTERRUPTIBLE) {
+       /* Must have done schedule() in kthread() before we set_task_cpu */
+       if (!wait_task_inactive(k, TASK_UNINTERRUPTIBLE)) {
                WARN_ON(1);
                return;
        }
-       /* Must have done schedule() in kthread() before we set_task_cpu */
-       wait_task_inactive(k, 0);
        set_task_cpu(k, cpu);
        k->cpus_allowed = cpumask_of_cpu(cpu);
        k->rt.nr_cpus_allowed = 1;
index 278946aecaf0d24b46b0736506de8ca3aafdc873..ca634019497a5cf8b035872bdaf8edcfaefd8b88 100644 (file)
@@ -28,121 +28,6 @@ static inline int freezeable(struct task_struct * p)
        return 1;
 }
 
-/*
- * freezing is complete, mark current process as frozen
- */
-static inline void frozen_process(void)
-{
-       if (!unlikely(current->flags & PF_NOFREEZE)) {
-               current->flags |= PF_FROZEN;
-               wmb();
-       }
-       clear_freeze_flag(current);
-}
-
-/* Refrigerator is place where frozen processes are stored :-). */
-void refrigerator(void)
-{
-       /* Hmm, should we be allowed to suspend when there are realtime
-          processes around? */
-       long save;
-
-       task_lock(current);
-       if (freezing(current)) {
-               frozen_process();
-               task_unlock(current);
-       } else {
-               task_unlock(current);
-               return;
-       }
-       save = current->state;
-       pr_debug("%s entered refrigerator\n", current->comm);
-
-       spin_lock_irq(&current->sighand->siglock);
-       recalc_sigpending(); /* We sent fake signal, clean it up */
-       spin_unlock_irq(&current->sighand->siglock);
-
-       for (;;) {
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               if (!frozen(current))
-                       break;
-               schedule();
-       }
-       pr_debug("%s left refrigerator\n", current->comm);
-       __set_current_state(save);
-}
-
-static void fake_signal_wake_up(struct task_struct *p)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&p->sighand->siglock, flags);
-       signal_wake_up(p, 0);
-       spin_unlock_irqrestore(&p->sighand->siglock, flags);
-}
-
-static inline bool should_send_signal(struct task_struct *p)
-{
-       return !(p->flags & PF_FREEZER_NOSIG);
-}
-
-/**
- *     freeze_task - send a freeze request to given task
- *     @p: task to send the request to
- *     @sig_only: if set, the request will only be sent if the task has the
- *             PF_FREEZER_NOSIG flag unset
- *     Return value: 'false', if @sig_only is set and the task has
- *             PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
- *
- *     The freeze request is sent by setting the tasks's TIF_FREEZE flag and
- *     either sending a fake signal to it or waking it up, depending on whether
- *     or not it has PF_FREEZER_NOSIG set.  If @sig_only is set and the task
- *     has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
- *     TIF_FREEZE flag will not be set.
- */
-static bool freeze_task(struct task_struct *p, bool sig_only)
-{
-       /*
-        * We first check if the task is freezing and next if it has already
-        * been frozen to avoid the race with frozen_process() which first marks
-        * the task as frozen and next clears its TIF_FREEZE.
-        */
-       if (!freezing(p)) {
-               rmb();
-               if (frozen(p))
-                       return false;
-
-               if (!sig_only || should_send_signal(p))
-                       set_freeze_flag(p);
-               else
-                       return false;
-       }
-
-       if (should_send_signal(p)) {
-               if (!signal_pending(p))
-                       fake_signal_wake_up(p);
-       } else if (sig_only) {
-               return false;
-       } else {
-               wake_up_state(p, TASK_INTERRUPTIBLE);
-       }
-
-       return true;
-}
-
-static void cancel_freezing(struct task_struct *p)
-{
-       unsigned long flags;
-
-       if (freezing(p)) {
-               pr_debug("  clean up: %s\n", p->comm);
-               clear_freeze_flag(p);
-               spin_lock_irqsave(&p->sighand->siglock, flags);
-               recalc_sigpending_and_wake(p);
-               spin_unlock_irqrestore(&p->sighand->siglock, flags);
-       }
-}
-
 static int try_to_freeze_tasks(bool sig_only)
 {
        struct task_struct *g, *p;
@@ -250,6 +135,9 @@ static void thaw_tasks(bool nosig_only)
                if (nosig_only && should_send_signal(p))
                        continue;
 
+               if (cgroup_frozen(p))
+                       continue;
+
                thaw_process(p);
        } while_each_thread(g, p);
        read_unlock(&tasklist_lock);
@@ -264,4 +152,3 @@ void thaw_processes(void)
        printk("done.\n");
 }
 
-EXPORT_SYMBOL(refrigerator);
index 356699a96d565faa2e034253f9178c7d0aa2829d..1e68e4c39e2c05306dbfd9d111ef018aa0f8d9ca 100644 (file)
@@ -45,7 +45,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
  * TASK_TRACED, resume it now.
  * Requires that irqs be disabled.
  */
-void ptrace_untrace(struct task_struct *child)
+static void ptrace_untrace(struct task_struct *child)
 {
        spin_lock(&child->sighand->siglock);
        if (task_is_traced(child)) {
index ca4bbbe04aa4db9d150a503c9025dbe83e0d62a3..59236e8b9daa38e1e92a709e769fa75d857bb41e 100644 (file)
@@ -54,9 +54,9 @@
 #include <linux/cpu.h>
 #include <linux/random.h>
 #include <linux/delay.h>
-#include <linux/byteorder/swabb.h>
 #include <linux/cpumask.h>
 #include <linux/rcupreempt_trace.h>
+#include <asm/byteorder.h>
 
 /*
  * PREEMPT_RCU data structures.
index 617d41e4d6a08eae69152515e1ac47a9c8a3fc85..b3cc73931d1f6411c6e99faf205124987d4a887e 100644 (file)
@@ -833,6 +833,16 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = &proc_dointvec,
        },
 #endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "scan_unevictable_pages",
+               .data           = &scan_unevictable_pages,
+               .maxlen         = sizeof(scan_unevictable_pages),
+               .mode           = 0644,
+               .proc_handler   = &scan_unevictable_handler,
+       },
+#endif
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
index 06fb57c86de07007dad10f4bbeb3b1adcc0742c4..482df94ea21eb3e9ab7b2ef4efe88643147c3d80 100644 (file)
@@ -315,17 +315,6 @@ int bitmap_scnprintf(char *buf, unsigned int buflen,
 }
 EXPORT_SYMBOL(bitmap_scnprintf);
 
-/**
- * bitmap_scnprintf_len - return buffer length needed to convert
- * bitmap to an ASCII hex string
- * @nr_bits: number of bits to be converted
- */
-int bitmap_scnprintf_len(unsigned int nr_bits)
-{
-       unsigned int nr_nibbles = ALIGN(nr_bits, 4) / 4;
-       return nr_nibbles + ALIGN(nr_nibbles, CHUNKSZ / 4) / (CHUNKSZ / 4) - 1;
-}
-
 /**
  * __bitmap_parse - convert an ASCII hex string into a bitmap.
  * @buf: pointer to buffer containing string.
index cceecb6a963d1638bd420a82f7a3b85014cea379..a013bbc237178c1bc86c522b0e3ee4a66f47a2c9 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/kernel.h>
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
+#include <linux/ioport.h>
 
 #include <asm/page.h>          /* for PAGE_SIZE */
 #include <asm/div64.h>
@@ -550,18 +551,51 @@ static char *symbol_string(char *buf, char *end, void *ptr, int field_width, int
 #endif
 }
 
+static char *resource_string(char *buf, char *end, struct resource *res, int field_width, int precision, int flags)
+{
+#ifndef IO_RSRC_PRINTK_SIZE
+#define IO_RSRC_PRINTK_SIZE    4
+#endif
+
+#ifndef MEM_RSRC_PRINTK_SIZE
+#define MEM_RSRC_PRINTK_SIZE   8
+#endif
+
+       /* room for the actual numbers, the two "0x", -, [, ] and the final zero */
+       char sym[4*sizeof(resource_size_t) + 8];
+       char *p = sym, *pend = sym + sizeof(sym);
+       int size = -1;
+
+       if (res->flags & IORESOURCE_IO)
+               size = IO_RSRC_PRINTK_SIZE;
+       else if (res->flags & IORESOURCE_MEM)
+               size = MEM_RSRC_PRINTK_SIZE;
+
+       *p++ = '[';
+       p = number(p, pend, res->start, 16, size, -1, SPECIAL | SMALL | ZEROPAD);
+       *p++ = '-';
+       p = number(p, pend, res->end, 16, size, -1, SPECIAL | SMALL | ZEROPAD);
+       *p++ = ']';
+       *p = 0;
+
+       return string(buf, end, sym, field_width, precision, flags);
+}
+
 /*
  * Show a '%p' thing.  A kernel extension is that the '%p' is followed
  * by an extra set of alphanumeric characters that are extended format
  * specifiers.
  *
- * Right now we just handle 'F' (for symbolic Function descriptor pointers)
- * and 'S' (for Symbolic direct pointers), but this can easily be
- * extended in the future (network address types etc).
+ * Right now we handle:
+ *
+ * - 'F' For symbolic function descriptor pointers
+ * - 'S' For symbolic direct pointers
+ * - 'R' For a struct resource pointer, it prints the range of
+ *       addresses (not the name nor the flags)
  *
- * The difference between 'S' and 'F' is that on ia64 and ppc64 function
- * pointers are really function descriptors, which contain a pointer the
- * real address. 
+ * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
+ * function pointers are really function descriptors, which contain a
+ * pointer to the real address.
  */
 static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field_width, int precision, int flags)
 {
@@ -571,6 +605,8 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field
                /* Fallthrough */
        case 'S':
                return symbol_string(buf, end, ptr, field_width, precision, flags);
+       case 'R':
+               return resource_string(buf, end, ptr, field_width, precision, flags);
        }
        flags |= SMALL;
        if (field_width == -1) {
@@ -590,6 +626,7 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field
  * This function follows C99 vsnprintf, but has some extensions:
  * %pS output the name of a text symbol
  * %pF output the name of a function pointer
+ * %pR output the address range in a struct resource
  *
  * The return value is the number of characters which would
  * be generated for the given input, excluding the trailing
index 1a501a4de95cba98543bf706d8c8078ce598d3cd..5b5790f8a81686644a2d27beee9aa1f1a7f006c3 100644 (file)
@@ -209,5 +209,16 @@ config VIRT_TO_BUS
        def_bool y
        depends on !ARCH_NO_VIRT_TO_BUS
 
+config UNEVICTABLE_LRU
+       bool "Add LRU list to track non-evictable pages"
+       default y
+       depends on MMU
+       help
+         Keeps unevictable pages off of the active and inactive pageout
+         lists, so kswapd will not waste CPU time or have its balancing
+         algorithms thrown off by scanning these pages.  Selecting this
+         will use one page flag and increase the code size a little,
+         say Y unless you know what you are doing.
+
 config MMU_NOTIFIER
        bool
index da4ccf015aea72ba4d18dc69cf47e7ac97481129..c06b45a1ff5f64cf2e007258bfdaf9af5c280d98 100644 (file)
@@ -33,5 +33,4 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_SMP) += allocpercpu.o
 obj-$(CONFIG_QUICKLIST) += quicklist.o
-obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o
-
+obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
index 903bf316912a68882dadfb0144aef035a6bd5fda..ab8553658af3fb2fcfac7b0c80bc2ed8d1343721 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/cpuset.h>
 #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
 #include <linux/memcontrol.h>
+#include <linux/mm_inline.h> /* for page_is_file_cache() */
 #include "internal.h"
 
 /*
@@ -115,12 +116,12 @@ void __remove_from_page_cache(struct page *page)
 {
        struct address_space *mapping = page->mapping;
 
-       mem_cgroup_uncharge_cache_page(page);
        radix_tree_delete(&mapping->page_tree, page->index);
        page->mapping = NULL;
        mapping->nrpages--;
        __dec_zone_page_state(page, NR_FILE_PAGES);
        BUG_ON(page_mapped(page));
+       mem_cgroup_uncharge_cache_page(page);
 
        /*
         * Some filesystems seem to re-dirty the page even after
@@ -492,9 +493,24 @@ EXPORT_SYMBOL(add_to_page_cache_locked);
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
                                pgoff_t offset, gfp_t gfp_mask)
 {
-       int ret = add_to_page_cache(page, mapping, offset, gfp_mask);
-       if (ret == 0)
-               lru_cache_add(page);
+       int ret;
+
+       /*
+        * Splice_read and readahead add shmem/tmpfs pages into the page cache
+        * before shmem_readpage has a chance to mark them as SwapBacked: they
+        * need to go on the active_anon lru below, and mem_cgroup_cache_charge
+        * (called in add_to_page_cache) needs to know where they're going too.
+        */
+       if (mapping_cap_swap_backed(mapping))
+               SetPageSwapBacked(page);
+
+       ret = add_to_page_cache(page, mapping, offset, gfp_mask);
+       if (ret == 0) {
+               if (page_is_file_cache(page))
+                       lru_cache_add_file(page);
+               else
+                       lru_cache_add_active_anon(page);
+       }
        return ret;
 }
 
@@ -557,17 +573,14 @@ EXPORT_SYMBOL(wait_on_page_bit);
  * mechananism between PageLocked pages and PageWriteback pages is shared.
  * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
  *
- * The first mb is necessary to safely close the critical section opened by the
- * test_and_set_bit() to lock the page; the second mb is necessary to enforce
- * ordering between the clear_bit and the read of the waitqueue (to avoid SMP
- * races with a parallel wait_on_page_locked()).
+ * The mb is necessary to enforce ordering between the clear_bit and the read
+ * of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked()).
  */
 void unlock_page(struct page *page)
 {
-       smp_mb__before_clear_bit();
-       if (!test_and_clear_bit(PG_locked, &page->flags))
-               BUG();
-       smp_mb__after_clear_bit(); 
+       VM_BUG_ON(!PageLocked(page));
+       clear_bit_unlock(PG_locked, &page->flags);
+       smp_mb__after_clear_bit();
        wake_up_page(page, PG_locked);
 }
 EXPORT_SYMBOL(unlock_page);
index 7881638e4a12bd8c5489c8d08b5779b5ae5a0080..7d12ca70ef7bf22c7e1efcdee186adad87a6c8c8 100644 (file)
@@ -21,6 +21,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include "internal.h"
+
 static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
                        unsigned long addr, pte_t *ptep)
 {
@@ -215,15 +217,31 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
                spin_unlock(&mapping->i_mmap_lock);
        }
 
+       if (vma->vm_flags & VM_LOCKED) {
+               /*
+                * drop PG_Mlocked flag for over-mapped range
+                */
+               unsigned int saved_flags = vma->vm_flags;
+               munlock_vma_pages_range(vma, start, start + size);
+               vma->vm_flags = saved_flags;
+       }
+
        mmu_notifier_invalidate_range_start(mm, start, start + size);
        err = populate_range(mm, vma, start, size, pgoff);
        mmu_notifier_invalidate_range_end(mm, start, start + size);
        if (!err && !(flags & MAP_NONBLOCK)) {
-               if (unlikely(has_write_lock)) {
-                       downgrade_write(&mm->mmap_sem);
-                       has_write_lock = 0;
+               if (vma->vm_flags & VM_LOCKED) {
+                       /*
+                        * might be mapping previously unmapped range of file
+                        */
+                       mlock_vma_pages_range(vma, start, start + size);
+               } else {
+                       if (unlikely(has_write_lock)) {
+                               downgrade_write(&mm->mmap_sem);
+                               has_write_lock = 0;
+                       }
+                       make_pages_present(start, start+size);
                }
-               make_pages_present(start, start+size);
        }
 
        /*
@@ -240,4 +258,3 @@ out:
 
        return err;
 }
-
index 38633864a93e874009d9e6aa00619cf7471b0d9e..ce8cbb29860bd1b867454014195fe497332e2f61 100644 (file)
@@ -262,7 +262,7 @@ struct resv_map {
        struct list_head regions;
 };
 
-struct resv_map *resv_map_alloc(void)
+static struct resv_map *resv_map_alloc(void)
 {
        struct resv_map *resv_map = kmalloc(sizeof(*resv_map), GFP_KERNEL);
        if (!resv_map)
@@ -274,7 +274,7 @@ struct resv_map *resv_map_alloc(void)
        return resv_map;
 }
 
-void resv_map_release(struct kref *ref)
+static void resv_map_release(struct kref *ref)
 {
        struct resv_map *resv_map = container_of(ref, struct resv_map, refs);
 
@@ -289,7 +289,7 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
        if (!(vma->vm_flags & VM_SHARED))
                return (struct resv_map *)(get_vma_private_data(vma) &
                                                        ~HPAGE_RESV_MASK);
-       return 0;
+       return NULL;
 }
 
 static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
@@ -1459,11 +1459,11 @@ int hugetlb_report_meminfo(char *buf)
 {
        struct hstate *h = &default_hstate;
        return sprintf(buf,
-                       "HugePages_Total: %5lu\n"
-                       "HugePages_Free:  %5lu\n"
-                       "HugePages_Rsvd:  %5lu\n"
-                       "HugePages_Surp:  %5lu\n"
-                       "Hugepagesize:    %5lu kB\n",
+                       "HugePages_Total:   %5lu\n"
+                       "HugePages_Free:    %5lu\n"
+                       "HugePages_Rsvd:    %5lu\n"
+                       "HugePages_Surp:    %5lu\n"
+                       "Hugepagesize:   %8lu kB\n",
                        h->nr_huge_pages,
                        h->free_huge_pages,
                        h->resv_huge_pages,
@@ -1747,10 +1747,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
  * from other VMAs and let the children be SIGKILLed if they are faulting the
  * same region.
  */
-int unmap_ref_private(struct mm_struct *mm,
-                                       struct vm_area_struct *vma,
-                                       struct page *page,
-                                       unsigned long address)
+static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
+                               struct page *page, unsigned long address)
 {
        struct vm_area_struct *iter_vma;
        struct address_space *mapping;
@@ -2073,6 +2071,14 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
        return NULL;
 }
 
+static int huge_zeropage_ok(pte_t *ptep, int write, int shared)
+{
+       if (!ptep || write || shared)
+               return 0;
+       else
+               return huge_pte_none(huge_ptep_get(ptep));
+}
+
 int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        struct page **pages, struct vm_area_struct **vmas,
                        unsigned long *position, int *length, int i,
@@ -2082,6 +2088,8 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
        unsigned long vaddr = *position;
        int remainder = *length;
        struct hstate *h = hstate_vma(vma);
+       int zeropage_ok = 0;
+       int shared = vma->vm_flags & VM_SHARED;
 
        spin_lock(&mm->page_table_lock);
        while (vaddr < vma->vm_end && remainder) {
@@ -2094,8 +2102,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 * first, for the page indexing below to work.
                 */
                pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
+               if (huge_zeropage_ok(pte, write, shared))
+                       zeropage_ok = 1;
 
-               if (!pte || huge_pte_none(huge_ptep_get(pte)) ||
+               if (!pte ||
+                   (huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) ||
                    (write && !pte_write(huge_ptep_get(pte)))) {
                        int ret;
 
@@ -2115,8 +2126,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                page = pte_page(huge_ptep_get(pte));
 same_page:
                if (pages) {
-                       get_page(page);
-                       pages[i] = page + pfn_offset;
+                       if (zeropage_ok)
+                               pages[i] = ZERO_PAGE(0);
+                       else
+                               pages[i] = page + pfn_offset;
+                       get_page(pages[i]);
                }
 
                if (vmas)
index 1f43f7416972d3c4f1b07ab04898008a8ec76a81..e4e728bdf324fc62a24ba0c951869989ab081ea1 100644 (file)
@@ -39,6 +39,15 @@ static inline void __put_page(struct page *page)
        atomic_dec(&page->_count);
 }
 
+/*
+ * in mm/vmscan.c:
+ */
+extern int isolate_lru_page(struct page *page);
+extern void putback_lru_page(struct page *page);
+
+/*
+ * in mm/page_alloc.c
+ */
 extern void __free_pages_bootmem(struct page *page, unsigned int order);
 
 /*
@@ -52,6 +61,120 @@ static inline unsigned long page_order(struct page *page)
        return page_private(page);
 }
 
+extern long mlock_vma_pages_range(struct vm_area_struct *vma,
+                       unsigned long start, unsigned long end);
+extern void munlock_vma_pages_range(struct vm_area_struct *vma,
+                       unsigned long start, unsigned long end);
+static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
+{
+       munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end);
+}
+
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * unevictable_migrate_page() called only from migrate_page_copy() to
+ * migrate unevictable flag to new page.
+ * Note that the old page has been isolated from the LRU lists at this
+ * point so we don't need to worry about LRU statistics.
+ */
+static inline void unevictable_migrate_page(struct page *new, struct page *old)
+{
+       if (TestClearPageUnevictable(old))
+               SetPageUnevictable(new);
+}
+#else
+static inline void unevictable_migrate_page(struct page *new, struct page *old)
+{
+}
+#endif
+
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * Called only in fault path via page_evictable() for a new page
+ * to determine if it's being mapped into a LOCKED vma.
+ * If so, mark page as mlocked.
+ */
+static inline int is_mlocked_vma(struct vm_area_struct *vma, struct page *page)
+{
+       VM_BUG_ON(PageLRU(page));
+
+       if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED))
+               return 0;
+
+       if (!TestSetPageMlocked(page)) {
+               inc_zone_page_state(page, NR_MLOCK);
+               count_vm_event(UNEVICTABLE_PGMLOCKED);
+       }
+       return 1;
+}
+
+/*
+ * must be called with vma's mmap_sem held for read, and page locked.
+ */
+extern void mlock_vma_page(struct page *page);
+
+/*
+ * Clear the page's PageMlocked().  This can be useful in a situation where
+ * we want to unconditionally remove a page from the pagecache -- e.g.,
+ * on truncation or freeing.
+ *
+ * It is legal to call this function for any page, mlocked or not.
+ * If called for a page that is still mapped by mlocked vmas, all we do
+ * is revert to lazy LRU behaviour -- semantics are not broken.
+ */
+extern void __clear_page_mlock(struct page *page);
+static inline void clear_page_mlock(struct page *page)
+{
+       if (unlikely(TestClearPageMlocked(page)))
+               __clear_page_mlock(page);
+}
+
+/*
+ * mlock_migrate_page - called only from migrate_page_copy() to
+ * migrate the Mlocked page flag; update statistics.
+ */
+static inline void mlock_migrate_page(struct page *newpage, struct page *page)
+{
+       if (TestClearPageMlocked(page)) {
+               unsigned long flags;
+
+               local_irq_save(flags);
+               __dec_zone_page_state(page, NR_MLOCK);
+               SetPageMlocked(newpage);
+               __inc_zone_page_state(newpage, NR_MLOCK);
+               local_irq_restore(flags);
+       }
+}
+
+/*
+ * free_page_mlock() -- clean up attempts to free and mlocked() page.
+ * Page should not be on lru, so no need to fix that up.
+ * free_pages_check() will verify...
+ */
+static inline void free_page_mlock(struct page *page)
+{
+       if (unlikely(TestClearPageMlocked(page))) {
+               unsigned long flags;
+
+               local_irq_save(flags);
+               __dec_zone_page_state(page, NR_MLOCK);
+               __count_vm_event(UNEVICTABLE_MLOCKFREED);
+               local_irq_restore(flags);
+       }
+}
+
+#else /* CONFIG_UNEVICTABLE_LRU */
+static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p)
+{
+       return 0;
+}
+static inline void clear_page_mlock(struct page *page) { }
+static inline void mlock_vma_page(struct page *page) { }
+static inline void mlock_migrate_page(struct page *new, struct page *old) { }
+static inline void free_page_mlock(struct page *page) { }
+
+#endif /* CONFIG_UNEVICTABLE_LRU */
+
 /*
  * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node,
  * so all functions starting at paging_init should be marked __init
@@ -120,4 +243,12 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
 }
 #endif /* CONFIG_SPARSEMEM */
 
+#define GUP_FLAGS_WRITE                  0x1
+#define GUP_FLAGS_FORCE                  0x2
+#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4
+
+int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+                    unsigned long start, int len, int flags,
+                    struct page **pages, struct vm_area_struct **vmas);
+
 #endif
index 36896f3eb7f5e5c2e4c3803cc8125fe461a71669..d4a92b63e98e1da2fa619a7272cd0db3d15f18f1 100644 (file)
 #include <linux/fs.h>
 #include <linux/seq_file.h>
 #include <linux/vmalloc.h>
+#include <linux/mm_inline.h>
+#include <linux/page_cgroup.h>
 
 #include <asm/uaccess.h>
 
 struct cgroup_subsys mem_cgroup_subsys __read_mostly;
-static struct kmem_cache *page_cgroup_cache __read_mostly;
 #define MEM_CGROUP_RECLAIM_RETRIES     5
 
 /*
@@ -65,11 +66,10 @@ struct mem_cgroup_stat {
 /*
  * For accounting under irq disable, no need for increment preempt count.
  */
-static void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat *stat,
+static inline void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat_cpu *stat,
                enum mem_cgroup_stat_index idx, int val)
 {
-       int cpu = smp_processor_id();
-       stat->cpustat[cpu].count[idx] += val;
+       stat->count[idx] += val;
 }
 
 static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
@@ -85,22 +85,13 @@ static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
 /*
  * per-zone information in memory controller.
  */
-
-enum mem_cgroup_zstat_index {
-       MEM_CGROUP_ZSTAT_ACTIVE,
-       MEM_CGROUP_ZSTAT_INACTIVE,
-
-       NR_MEM_CGROUP_ZSTAT,
-};
-
 struct mem_cgroup_per_zone {
        /*
         * spin_lock to protect the per cgroup LRU
         */
        spinlock_t              lru_lock;
-       struct list_head        active_list;
-       struct list_head        inactive_list;
-       unsigned long count[NR_MEM_CGROUP_ZSTAT];
+       struct list_head        lists[NR_LRU_LISTS];
+       unsigned long           count[NR_LRU_LISTS];
 };
 /* Macro for accessing counter */
 #define MEM_CGROUP_ZSTAT(mz, idx)      ((mz)->count[(idx)])
@@ -144,69 +135,52 @@ struct mem_cgroup {
 };
 static struct mem_cgroup init_mem_cgroup;
 
-/*
- * We use the lower bit of the page->page_cgroup pointer as a bit spin
- * lock.  We need to ensure that page->page_cgroup is at least two
- * byte aligned (based on comments from Nick Piggin).  But since
- * bit_spin_lock doesn't actually set that lock bit in a non-debug
- * uniprocessor kernel, we should avoid setting it here too.
- */
-#define PAGE_CGROUP_LOCK_BIT   0x0
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
-#define PAGE_CGROUP_LOCK       (1 << PAGE_CGROUP_LOCK_BIT)
-#else
-#define PAGE_CGROUP_LOCK       0x0
-#endif
-
-/*
- * A page_cgroup page is associated with every page descriptor. The
- * page_cgroup helps us identify information about the cgroup
- */
-struct page_cgroup {
-       struct list_head lru;           /* per cgroup LRU list */
-       struct page *page;
-       struct mem_cgroup *mem_cgroup;
-       int flags;
-};
-#define PAGE_CGROUP_FLAG_CACHE (0x1)   /* charged as cache */
-#define PAGE_CGROUP_FLAG_ACTIVE (0x2)  /* page is active in this cgroup */
-
-static int page_cgroup_nid(struct page_cgroup *pc)
-{
-       return page_to_nid(pc->page);
-}
-
-static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
-{
-       return page_zonenum(pc->page);
-}
-
 enum charge_type {
        MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
        MEM_CGROUP_CHARGE_TYPE_MAPPED,
+       MEM_CGROUP_CHARGE_TYPE_SHMEM,   /* used by page migration of shmem */
        MEM_CGROUP_CHARGE_TYPE_FORCE,   /* used by force_empty */
+       NR_CHARGE_TYPE,
+};
+
+/* shorthand flag masks, used only in this file (for readability) */
+#define PCGF_CACHE     (1UL << PCG_CACHE)
+#define PCGF_USED      (1UL << PCG_USED)
+#define PCGF_ACTIVE    (1UL << PCG_ACTIVE)
+#define PCGF_LOCK      (1UL << PCG_LOCK)
+#define PCGF_FILE      (1UL << PCG_FILE)
+static const unsigned long
+pcg_default_flags[NR_CHARGE_TYPE] = {
+       PCGF_CACHE | PCGF_FILE | PCGF_USED | PCGF_LOCK, /* File Cache */
+       PCGF_ACTIVE | PCGF_USED | PCGF_LOCK, /* Anon */
+       PCGF_ACTIVE | PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */
+       0, /* FORCE */
 };
 
 /*
  * Always modified under lru lock. Then, not necessary to preempt_disable()
  */
-static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags,
-                                       bool charge)
+static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
+                                        struct page_cgroup *pc,
+                                        bool charge)
 {
        int val = (charge)? 1 : -1;
        struct mem_cgroup_stat *stat = &mem->stat;
+       struct mem_cgroup_stat_cpu *cpustat;
 
        VM_BUG_ON(!irqs_disabled());
-       if (flags & PAGE_CGROUP_FLAG_CACHE)
-               __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_CACHE, val);
+
+       cpustat = &stat->cpustat[smp_processor_id()];
+       if (PageCgroupCache(pc))
+               __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_CACHE, val);
        else
-               __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val);
+               __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_RSS, val);
 
        if (charge)
-               __mem_cgroup_stat_add_safe(stat,
+               __mem_cgroup_stat_add_safe(cpustat,
                                MEM_CGROUP_STAT_PGPGIN_COUNT, 1);
        else
-               __mem_cgroup_stat_add_safe(stat,
+               __mem_cgroup_stat_add_safe(cpustat,
                                MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
 }
 
@@ -227,7 +201,7 @@ page_cgroup_zoneinfo(struct page_cgroup *pc)
 }
 
 static unsigned long mem_cgroup_get_all_zonestat(struct mem_cgroup *mem,
-                                       enum mem_cgroup_zstat_index idx)
+                                       enum lru_list idx)
 {
        int nid, zid;
        struct mem_cgroup_per_zone *mz;
@@ -262,85 +236,77 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
                                struct mem_cgroup, css);
 }
 
-static inline int page_cgroup_locked(struct page *page)
-{
-       return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
-static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
-{
-       VM_BUG_ON(!page_cgroup_locked(page));
-       page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK);
-}
-
-struct page_cgroup *page_get_page_cgroup(struct page *page)
-{
-       return (struct page_cgroup *) (page->page_cgroup & ~PAGE_CGROUP_LOCK);
-}
-
-static void lock_page_cgroup(struct page *page)
-{
-       bit_spin_lock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
-static int try_lock_page_cgroup(struct page *page)
-{
-       return bit_spin_trylock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
-static void unlock_page_cgroup(struct page *page)
-{
-       bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
 static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
                        struct page_cgroup *pc)
 {
-       int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
+       int lru = LRU_BASE;
+
+       if (PageCgroupUnevictable(pc))
+               lru = LRU_UNEVICTABLE;
+       else {
+               if (PageCgroupActive(pc))
+                       lru += LRU_ACTIVE;
+               if (PageCgroupFile(pc))
+                       lru += LRU_FILE;
+       }
 
-       if (from)
-               MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
-       else
-               MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
+       MEM_CGROUP_ZSTAT(mz, lru) -= 1;
 
-       mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false);
+       mem_cgroup_charge_statistics(pc->mem_cgroup, pc, false);
        list_del(&pc->lru);
 }
 
 static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
                                struct page_cgroup *pc)
 {
-       int to = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
-
-       if (!to) {
-               MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
-               list_add(&pc->lru, &mz->inactive_list);
-       } else {
-               MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1;
-               list_add(&pc->lru, &mz->active_list);
+       int lru = LRU_BASE;
+
+       if (PageCgroupUnevictable(pc))
+               lru = LRU_UNEVICTABLE;
+       else {
+               if (PageCgroupActive(pc))
+                       lru += LRU_ACTIVE;
+               if (PageCgroupFile(pc))
+                       lru += LRU_FILE;
        }
-       mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true);
+
+       MEM_CGROUP_ZSTAT(mz, lru) += 1;
+       list_add(&pc->lru, &mz->lists[lru]);
+
+       mem_cgroup_charge_statistics(pc->mem_cgroup, pc, true);
 }
 
-static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
+static void __mem_cgroup_move_lists(struct page_cgroup *pc, enum lru_list lru)
 {
-       int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
        struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
+       int active    = PageCgroupActive(pc);
+       int file      = PageCgroupFile(pc);
+       int unevictable = PageCgroupUnevictable(pc);
+       enum lru_list from = unevictable ? LRU_UNEVICTABLE :
+                               (LRU_FILE * !!file + !!active);
 
-       if (from)
-               MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
-       else
-               MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
+       if (lru == from)
+               return;
 
-       if (active) {
-               MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1;
-               pc->flags |= PAGE_CGROUP_FLAG_ACTIVE;
-               list_move(&pc->lru, &mz->active_list);
+       MEM_CGROUP_ZSTAT(mz, from) -= 1;
+       /*
+        * Although this update is done under mz->lru_lock, other flags,
+        * which are not related to the LRU, may be modified outside the
+        * lock; we therefore have to use the atomic set/clear flag ops.
+        */
+       if (is_unevictable_lru(lru)) {
+               ClearPageCgroupActive(pc);
+               SetPageCgroupUnevictable(pc);
        } else {
-               MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
-               pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE;
-               list_move(&pc->lru, &mz->inactive_list);
+               if (is_active_lru(lru))
+                       SetPageCgroupActive(pc);
+               else
+                       ClearPageCgroupActive(pc);
+               ClearPageCgroupUnevictable(pc);
        }
+
+       MEM_CGROUP_ZSTAT(mz, lru) += 1;
+       list_move(&pc->lru, &mz->lists[lru]);
 }
 
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
@@ -356,7 +322,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
 /*
  * This routine assumes that the appropriate zone's lru lock is already held
  */
-void mem_cgroup_move_lists(struct page *page, bool active)
+void mem_cgroup_move_lists(struct page *page, enum lru_list lru)
 {
        struct page_cgroup *pc;
        struct mem_cgroup_per_zone *mz;
@@ -372,17 +338,16 @@ void mem_cgroup_move_lists(struct page *page, bool active)
         * safely get to page_cgroup without it, so just try_lock it:
         * mem_cgroup_isolate_pages allows for page left on wrong list.
         */
-       if (!try_lock_page_cgroup(page))
+       pc = lookup_page_cgroup(page);
+       if (!trylock_page_cgroup(pc))
                return;
-
-       pc = page_get_page_cgroup(page);
-       if (pc) {
+       if (pc && PageCgroupUsed(pc)) {
                mz = page_cgroup_zoneinfo(pc);
                spin_lock_irqsave(&mz->lru_lock, flags);
-               __mem_cgroup_move_lists(pc, active);
+               __mem_cgroup_move_lists(pc, lru);
                spin_unlock_irqrestore(&mz->lru_lock, flags);
        }
-       unlock_page_cgroup(page);
+       unlock_page_cgroup(pc);
 }
 
 /*
@@ -402,21 +367,6 @@ int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
        return (int)((rss * 100L) / total);
 }
 
-/*
- * This function is called from vmscan.c. In page reclaiming loop. balance
- * between active and inactive list is calculated. For memory controller
- * page reclaiming, we should use using mem_cgroup's imbalance rather than
- * zone's global lru imbalance.
- */
-long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem)
-{
-       unsigned long active, inactive;
-       /* active and inactive are the number of pages. 'long' is ok.*/
-       active = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_ACTIVE);
-       inactive = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_INACTIVE);
-       return (long) (active / (inactive + 1));
-}
-
 /*
  * prev_priority control...this will be used in memory reclaim path.
  */
@@ -444,28 +394,17 @@ void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority)
  * (see include/linux/mmzone.h)
  */
 
-long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
-                                  struct zone *zone, int priority)
+long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
+                                       int priority, enum lru_list lru)
 {
-       long nr_active;
+       long nr_pages;
        int nid = zone->zone_pgdat->node_id;
        int zid = zone_idx(zone);
        struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);
 
-       nr_active = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE);
-       return (nr_active >> priority);
-}
+       nr_pages = MEM_CGROUP_ZSTAT(mz, lru);
 
-long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
-                                       struct zone *zone, int priority)
-{
-       long nr_inactive;
-       int nid = zone->zone_pgdat->node_id;
-       int zid = zone_idx(zone);
-       struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);
-
-       nr_inactive = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE);
-       return (nr_inactive >> priority);
+       return (nr_pages >> priority);
 }
 
 unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
@@ -473,7 +412,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
                                        unsigned long *scanned, int order,
                                        int mode, struct zone *z,
                                        struct mem_cgroup *mem_cont,
-                                       int active)
+                                       int active, int file)
 {
        unsigned long nr_taken = 0;
        struct page *page;
@@ -484,38 +423,38 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
        int nid = z->zone_pgdat->node_id;
        int zid = zone_idx(z);
        struct mem_cgroup_per_zone *mz;
+       int lru = LRU_FILE * !!file + !!active;
 
        BUG_ON(!mem_cont);
        mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
-       if (active)
-               src = &mz->active_list;
-       else
-               src = &mz->inactive_list;
-
+       src = &mz->lists[lru];
 
        spin_lock(&mz->lru_lock);
        scan = 0;
        list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
                if (scan >= nr_to_scan)
                        break;
+               if (unlikely(!PageCgroupUsed(pc)))
+                       continue;
                page = pc->page;
 
                if (unlikely(!PageLRU(page)))
                        continue;
 
-               if (PageActive(page) && !active) {
-                       __mem_cgroup_move_lists(pc, true);
-                       continue;
-               }
-               if (!PageActive(page) && active) {
-                       __mem_cgroup_move_lists(pc, false);
+               /*
+                * TODO: play better with lumpy reclaim, grabbing anything.
+                */
+               if (PageUnevictable(page) ||
+                   (PageActive(page) && !active) ||
+                   (!PageActive(page) && active)) {
+                       __mem_cgroup_move_lists(pc, page_lru(page));
                        continue;
                }
 
                scan++;
                list_move(&pc->lru, &pc_list);
 
-               if (__isolate_lru_page(page, mode) == 0) {
+               if (__isolate_lru_page(page, mode, file) == 0) {
                        list_move(&page->lru, dst);
                        nr_taken++;
                }
@@ -540,26 +479,27 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 {
        struct mem_cgroup *mem;
        struct page_cgroup *pc;
-       unsigned long flags;
        unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
        struct mem_cgroup_per_zone *mz;
+       unsigned long flags;
 
-       pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
-       if (unlikely(pc == NULL))
-               goto err;
-
+       pc = lookup_page_cgroup(page);
+       /* can happen at boot */
+       if (unlikely(!pc))
+               return 0;
+       prefetchw(pc);
        /*
         * We always charge the cgroup the mm_struct belongs to.
         * The mm_struct's mem_cgroup changes on task migration if the
         * thread group leader migrates. It's possible that mm is not
         * set, if so charge the init_mm (happens for pagecache usage).
         */
+
        if (likely(!memcg)) {
                rcu_read_lock();
                mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
                if (unlikely(!mem)) {
                        rcu_read_unlock();
-                       kmem_cache_free(page_cgroup_cache, pc);
                        return 0;
                }
                /*
@@ -572,7 +512,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
                css_get(&memcg->css);
        }
 
-       while (res_counter_charge(&mem->res, PAGE_SIZE)) {
+       while (unlikely(res_counter_charge(&mem->res, PAGE_SIZE))) {
                if (!(gfp_mask & __GFP_WAIT))
                        goto out;
 
@@ -595,39 +535,33 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
                }
        }
 
-       pc->mem_cgroup = mem;
-       pc->page = page;
-       /*
-        * If a page is accounted as a page cache, insert to inactive list.
-        * If anon, insert to active list.
-        */
-       if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
-               pc->flags = PAGE_CGROUP_FLAG_CACHE;
-       else
-               pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
 
-       lock_page_cgroup(page);
-       if (unlikely(page_get_page_cgroup(page))) {
-               unlock_page_cgroup(page);
+       lock_page_cgroup(pc);
+       if (unlikely(PageCgroupUsed(pc))) {
+               unlock_page_cgroup(pc);
                res_counter_uncharge(&mem->res, PAGE_SIZE);
                css_put(&mem->css);
-               kmem_cache_free(page_cgroup_cache, pc);
+
                goto done;
        }
-       page_assign_page_cgroup(page, pc);
+       pc->mem_cgroup = mem;
+       /*
+        * If a page is accounted as a page cache, insert to inactive list.
+        * If anon, insert to active list.
+        */
+       pc->flags = pcg_default_flags[ctype];
 
        mz = page_cgroup_zoneinfo(pc);
+
        spin_lock_irqsave(&mz->lru_lock, flags);
        __mem_cgroup_add_list(mz, pc);
        spin_unlock_irqrestore(&mz->lru_lock, flags);
+       unlock_page_cgroup(pc);
 
-       unlock_page_cgroup(page);
 done:
        return 0;
 out:
        css_put(&mem->css);
-       kmem_cache_free(page_cgroup_cache, pc);
-err:
        return -ENOMEM;
 }
 
@@ -635,7 +569,8 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 {
        if (mem_cgroup_subsys.disabled)
                return 0;
-
+       if (PageCompound(page))
+               return 0;
        /*
         * If already mapped, we don't have to account.
         * If page cache, page->mapping has address_space.
@@ -656,7 +591,8 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 {
        if (mem_cgroup_subsys.disabled)
                return 0;
-
+       if (PageCompound(page))
+               return 0;
        /*
         * Corner case handling. This is called from add_to_page_cache()
         * in usual. But some FS (shmem) precharges this page before calling it
@@ -669,22 +605,27 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
        if (!(gfp_mask & __GFP_WAIT)) {
                struct page_cgroup *pc;
 
-               lock_page_cgroup(page);
-               pc = page_get_page_cgroup(page);
-               if (pc) {
-                       VM_BUG_ON(pc->page != page);
-                       VM_BUG_ON(!pc->mem_cgroup);
-                       unlock_page_cgroup(page);
+
+               pc = lookup_page_cgroup(page);
+               if (!pc)
+                       return 0;
+               lock_page_cgroup(pc);
+               if (PageCgroupUsed(pc)) {
+                       unlock_page_cgroup(pc);
                        return 0;
                }
-               unlock_page_cgroup(page);
+               unlock_page_cgroup(pc);
        }
 
        if (unlikely(!mm))
                mm = &init_mm;
 
-       return mem_cgroup_charge_common(page, mm, gfp_mask,
+       if (page_is_file_cache(page))
+               return mem_cgroup_charge_common(page, mm, gfp_mask,
                                MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
+       else
+               return mem_cgroup_charge_common(page, mm, gfp_mask,
+                               MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
 }
 
 /*
@@ -704,44 +645,46 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
        /*
         * Check if our page_cgroup is valid
         */
-       lock_page_cgroup(page);
-       pc = page_get_page_cgroup(page);
-       if (unlikely(!pc))
-               goto unlock;
-
-       VM_BUG_ON(pc->page != page);
+       pc = lookup_page_cgroup(page);
+       if (unlikely(!pc || !PageCgroupUsed(pc)))
+               return;
 
-       if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
-           && ((pc->flags & PAGE_CGROUP_FLAG_CACHE)
-               || page_mapped(page)))
-               goto unlock;
+       lock_page_cgroup(pc);
+       if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED && page_mapped(page))
+            || !PageCgroupUsed(pc)) {
+               /* This happens at race in zap_pte_range() and do_swap_page()*/
+               unlock_page_cgroup(pc);
+               return;
+       }
+       ClearPageCgroupUsed(pc);
+       mem = pc->mem_cgroup;
 
        mz = page_cgroup_zoneinfo(pc);
        spin_lock_irqsave(&mz->lru_lock, flags);
        __mem_cgroup_remove_list(mz, pc);
        spin_unlock_irqrestore(&mz->lru_lock, flags);
+       unlock_page_cgroup(pc);
 
-       page_assign_page_cgroup(page, NULL);
-       unlock_page_cgroup(page);
-
-       mem = pc->mem_cgroup;
        res_counter_uncharge(&mem->res, PAGE_SIZE);
        css_put(&mem->css);
 
-       kmem_cache_free(page_cgroup_cache, pc);
        return;
-unlock:
-       unlock_page_cgroup(page);
 }
 
 void mem_cgroup_uncharge_page(struct page *page)
 {
+       /* early check. */
+       if (page_mapped(page))
+               return;
+       if (page->mapping && !PageAnon(page))
+               return;
        __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
 }
 
 void mem_cgroup_uncharge_cache_page(struct page *page)
 {
        VM_BUG_ON(page_mapped(page));
+       VM_BUG_ON(page->mapping);
        __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
 }
 
@@ -758,15 +701,19 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
        if (mem_cgroup_subsys.disabled)
                return 0;
 
-       lock_page_cgroup(page);
-       pc = page_get_page_cgroup(page);
-       if (pc) {
+       pc = lookup_page_cgroup(page);
+       lock_page_cgroup(pc);
+       if (PageCgroupUsed(pc)) {
                mem = pc->mem_cgroup;
                css_get(&mem->css);
-               if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
-                       ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+               if (PageCgroupCache(pc)) {
+                       if (page_is_file_cache(page))
+                               ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+                       else
+                               ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+               }
        }
-       unlock_page_cgroup(page);
+       unlock_page_cgroup(pc);
        if (mem) {
                ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
                        ctype, mem);
@@ -791,7 +738,7 @@ void mem_cgroup_end_migration(struct page *newpage)
         */
        if (!newpage->mapping)
                __mem_cgroup_uncharge_common(newpage,
-                                        MEM_CGROUP_CHARGE_TYPE_FORCE);
+                               MEM_CGROUP_CHARGE_TYPE_FORCE);
        else if (PageAnon(newpage))
                mem_cgroup_uncharge_page(newpage);
 }
@@ -863,7 +810,7 @@ int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val)
 #define FORCE_UNCHARGE_BATCH   (128)
 static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
                            struct mem_cgroup_per_zone *mz,
-                           int active)
+                           enum lru_list lru)
 {
        struct page_cgroup *pc;
        struct page *page;
@@ -871,15 +818,14 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
        unsigned long flags;
        struct list_head *list;
 
-       if (active)
-               list = &mz->active_list;
-       else
-               list = &mz->inactive_list;
+       list = &mz->lists[lru];
 
        spin_lock_irqsave(&mz->lru_lock, flags);
        while (!list_empty(list)) {
                pc = list_entry(list->prev, struct page_cgroup, lru);
                page = pc->page;
+               if (!PageCgroupUsed(pc))
+                       break;
                get_page(page);
                spin_unlock_irqrestore(&mz->lru_lock, flags);
                /*
@@ -894,8 +840,10 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
                                count = FORCE_UNCHARGE_BATCH;
                                cond_resched();
                        }
-               } else
-                       cond_resched();
+               } else {
+                       spin_lock_irqsave(&mz->lru_lock, flags);
+                       break;
+               }
                spin_lock_irqsave(&mz->lru_lock, flags);
        }
        spin_unlock_irqrestore(&mz->lru_lock, flags);
@@ -919,15 +867,17 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem)
        while (mem->res.usage > 0) {
                if (atomic_read(&mem->css.cgroup->count) > 0)
                        goto out;
+               /* This makes sure all *used* pages end up on an LRU list. */
+               lru_add_drain_all();
                for_each_node_state(node, N_POSSIBLE)
                        for (zid = 0; zid < MAX_NR_ZONES; zid++) {
                                struct mem_cgroup_per_zone *mz;
+                               enum lru_list l;
                                mz = mem_cgroup_zoneinfo(mem, node, zid);
-                               /* drop all page_cgroup in active_list */
-                               mem_cgroup_force_empty_list(mem, mz, 1);
-                               /* drop all page_cgroup in inactive_list */
-                               mem_cgroup_force_empty_list(mem, mz, 0);
+                               for_each_lru(l)
+                                       mem_cgroup_force_empty_list(mem, mz, l);
                        }
+               cond_resched();
        }
        ret = 0;
 out:
@@ -1012,14 +962,27 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
        }
        /* showing # of active pages */
        {
-               unsigned long active, inactive;
-
-               inactive = mem_cgroup_get_all_zonestat(mem_cont,
-                                               MEM_CGROUP_ZSTAT_INACTIVE);
-               active = mem_cgroup_get_all_zonestat(mem_cont,
-                                               MEM_CGROUP_ZSTAT_ACTIVE);
-               cb->fill(cb, "active", (active) * PAGE_SIZE);
-               cb->fill(cb, "inactive", (inactive) * PAGE_SIZE);
+               unsigned long active_anon, inactive_anon;
+               unsigned long active_file, inactive_file;
+               unsigned long unevictable;
+
+               inactive_anon = mem_cgroup_get_all_zonestat(mem_cont,
+                                               LRU_INACTIVE_ANON);
+               active_anon = mem_cgroup_get_all_zonestat(mem_cont,
+                                               LRU_ACTIVE_ANON);
+               inactive_file = mem_cgroup_get_all_zonestat(mem_cont,
+                                               LRU_INACTIVE_FILE);
+               active_file = mem_cgroup_get_all_zonestat(mem_cont,
+                                               LRU_ACTIVE_FILE);
+               unevictable = mem_cgroup_get_all_zonestat(mem_cont,
+                                                       LRU_UNEVICTABLE);
+
+               cb->fill(cb, "active_anon", (active_anon) * PAGE_SIZE);
+               cb->fill(cb, "inactive_anon", (inactive_anon) * PAGE_SIZE);
+               cb->fill(cb, "active_file", (active_file) * PAGE_SIZE);
+               cb->fill(cb, "inactive_file", (inactive_file) * PAGE_SIZE);
+               cb->fill(cb, "unevictable", unevictable * PAGE_SIZE);
+
        }
        return 0;
 }
@@ -1062,6 +1025,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 {
        struct mem_cgroup_per_node *pn;
        struct mem_cgroup_per_zone *mz;
+       enum lru_list l;
        int zone, tmp = node;
        /*
         * This routine is called against possible nodes.
@@ -1082,9 +1046,9 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 
        for (zone = 0; zone < MAX_NR_ZONES; zone++) {
                mz = &pn->zoneinfo[zone];
-               INIT_LIST_HEAD(&mz->active_list);
-               INIT_LIST_HEAD(&mz->inactive_list);
                spin_lock_init(&mz->lru_lock);
+               for_each_lru(l)
+                       INIT_LIST_HEAD(&mz->lists[l]);
        }
        return 0;
 }
@@ -1124,8 +1088,8 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
        int node;
 
        if (unlikely((cont->parent) == NULL)) {
+               page_cgroup_init();
                mem = &init_mem_cgroup;
-               page_cgroup_cache = KMEM_CACHE(page_cgroup, SLAB_PANIC);
        } else {
                mem = mem_cgroup_alloc();
                if (!mem)
index 1002f473f497c37f34c5c33d4dfa7d2bd49faed0..3a6c4a6583256584303c4ac7c8813938abc49ed0 100644 (file)
@@ -64,6 +64,8 @@
 
 #include "internal.h"
 
+#include "internal.h" /* FIXME: duplicate -- "internal.h" is already included two lines above */
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 /* use the per-pgdat data instead for discontigmem - mbligh */
 unsigned long max_mapnr;
@@ -1129,12 +1131,17 @@ static inline int use_zero_page(struct vm_area_struct *vma)
        return !vma->vm_ops || !vma->vm_ops->fault;
 }
 
-int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-               unsigned long start, int len, int write, int force,
+
+
+int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+                    unsigned long start, int len, int flags,
                struct page **pages, struct vm_area_struct **vmas)
 {
        int i;
-       unsigned int vm_flags;
+       unsigned int vm_flags = 0;
+       int write = !!(flags & GUP_FLAGS_WRITE);
+       int force = !!(flags & GUP_FLAGS_FORCE);
+       int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
 
        if (len <= 0)
                return 0;
@@ -1158,7 +1165,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                        pud_t *pud;
                        pmd_t *pmd;
                        pte_t *pte;
-                       if (write) /* user gate pages are read-only */
+
+                       /* user gate pages are read-only */
+                       if (!ignore && write)
                                return i ? : -EFAULT;
                        if (pg > TASK_SIZE)
                                pgd = pgd_offset_k(pg);
@@ -1190,8 +1199,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                        continue;
                }
 
-               if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
-                               || !(vm_flags & vma->vm_flags))
+               if (!vma ||
+                   (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
+                   (!ignore && !(vm_flags & vma->vm_flags)))
                        return i ? : -EFAULT;
 
                if (is_vm_hugetlb_page(vma)) {
@@ -1266,6 +1276,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
        } while (len);
        return i;
 }
+
+int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+               unsigned long start, int len, int write, int force,
+               struct page **pages, struct vm_area_struct **vmas)
+{
+       int flags = 0;
+
+       if (write)
+               flags |= GUP_FLAGS_WRITE;
+       if (force)
+               flags |= GUP_FLAGS_FORCE;
+
+       return __get_user_pages(tsk, mm,
+                               start, len, flags,
+                               pages, vmas);
+}
+
 EXPORT_SYMBOL(get_user_pages);
 
 pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
@@ -1296,18 +1323,14 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
        pte_t *pte;
        spinlock_t *ptl;
 
-       retval = mem_cgroup_charge(page, mm, GFP_KERNEL);
-       if (retval)
-               goto out;
-
        retval = -EINVAL;
        if (PageAnon(page))
-               goto out_uncharge;
+               goto out;
        retval = -ENOMEM;
        flush_dcache_page(page);
        pte = get_locked_pte(mm, addr, &ptl);
        if (!pte)
-               goto out_uncharge;
+               goto out;
        retval = -EBUSY;
        if (!pte_none(*pte))
                goto out_unlock;
@@ -1323,8 +1346,6 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
        return retval;
 out_unlock:
        pte_unmap_unlock(pte, ptl);
-out_uncharge:
-       mem_cgroup_uncharge_page(page);
 out:
        return retval;
 }
@@ -1858,6 +1879,15 @@ gotten:
        new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
        if (!new_page)
                goto oom;
+       /*
+        * Don't let another task, with possibly unlocked vma,
+        * keep the mlocked page.
+        */
+       if (vma->vm_flags & VM_LOCKED) {
+               lock_page(old_page);    /* for LRU manipulation */
+               clear_page_mlock(old_page);
+               unlock_page(old_page);
+       }
        cow_user_page(new_page, old_page, address, vma);
        __SetPageUptodate(new_page);
 
@@ -1886,11 +1916,13 @@ gotten:
                 * thread doing COW.
                 */
                ptep_clear_flush_notify(vma, address, page_table);
-               set_pte_at(mm, address, page_table, entry);
-               update_mmu_cache(vma, address, entry);
-               lru_cache_add_active(new_page);
+               SetPageSwapBacked(new_page);
+               lru_cache_add_active_or_unevictable(new_page, vma);
                page_add_new_anon_rmap(new_page, vma, address);
 
+//TODO:  is this safe?  do_anonymous_page() does it this way.
+               set_pte_at(mm, address, page_table, entry);
+               update_mmu_cache(vma, address, entry);
                if (old_page) {
                        /*
                         * Only after switching the pte to the new page may
@@ -2288,16 +2320,17 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                count_vm_event(PGMAJFAULT);
        }
 
+       mark_page_accessed(page);
+
+       lock_page(page);
+       delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+
        if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
-               delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
                ret = VM_FAULT_OOM;
+               unlock_page(page);
                goto out;
        }
 
-       mark_page_accessed(page);
-       lock_page(page);
-       delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
-
        /*
         * Back out if somebody else already faulted in this pte.
         */
@@ -2324,7 +2357,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
        page_add_anon_rmap(page, vma, address);
 
        swap_free(entry);
-       if (vm_swap_full())
+       if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
                remove_exclusive_swap_page(page);
        unlock_page(page);
 
@@ -2382,7 +2415,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
        if (!pte_none(*page_table))
                goto release;
        inc_mm_counter(mm, anon_rss);
-       lru_cache_add_active(page);
+       SetPageSwapBacked(page);
+       lru_cache_add_active_or_unevictable(page, vma);
        page_add_new_anon_rmap(page, vma, address);
        set_pte_at(mm, address, page_table, entry);
 
@@ -2423,6 +2457,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        struct page *page;
        pte_t entry;
        int anon = 0;
+       int charged = 0;
        struct page *dirty_page = NULL;
        struct vm_fault vmf;
        int ret;
@@ -2463,6 +2498,18 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                                ret = VM_FAULT_OOM;
                                goto out;
                        }
+                       if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
+                               ret = VM_FAULT_OOM;
+                               page_cache_release(page);
+                               goto out;
+                       }
+                       charged = 1;
+                       /*
+                        * Don't let another task, with possibly unlocked vma,
+                        * keep the mlocked page.
+                        */
+                       if (vma->vm_flags & VM_LOCKED)
+                               clear_page_mlock(vmf.page);
                        copy_user_highpage(page, vmf.page, address, vma);
                        __SetPageUptodate(page);
                } else {
@@ -2497,11 +2544,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
        }
 
-       if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
-               ret = VM_FAULT_OOM;
-               goto out;
-       }
-
        page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
 
        /*
@@ -2520,11 +2562,11 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                entry = mk_pte(page, vma->vm_page_prot);
                if (flags & FAULT_FLAG_WRITE)
                        entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-               set_pte_at(mm, address, page_table, entry);
                if (anon) {
-                        inc_mm_counter(mm, anon_rss);
-                        lru_cache_add_active(page);
-                        page_add_new_anon_rmap(page, vma, address);
+                       inc_mm_counter(mm, anon_rss);
+                       SetPageSwapBacked(page);
+                       lru_cache_add_active_or_unevictable(page, vma);
+                       page_add_new_anon_rmap(page, vma, address);
                } else {
                        inc_mm_counter(mm, file_rss);
                        page_add_file_rmap(page);
@@ -2533,11 +2575,14 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                                get_page(dirty_page);
                        }
                }
+//TODO:  is this safe?  do_anonymous_page() does it this way.
+               set_pte_at(mm, address, page_table, entry);
 
                /* no need to invalidate: a not-present page won't be cached */
                update_mmu_cache(vma, address, entry);
        } else {
-               mem_cgroup_uncharge_page(page);
+               if (charged)
+                       mem_cgroup_uncharge_page(page);
                if (anon)
                        page_cache_release(page);
                else
@@ -2772,19 +2817,9 @@ int make_pages_present(unsigned long addr, unsigned long end)
        len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;
        ret = get_user_pages(current, current->mm, addr,
                        len, write, 0, NULL, NULL);
-       if (ret < 0) {
-               /*
-                  SUS require strange return value to mlock
-                   - invalid addr generate to ENOMEM.
-                   - out of memory should generate EAGAIN.
-               */
-               if (ret == -EFAULT)
-                       ret = -ENOMEM;
-               else if (ret == -ENOMEM)
-                       ret = -EAGAIN;
+       if (ret < 0)
                return ret;
-       }
-       return ret == len ? 0 : -ENOMEM;
+       return ret == len ? 0 : -EFAULT;
 }
 
 #if !defined(__HAVE_ARCH_GATE_AREA)
index 89fee2dcb03928594ca2da67e9b75e7c8507394b..6837a1014372556c7dd78d66d9ba9ea9cb931832 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/delay.h>
 #include <linux/migrate.h>
 #include <linux/page-isolation.h>
+#include <linux/pfn.h>
 
 #include <asm/tlbflush.h>
 
@@ -323,11 +324,11 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
        BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
        BUG_ON(nr_pages % PAGES_PER_SECTION);
 
-       release_mem_region(phys_start_pfn << PAGE_SHIFT, nr_pages * PAGE_SIZE);
-
        sections_to_remove = nr_pages / PAGES_PER_SECTION;
        for (i = 0; i < sections_to_remove; i++) {
                unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
+               release_mem_region(pfn << PAGE_SHIFT,
+                                  PAGES_PER_SECTION << PAGE_SHIFT);
                ret = __remove_section(zone, __pfn_to_section(pfn));
                if (ret)
                        break;
@@ -657,8 +658,9 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                 * We can skip free pages. And we can only deal with pages on
                 * LRU.
                 */
-               ret = isolate_lru_page(page, &source);
+               ret = isolate_lru_page(page);
                if (!ret) { /* Success */
+                       list_add_tail(&page->lru, &source);
                        move_pages--;
                } else {
                        /* Becasue we don't have big zone->lock. we should
@@ -849,10 +851,19 @@ failed_removal:
 
        return ret;
 }
+
+int remove_memory(u64 start, u64 size)
+{
+       unsigned long start_pfn, end_pfn;
+
+       start_pfn = PFN_DOWN(start);
+       end_pfn = start_pfn + PFN_DOWN(size);
+       return offline_pages(start_pfn, end_pfn, 120 * HZ);
+}
 #else
 int remove_memory(u64 start, u64 size)
 {
        return -EINVAL;
 }
-EXPORT_SYMBOL_GPL(remove_memory);
 #endif /* CONFIG_MEMORY_HOTREMOVE */
+EXPORT_SYMBOL_GPL(remove_memory);
index 83369058ec133b380a482ca86792087134b944f6..36f42573a3352fa6ba452eea2d76ed2021c8605c 100644 (file)
@@ -93,6 +93,8 @@
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
 
+#include "internal.h"
+
 /* Internal flags */
 #define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0)   /* Skip checks for continuous vmas */
 #define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1)         /* Invert check for nodemask */
@@ -762,8 +764,11 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
        /*
         * Avoid migrating a page that is shared with others.
         */
-       if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1)
-               isolate_lru_page(page, pagelist);
+       if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) {
+               if (!isolate_lru_page(page)) {
+                       list_add_tail(&page->lru, pagelist);
+               }
+       }
 }
 
 static struct page *new_node_page(struct page *page, unsigned long node, int **x)
@@ -2197,7 +2202,7 @@ static void gather_stats(struct page *page, void *private, int pte_dirty)
        if (PageSwapCache(page))
                md->swapcache++;
 
-       if (PageActive(page))
+       if (PageActive(page) || PageUnevictable(page))
                md->active++;
 
        if (PageWriteback(page))
index 2a80136b23bbc16fb9d2ec2fad932b29e30dda50..6602941bfab0b03607f9bce6dd8e3f9ce1046bfe 100644 (file)
 
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
 
-/*
- * Isolate one page from the LRU lists. If successful put it onto
- * the indicated list with elevated page count.
- *
- * Result:
- *  -EBUSY: page not on LRU list
- *  0: page removed from LRU list and added to the specified list.
- */
-int isolate_lru_page(struct page *page, struct list_head *pagelist)
-{
-       int ret = -EBUSY;
-
-       if (PageLRU(page)) {
-               struct zone *zone = page_zone(page);
-
-               spin_lock_irq(&zone->lru_lock);
-               if (PageLRU(page) && get_page_unless_zero(page)) {
-                       ret = 0;
-                       ClearPageLRU(page);
-                       if (PageActive(page))
-                               del_page_from_active_list(zone, page);
-                       else
-                               del_page_from_inactive_list(zone, page);
-                       list_add_tail(&page->lru, pagelist);
-               }
-               spin_unlock_irq(&zone->lru_lock);
-       }
-       return ret;
-}
-
 /*
  * migrate_prep() needs to be called before we start compiling a list of pages
  * to be migrated using isolate_lru_page().
@@ -83,23 +53,9 @@ int migrate_prep(void)
        return 0;
 }
 
-static inline void move_to_lru(struct page *page)
-{
-       if (PageActive(page)) {
-               /*
-                * lru_cache_add_active checks that
-                * the PG_active bit is off.
-                */
-               ClearPageActive(page);
-               lru_cache_add_active(page);
-       } else {
-               lru_cache_add(page);
-       }
-       put_page(page);
-}
-
 /*
- * Add isolated pages on the list back to the LRU.
+ * Add isolated pages on the list back to the LRU under page lock
+ * to avoid leaking evictable pages back onto unevictable list.
  *
  * returns the number of pages put back.
  */
@@ -111,7 +67,7 @@ int putback_lru_pages(struct list_head *l)
 
        list_for_each_entry_safe(page, page2, l, lru) {
                list_del(&page->lru);
-               move_to_lru(page);
+               putback_lru_page(page);
                count++;
        }
        return count;
@@ -374,8 +330,6 @@ static int migrate_page_move_mapping(struct address_space *mapping,
        __inc_zone_page_state(newpage, NR_FILE_PAGES);
 
        spin_unlock_irq(&mapping->tree_lock);
-       if (!PageSwapCache(newpage))
-               mem_cgroup_uncharge_cache_page(page);
 
        return 0;
 }
@@ -385,6 +339,8 @@ static int migrate_page_move_mapping(struct address_space *mapping,
  */
 static void migrate_page_copy(struct page *newpage, struct page *page)
 {
+       int anon;
+
        copy_highpage(newpage, page);
 
        if (PageError(page))
@@ -393,8 +349,11 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
                SetPageReferenced(newpage);
        if (PageUptodate(page))
                SetPageUptodate(newpage);
-       if (PageActive(page))
+       if (TestClearPageActive(page)) {
+               VM_BUG_ON(PageUnevictable(page));
                SetPageActive(newpage);
+       } else
+               unevictable_migrate_page(newpage, page);
        if (PageChecked(page))
                SetPageChecked(newpage);
        if (PageMappedToDisk(page))
@@ -412,14 +371,20 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
                __set_page_dirty_nobuffers(newpage);
        }
 
+       mlock_migrate_page(newpage, page);
+
 #ifdef CONFIG_SWAP
        ClearPageSwapCache(page);
 #endif
-       ClearPageActive(page);
        ClearPagePrivate(page);
        set_page_private(page, 0);
+       /* page->mapping contains a flag for PageAnon() */
+       anon = PageAnon(page);
        page->mapping = NULL;
 
+       if (!anon) /* This page was removed from radix-tree. */
+               mem_cgroup_uncharge_cache_page(page);
+
        /*
         * If any waiters have accumulated on the new page then
         * wake them up.
@@ -594,6 +559,10 @@ static int fallback_migrate_page(struct address_space *mapping,
  *
  * The new page will have replaced the old page if this function
  * is successful.
+ *
+ * Return value:
+ *   < 0 - error code
+ *  == 0 - success
  */
 static int move_to_new_page(struct page *newpage, struct page *page)
 {
@@ -611,6 +580,8 @@ static int move_to_new_page(struct page *newpage, struct page *page)
        /* Prepare mapping for the new page.*/
        newpage->index = page->index;
        newpage->mapping = page->mapping;
+       if (PageSwapBacked(page))
+               SetPageSwapBacked(newpage);
 
        mapping = page_mapping(page);
        if (!mapping)
@@ -654,9 +625,10 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
        if (!newpage)
                return -ENOMEM;
 
-       if (page_count(page) == 1)
+       if (page_count(page) == 1) {
                /* page was freed from under us. So we are done. */
                goto move_newpage;
+       }
 
        charge = mem_cgroup_prepare_migration(page, newpage);
        if (charge == -ENOMEM) {
@@ -730,7 +702,6 @@ rcu_unlock:
                rcu_read_unlock();
 
 unlock:
-
        unlock_page(page);
 
        if (rc != -EAGAIN) {
@@ -741,17 +712,19 @@ unlock:
                 * restored.
                 */
                list_del(&page->lru);
-               move_to_lru(page);
+               putback_lru_page(page);
        }
 
 move_newpage:
        if (!charge)
                mem_cgroup_end_migration(newpage);
+
        /*
         * Move the new page to the LRU. If migration was not successful
         * then this will free the page.
         */
-       move_to_lru(newpage);
+       putback_lru_page(newpage);
+
        if (result) {
                if (rc)
                        *result = rc;
@@ -858,9 +831,11 @@ static struct page *new_page_node(struct page *p, unsigned long private,
  * Move a set of pages as indicated in the pm array. The addr
  * field must be set to the virtual address of the page to be moved
  * and the node number must contain a valid target node.
+ * The pm array ends with node = MAX_NUMNODES.
  */
-static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
-                               int migrate_all)
+static int do_move_page_to_node_array(struct mm_struct *mm,
+                                     struct page_to_node *pm,
+                                     int migrate_all)
 {
        int err;
        struct page_to_node *pp;
@@ -914,7 +889,9 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
                                !migrate_all)
                        goto put_and_set;
 
-               err = isolate_lru_page(page, &pagelist);
+               err = isolate_lru_page(page);
+               if (!err)
+                       list_add_tail(&page->lru, &pagelist);
 put_and_set:
                /*
                 * Either remove the duplicate refcount from
@@ -926,36 +903,118 @@ set_status:
                pp->status = err;
        }
 
+       err = 0;
        if (!list_empty(&pagelist))
                err = migrate_pages(&pagelist, new_page_node,
                                (unsigned long)pm);
-       else
-               err = -ENOENT;
 
        up_read(&mm->mmap_sem);
        return err;
 }
 
 /*
- * Determine the nodes of a list of pages. The addr in the pm array
- * must have been set to the virtual address of which we want to determine
- * the node number.
+ * Migrate an array of page address onto an array of nodes and fill
+ * the corresponding array of status.
  */
-static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
+static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
+                        unsigned long nr_pages,
+                        const void __user * __user *pages,
+                        const int __user *nodes,
+                        int __user *status, int flags)
 {
+       struct page_to_node *pm = NULL;
+       nodemask_t task_nodes;
+       int err = 0;
+       int i;
+
+       task_nodes = cpuset_mems_allowed(task);
+
+       /* Limit nr_pages so that the multiplication may not overflow */
+       if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
+               err = -E2BIG;
+               goto out;
+       }
+
+       pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
+       if (!pm) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       /*
+        * Get parameters from user space and initialize the pm
+        * array. Return various errors if the user did something wrong.
+        */
+       for (i = 0; i < nr_pages; i++) {
+               const void __user *p;
+
+               err = -EFAULT;
+               if (get_user(p, pages + i))
+                       goto out_pm;
+
+               pm[i].addr = (unsigned long)p;
+               if (nodes) {
+                       int node;
+
+                       if (get_user(node, nodes + i))
+                               goto out_pm;
+
+                       err = -ENODEV;
+                       if (!node_state(node, N_HIGH_MEMORY))
+                               goto out_pm;
+
+                       err = -EACCES;
+                       if (!node_isset(node, task_nodes))
+                               goto out_pm;
+
+                       pm[i].node = node;
+               } else
+                       pm[i].node = 0; /* anything to not match MAX_NUMNODES */
+       }
+       /* End marker */
+       pm[nr_pages].node = MAX_NUMNODES;
+
+       err = do_move_page_to_node_array(mm, pm, flags & MPOL_MF_MOVE_ALL);
+       if (err >= 0)
+               /* Return status information */
+               for (i = 0; i < nr_pages; i++)
+                       if (put_user(pm[i].status, status + i))
+                               err = -EFAULT;
+
+out_pm:
+       vfree(pm);
+out:
+       return err;
+}
+
+/*
+ * Determine the nodes of an array of pages and store it in an array of status.
+ */
+static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
+                        const void __user * __user *pages,
+                        int __user *status)
+{
+       unsigned long i;
+       int err;
+
        down_read(&mm->mmap_sem);
 
-       for ( ; pm->node != MAX_NUMNODES; pm++) {
+       for (i = 0; i < nr_pages; i++) {
+               const void __user *p;
+               unsigned long addr;
                struct vm_area_struct *vma;
                struct page *page;
-               int err;
 
                err = -EFAULT;
-               vma = find_vma(mm, pm->addr);
+               if (get_user(p, pages+i))
+                       goto out;
+               addr = (unsigned long) p;
+
+               vma = find_vma(mm, addr);
                if (!vma)
                        goto set_status;
 
-               page = follow_page(vma, pm->addr, 0);
+               page = follow_page(vma, addr, 0);
 
                err = PTR_ERR(page);
                if (IS_ERR(page))
@@ -968,11 +1027,13 @@ static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
 
                err = page_to_nid(page);
 set_status:
-               pm->status = err;
+               put_user(err, status+i);
        }
+       err = 0;
 
+out:
        up_read(&mm->mmap_sem);
-       return 0;
+       return err;
 }
 
 /*
@@ -984,12 +1045,9 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
                        const int __user *nodes,
                        int __user *status, int flags)
 {
-       int err = 0;
-       int i;
        struct task_struct *task;
-       nodemask_t task_nodes;
        struct mm_struct *mm;
-       struct page_to_node *pm = NULL;
+       int err;
 
        /* Check flags */
        if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
@@ -1021,75 +1079,21 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
            (current->uid != task->suid) && (current->uid != task->uid) &&
            !capable(CAP_SYS_NICE)) {
                err = -EPERM;
-               goto out2;
+               goto out;
        }
 
        err = security_task_movememory(task);
        if (err)
-               goto out2;
-
-
-       task_nodes = cpuset_mems_allowed(task);
-
-       /* Limit nr_pages so that the multiplication may not overflow */
-       if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
-               err = -E2BIG;
-               goto out2;
-       }
-
-       pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
-       if (!pm) {
-               err = -ENOMEM;
-               goto out2;
-       }
-
-       /*
-        * Get parameters from user space and initialize the pm
-        * array. Return various errors if the user did something wrong.
-        */
-       for (i = 0; i < nr_pages; i++) {
-               const void __user *p;
-
-               err = -EFAULT;
-               if (get_user(p, pages + i))
-                       goto out;
-
-               pm[i].addr = (unsigned long)p;
-               if (nodes) {
-                       int node;
-
-                       if (get_user(node, nodes + i))
-                               goto out;
-
-                       err = -ENODEV;
-                       if (!node_state(node, N_HIGH_MEMORY))
-                               goto out;
-
-                       err = -EACCES;
-                       if (!node_isset(node, task_nodes))
-                               goto out;
+               goto out;
 
-                       pm[i].node = node;
-               } else
-                       pm[i].node = 0; /* anything to not match MAX_NUMNODES */
+       if (nodes) {
+               err = do_pages_move(mm, task, nr_pages, pages, nodes, status,
+                                   flags);
+       } else {
+               err = do_pages_stat(mm, nr_pages, pages, status);
        }
-       /* End marker */
-       pm[nr_pages].node = MAX_NUMNODES;
-
-       if (nodes)
-               err = do_move_pages(mm, pm, flags & MPOL_MF_MOVE_ALL);
-       else
-               err = do_pages_stat(mm, pm);
-
-       if (err >= 0)
-               /* Return status information */
-               for (i = 0; i < nr_pages; i++)
-                       if (put_user(pm[i].status, status + i))
-                               err = -EFAULT;
 
 out:
-       vfree(pm);
-out2:
        mmput(mm);
        return err;
 }
index 01fbe93eff5ca25f1143b1295da6ce7115e00bde..008ea70b7afa9a2baed56cdc9b6b43ef0b05a6ff 100644 (file)
@@ -8,10 +8,18 @@
 #include <linux/capability.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/pagemap.h>
 #include <linux/mempolicy.h>
 #include <linux/syscalls.h>
 #include <linux/sched.h>
 #include <linux/module.h>
+#include <linux/rmap.h>
+#include <linux/mmzone.h>
+#include <linux/hugetlb.h>
+
+#include "internal.h"
 
 int can_do_mlock(void)
 {
@@ -23,17 +31,381 @@ int can_do_mlock(void)
 }
 EXPORT_SYMBOL(can_do_mlock);
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * Mlocked pages are marked with PageMlocked() flag for efficient testing
+ * in vmscan and, possibly, the fault path; and to support semi-accurate
+ * statistics.
+ *
+ * An mlocked page [PageMlocked(page)] is unevictable.  As such, it will
+ * be placed on the LRU "unevictable" list, rather than the [in]active lists.
+ * The unevictable list is an LRU sibling list to the [in]active lists.
+ * PageUnevictable is set to indicate the unevictable state.
+ *
+ * When lazy mlocking via vmscan, it is important to ensure that the
+ * vma's VM_LOCKED status is not concurrently being modified, otherwise we
+ * may have mlocked a page that is being munlocked. So lazy mlock must take
+ * the mmap_sem for read, and verify that the vma really is locked
+ * (see mm/rmap.c).
+ */
+
+/*
+ *  LRU accounting for clear_page_mlock()
+ */
+void __clear_page_mlock(struct page *page)
+{
+       VM_BUG_ON(!PageLocked(page));
+
+       if (!page->mapping) {   /* truncated ? */
+               return;
+       }
+
+       dec_zone_page_state(page, NR_MLOCK);
+       count_vm_event(UNEVICTABLE_PGCLEARED);
+       if (!isolate_lru_page(page)) {
+               putback_lru_page(page);
+       } else {
+               /*
+                * Page not on the LRU yet.  Flush all pagevecs and retry.
+                */
+               lru_add_drain_all();
+               if (!isolate_lru_page(page))
+                       putback_lru_page(page);
+               else if (PageUnevictable(page))
+                       count_vm_event(UNEVICTABLE_PGSTRANDED);
+
+       }
+}
+
+/*
+ * Mark page as mlocked if not already.
+ * If page on LRU, isolate and putback to move to unevictable list.
+ */
+void mlock_vma_page(struct page *page)
+{
+       BUG_ON(!PageLocked(page));
+
+       if (!TestSetPageMlocked(page)) {
+               inc_zone_page_state(page, NR_MLOCK);
+               count_vm_event(UNEVICTABLE_PGMLOCKED);
+               if (!isolate_lru_page(page))
+                       putback_lru_page(page);
+       }
+}
+
+/*
+ * called from munlock()/munmap() path with page supposedly on the LRU.
+ *
+ * Note:  unlike mlock_vma_page(), we can't just clear the PageMlocked
+ * [in try_to_munlock()] and then attempt to isolate the page.  We must
+ * isolate the page to keep others from messing with its unevictable
+ * and mlocked state while trying to munlock.  However, we pre-clear the
+ * mlocked state anyway as we might lose the isolation race and we might
+ * not get another chance to clear PageMlocked.  If we successfully
+ * isolate the page and try_to_munlock() detects other VM_LOCKED vmas
+ * mapping the page, it will restore the PageMlocked state, unless the page
+ * is mapped in a non-linear vma.  So, we go ahead and SetPageMlocked(),
+ * perhaps redundantly.
+ * If we lose the isolation race, and the page is mapped by other VM_LOCKED
+ * vmas, we'll detect this in vmscan--via try_to_munlock() or try_to_unmap()
+ * either of which will restore the PageMlocked state by calling
+ * mlock_vma_page() above, if it can grab the vma's mmap sem.
+ */
+static void munlock_vma_page(struct page *page)
+{
+       BUG_ON(!PageLocked(page));
+
+       if (TestClearPageMlocked(page)) {
+               dec_zone_page_state(page, NR_MLOCK);
+               if (!isolate_lru_page(page)) {
+                       int ret = try_to_munlock(page);
+                       /*
+                        * did try_to_munlock() succeed or punt?
+                        */
+                       if (ret == SWAP_SUCCESS || ret == SWAP_AGAIN)
+                               count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+
+                       putback_lru_page(page);
+               } else {
+                       /*
+                        * We lost the race.  Let try_to_unmap() deal
+                        * with it.  At least we get the page state and
+                        * mlock stats right.  However, page is still on
+                        * the unevictable list.  We'll fix that up when
+                        * the page is eventually freed or we scan the
+                        * unevictable list.
+                        */
+                       if (PageUnevictable(page))
+                               count_vm_event(UNEVICTABLE_PGSTRANDED);
+                       else
+                               count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+               }
+       }
+}
+
+/**
+ * __mlock_vma_pages_range() -  mlock/munlock a range of pages in the vma.
+ * @vma:   target vma
+ * @start: start address
+ * @end:   end address
+ * @mlock: 0 indicates munlock, otherwise mlock.
+ *
+ * If @mlock == 0, unlock an mlocked range;
+ * else mlock the range of pages.  This takes care of making the pages present,
+ * too.
+ *
+ * return 0 on success, negative error code on error.
+ *
+ * vma->vm_mm->mmap_sem must be held for at least read.
+ */
+static long __mlock_vma_pages_range(struct vm_area_struct *vma,
+                                  unsigned long start, unsigned long end,
+                                  int mlock)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       unsigned long addr = start;
+       struct page *pages[16]; /* 16 gives a reasonable batch */
+       int nr_pages = (end - start) / PAGE_SIZE;
+       int ret;
+       int gup_flags = 0;
+
+       VM_BUG_ON(start & ~PAGE_MASK);
+       VM_BUG_ON(end   & ~PAGE_MASK);
+       VM_BUG_ON(start < vma->vm_start);
+       VM_BUG_ON(end   > vma->vm_end);
+       VM_BUG_ON((!rwsem_is_locked(&mm->mmap_sem)) &&
+                 (atomic_read(&mm->mm_users) != 0));
+
+       /*
+        * mlock:   don't page populate if page has PROT_NONE permission.
+        * munlock: the pages are always munlocked even if
+        *          they have PROT_NONE permission.
+        */
+       if (!mlock)
+               gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS;
+
+       if (vma->vm_flags & VM_WRITE)
+               gup_flags |= GUP_FLAGS_WRITE;
+
+       lru_add_drain_all();    /* push cached pages to LRU */
+
+       while (nr_pages > 0) {
+               int i;
+
+               cond_resched();
+
+               /*
+                * get_user_pages makes pages present if we are
+                * setting mlock. and this extra reference count will
+                * disable migration of this page.  However, page may
+                * still be truncated out from under us.
+                */
+               ret = __get_user_pages(current, mm, addr,
+                               min_t(int, nr_pages, ARRAY_SIZE(pages)),
+                               gup_flags, pages, NULL);
+               /*
+                * This can happen for, e.g., VM_NONLINEAR regions before
+                * a page has been allocated and mapped at a given offset,
+                * or for addresses that map beyond end of a file.
+                * We'll mlock the pages if/when they get faulted in.
+                */
+               if (ret < 0)
+                       break;
+               if (ret == 0) {
+                       /*
+                        * We know the vma is there, so the only time
+                        * we cannot get a single page should be an
+                        * error (ret < 0) case.
+                        */
+                       WARN_ON(1);
+                       break;
+               }
+
+               lru_add_drain();        /* push cached pages to LRU */
+
+               for (i = 0; i < ret; i++) {
+                       struct page *page = pages[i];
+
+                       lock_page(page);
+                       /*
+                        * Because we lock page here and migration is blocked
+                        * by the elevated reference, we need only check for
+                        * page truncation (file-cache only).
+                        */
+                       if (page->mapping) {
+                               if (mlock)
+                                       mlock_vma_page(page);
+                               else
+                                       munlock_vma_page(page);
+                       }
+                       unlock_page(page);
+                       put_page(page);         /* ref from get_user_pages() */
+
+                       /*
+                        * here we assume that get_user_pages() has given us
+                        * a list of virtually contiguous pages.
+                        */
+                       addr += PAGE_SIZE;      /* for next get_user_pages() */
+                       nr_pages--;
+               }
+               ret = 0;
+       }
+
+       lru_add_drain_all();    /* to update stats */
+
+       return ret;     /* count entire vma as locked_vm */
+}
+
+/*
+ * convert get_user_pages() return value to posix mlock() error
+ */
+static int __mlock_posix_error_return(long retval)
+{
+       if (retval == -EFAULT)
+               retval = -ENOMEM;
+       else if (retval == -ENOMEM)
+               retval = -EAGAIN;
+       return retval;
+}
+
+#else /* CONFIG_UNEVICTABLE_LRU */
+
+/*
+ * Just make pages present if VM_LOCKED.  No-op if unlocking.
+ */
+static long __mlock_vma_pages_range(struct vm_area_struct *vma,
+                                  unsigned long start, unsigned long end,
+                                  int mlock)
+{
+       if (mlock && (vma->vm_flags & VM_LOCKED))
+               return make_pages_present(start, end);
+       return 0;
+}
+
+static inline int __mlock_posix_error_return(long retval)
+{
+       return 0;
+}
+
+#endif /* CONFIG_UNEVICTABLE_LRU */
+
+/**
+ * mlock_vma_pages_range() - mlock pages in specified vma range.
+ * @vma - the vma containing the specified address range
+ * @start - starting address in @vma to mlock
+ * @end   - end address [+1] in @vma to mlock
+ *
+ * For mmap()/mremap()/expansion of mlocked vma.
+ *
+ * return 0 on success for "normal" vmas.
+ *
+ * return number of pages [> 0] to be removed from locked_vm on success
+ * of "special" vmas.
+ *
+ * return negative error if vma spanning @start-@end disappears while
+ * mmap semaphore is dropped.  Unlikely?
+ */
+long mlock_vma_pages_range(struct vm_area_struct *vma,
+                       unsigned long start, unsigned long end)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       int nr_pages = (end - start) / PAGE_SIZE;
+       BUG_ON(!(vma->vm_flags & VM_LOCKED));
+
+       /*
+        * filter unlockable vmas
+        */
+       if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+               goto no_mlock;
+
+       if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
+                       is_vm_hugetlb_page(vma) ||
+                       vma == get_gate_vma(current))) {
+               long error;
+               downgrade_write(&mm->mmap_sem);
+
+               error = __mlock_vma_pages_range(vma, start, end, 1);
+
+               up_read(&mm->mmap_sem);
+               /* vma can change or disappear */
+               down_write(&mm->mmap_sem);
+               vma = find_vma(mm, start);
+               /* non-NULL vma must contain @start, but need to check @end */
+               if (!vma ||  end > vma->vm_end)
+                       return -ENOMEM;
+
+               return 0;       /* hide other errors from mmap(), et al */
+       }
+
+       /*
+        * User mapped kernel pages or huge pages:
+        * make these pages present to populate the ptes, but
+        * fall thru' to reset VM_LOCKED--no need to unlock, and
+        * return nr_pages so these don't get counted against task's
+        * locked limit.  huge pages are already counted against
+        * locked vm limit.
+        */
+       make_pages_present(start, end);
+
+no_mlock:
+       vma->vm_flags &= ~VM_LOCKED;    /* and don't come back! */
+       return nr_pages;                /* error or pages NOT mlocked */
+}
+
+
+/*
+ * munlock_vma_pages_range() - munlock all pages in the vma range.
+ * @vma - vma containing range to be munlock()ed.
+ * @start - start address in @vma of the range
+ * @end - end of range in @vma.
+ *
+ *  For mremap(), munmap() and exit().
+ *
+ * Called with @vma VM_LOCKED.
+ *
+ * Returns with VM_LOCKED cleared.  Callers must be prepared to
+ * deal with this.
+ *
+ * We don't save and restore VM_LOCKED here because pages are
+ * still on lru.  In unmap path, pages might be scanned by reclaim
+ * and re-mlocked by try_to_{munlock|unmap} before we unmap and
+ * free them.  This will result in freeing mlocked pages.
+ */
+void munlock_vma_pages_range(struct vm_area_struct *vma,
+                          unsigned long start, unsigned long end)
+{
+       vma->vm_flags &= ~VM_LOCKED;
+       __mlock_vma_pages_range(vma, start, end, 0);
+}
+
+/*
+ * mlock_fixup  - handle mlock[all]/munlock[all] requests.
+ *
+ * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
+ * munlock is a no-op.  However, for some special vmas, we go ahead and
+ * populate the ptes via make_pages_present().
+ *
+ * For vmas that pass the filters, merge/split as appropriate.
+ */
 static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
        unsigned long start, unsigned long end, unsigned int newflags)
 {
-       struct mm_struct * mm = vma->vm_mm;
+       struct mm_struct *mm = vma->vm_mm;
        pgoff_t pgoff;
-       int pages;
+       int nr_pages;
        int ret = 0;
-
-       if (newflags == vma->vm_flags) {
-               *prev = vma;
-               goto out;
+       int lock = newflags & VM_LOCKED;
+
+       if (newflags == vma->vm_flags ||
+                       (vma->vm_flags & (VM_IO | VM_PFNMAP)))
+               goto out;       /* don't set VM_LOCKED,  don't count */
+
+       if ((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
+                       is_vm_hugetlb_page(vma) ||
+                       vma == get_gate_vma(current)) {
+               if (lock)
+                       make_pages_present(start, end);
+               goto out;       /* don't set VM_LOCKED,  don't count */
        }
 
        pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
@@ -44,8 +416,6 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
                goto success;
        }
 
-       *prev = vma;
-
        if (start != vma->vm_start) {
                ret = split_vma(mm, vma, start, 1);
                if (ret)
@@ -59,25 +429,62 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
        }
 
 success:
+       /*
+        * Keep track of amount of locked VM.
+        */
+       nr_pages = (end - start) >> PAGE_SHIFT;
+       if (!lock)
+               nr_pages = -nr_pages;
+       mm->locked_vm += nr_pages;
+
        /*
         * vm_flags is protected by the mmap_sem held in write mode.
         * It's okay if try_to_unmap_one unmaps a page just after we
-        * set VM_LOCKED, make_pages_present below will bring it back.
+        * set VM_LOCKED, __mlock_vma_pages_range will bring it back.
         */
        vma->vm_flags = newflags;
 
-       /*
-        * Keep track of amount of locked VM.
-        */
-       pages = (end - start) >> PAGE_SHIFT;
-       if (newflags & VM_LOCKED) {
-               pages = -pages;
-               if (!(newflags & VM_IO))
-                       ret = make_pages_present(start, end);
+       if (lock) {
+               /*
+                * mmap_sem is currently held for write.  Downgrade the write
+                * lock to a read lock so that other faults, mmap scans, ...
+                * can proceed while we fault in all pages.
+                */
+               downgrade_write(&mm->mmap_sem);
+
+               ret = __mlock_vma_pages_range(vma, start, end, 1);
+
+               /*
+                * Need to reacquire mmap sem in write mode, as our callers
+                * expect this.  We have no support for atomically upgrading
+                * a sem to write, so we need to check for ranges while sem
+                * is unlocked.
+                */
+               up_read(&mm->mmap_sem);
+               /* vma can change or disappear */
+               down_write(&mm->mmap_sem);
+               *prev = find_vma(mm, start);
+               /* non-NULL *prev must contain @start, but need to check @end */
+               if (!(*prev) || end > (*prev)->vm_end)
+                       ret = -ENOMEM;
+               else if (ret > 0) {
+                       mm->locked_vm -= ret;
+                       ret = 0;
+               } else
+                       ret = __mlock_posix_error_return(ret); /* translate if needed */
+       } else {
+               /*
+                * TODO:  for unlocking, pages will already be resident, so
+                * we don't need to wait for allocations/reclaim/pagein, ...
+                * However, unlocking a very large region can still take a
+                * while.  Should we downgrade the semaphore for both lock
+                * AND unlock ?
+                */
+               __mlock_vma_pages_range(vma, start, end, 0);
        }
 
-       mm->locked_vm -= pages;
 out:
+       *prev = vma;
        return ret;
 }
 
index e7a5a68a9c2e4ef0b477aba5850a7299bc410aab..74f4d158022ec00ae1d405596fdbb5bb465c88b5 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -410,7 +410,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
        rb_insert_color(&vma->vm_rb, &mm->mm_rb);
 }
 
-static inline void __vma_link_file(struct vm_area_struct *vma)
+static void __vma_link_file(struct vm_area_struct *vma)
 {
        struct file * file;
 
@@ -662,8 +662,6 @@ again:                      remove_next = 1 + (end > next->vm_end);
  * If the vma has a ->close operation then the driver probably needs to release
  * per-vma resources, so we don't attempt to merge those.
  */
-#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
-
 static inline int is_mergeable_vma(struct vm_area_struct *vma,
                        struct file *file, unsigned long vm_flags)
 {
@@ -972,6 +970,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
                        return -EPERM;
                vm_flags |= VM_LOCKED;
        }
+
        /* mlock MCL_FUTURE? */
        if (vm_flags & VM_LOCKED) {
                unsigned long locked, lock_limit;
@@ -1139,10 +1138,12 @@ munmap_back:
         * The VM_SHARED test is necessary because shmem_zero_setup
         * will create the file object for a shared anonymous map below.
         */
-       if (!file && !(vm_flags & VM_SHARED) &&
-           vma_merge(mm, prev, addr, addr + len, vm_flags,
-                                       NULL, NULL, pgoff, NULL))
-               goto out;
+       if (!file && !(vm_flags & VM_SHARED)) {
+               vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
+                                       NULL, NULL, pgoff, NULL);
+               if (vma)
+                       goto out;
+       }
 
        /*
         * Determine the object being mapped and call the appropriate
@@ -1224,10 +1225,14 @@ out:
        mm->total_vm += len >> PAGE_SHIFT;
        vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
        if (vm_flags & VM_LOCKED) {
-               mm->locked_vm += len >> PAGE_SHIFT;
-               make_pages_present(addr, addr + len);
-       }
-       if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
+               /*
+                * makes pages present; downgrades, drops, reacquires mmap_sem
+                */
+               long nr_pages = mlock_vma_pages_range(vma, addr, addr + len);
+               if (nr_pages < 0)
+                       return nr_pages;        /* vma gone! */
+               mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages;
+       } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
                make_pages_present(addr, addr + len);
        return addr;
 
@@ -1586,7 +1591,7 @@ static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, un
  * vma is the last one with address > vma->vm_end.  Have to extend vma.
  */
 #ifndef CONFIG_IA64
-static inline
+static
 #endif
 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
@@ -1636,7 +1641,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 /*
  * vma is the first one with address < vma->vm_start.  Have to extend vma.
  */
-static inline int expand_downwards(struct vm_area_struct *vma,
+static int expand_downwards(struct vm_area_struct *vma,
                                   unsigned long address)
 {
        int error;
@@ -1698,10 +1703,12 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
        vma = find_vma_prev(mm, addr, &prev);
        if (vma && (vma->vm_start <= addr))
                return vma;
-       if (!prev || expand_stack(prev, addr))
+       if (expand_stack(prev, addr))
                return NULL;
-       if (prev->vm_flags & VM_LOCKED)
-               make_pages_present(addr, prev->vm_end);
+       if (prev->vm_flags & VM_LOCKED) {
+               if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0)
+                       return NULL;    /* vma gone! */
+       }
        return prev;
 }
 #else
@@ -1727,8 +1734,10 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
        start = vma->vm_start;
        if (expand_stack(vma, addr))
                return NULL;
-       if (vma->vm_flags & VM_LOCKED)
-               make_pages_present(addr, start);
+       if (vma->vm_flags & VM_LOCKED) {
+               if (mlock_vma_pages_range(vma, addr, start) < 0)
+                       return NULL;    /* vma gone! */
+       }
        return vma;
 }
 #endif
@@ -1747,8 +1756,6 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
                long nrpages = vma_pages(vma);
 
                mm->total_vm -= nrpages;
-               if (vma->vm_flags & VM_LOCKED)
-                       mm->locked_vm -= nrpages;
                vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
                vma = remove_vma(vma);
        } while (vma);
@@ -1913,6 +1920,20 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
        }
        vma = prev? prev->vm_next: mm->mmap;
 
+       /*
+        * unlock any mlock()ed ranges before detaching vmas
+        */
+       if (mm->locked_vm) {
+               struct vm_area_struct *tmp = vma;
+               while (tmp && tmp->vm_start < end) {
+                       if (tmp->vm_flags & VM_LOCKED) {
+                               mm->locked_vm -= vma_pages(tmp);
+                               munlock_vma_pages_all(tmp);
+                       }
+                       tmp = tmp->vm_next;
+               }
+       }
+
        /*
         * Remove the vma's, and unmap the actual pages
         */
@@ -2025,8 +2046,9 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
                return -ENOMEM;
 
        /* Can we just expand an old private anonymous mapping? */
-       if (vma_merge(mm, prev, addr, addr + len, flags,
-                                       NULL, NULL, pgoff, NULL))
+       vma = vma_merge(mm, prev, addr, addr + len, flags,
+                                       NULL, NULL, pgoff, NULL);
+       if (vma)
                goto out;
 
        /*
@@ -2048,8 +2070,8 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 out:
        mm->total_vm += len >> PAGE_SHIFT;
        if (flags & VM_LOCKED) {
-               mm->locked_vm += len >> PAGE_SHIFT;
-               make_pages_present(addr, addr + len);
+               if (!mlock_vma_pages_range(vma, addr, addr + len))
+                       mm->locked_vm += (len >> PAGE_SHIFT);
        }
        return addr;
 }
@@ -2060,7 +2082,7 @@ EXPORT_SYMBOL(do_brk);
 void exit_mmap(struct mm_struct *mm)
 {
        struct mmu_gather *tlb;
-       struct vm_area_struct *vma = mm->mmap;
+       struct vm_area_struct *vma;
        unsigned long nr_accounted = 0;
        unsigned long end;
 
@@ -2068,6 +2090,15 @@ void exit_mmap(struct mm_struct *mm)
        arch_exit_mmap(mm);
        mmu_notifier_release(mm);
 
+       if (mm->locked_vm) {
+               vma = mm->mmap;
+               while (vma) {
+                       if (vma->vm_flags & VM_LOCKED)
+                               munlock_vma_pages_all(vma);
+                       vma = vma->vm_next;
+               }
+       }
+       vma = mm->mmap;
        lru_add_drain();
        flush_cache_mm(mm);
        tlb = tlb_gather_mmu(mm, 1);
index 1a7743923c8c42c536d79ecb64f45457b2a3f994..58a2908f42f500002e49adc218adbca0e8602c26 100644 (file)
@@ -24,6 +24,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include "internal.h"
+
 static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
 {
        pgd_t *pgd;
@@ -238,8 +240,8 @@ static unsigned long move_vma(struct vm_area_struct *vma,
        if (vm_flags & VM_LOCKED) {
                mm->locked_vm += new_len >> PAGE_SHIFT;
                if (new_len > old_len)
-                       make_pages_present(new_addr + old_len,
-                                          new_addr + new_len);
+                       mlock_vma_pages_range(new_vma, new_addr + old_len,
+                                                      new_addr + new_len);
        }
 
        return new_addr;
@@ -379,7 +381,7 @@ unsigned long do_mremap(unsigned long addr,
                        vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages);
                        if (vma->vm_flags & VM_LOCKED) {
                                mm->locked_vm += pages;
-                               make_pages_present(addr + old_len,
+                               mlock_vma_pages_range(vma, addr + old_len,
                                                   addr + new_len);
                        }
                        ret = addr;
index ed75bc962fbe913f8a5835c0a9bc43170582dd7f..2696b24f2bb37c427168655b8f690c46c931dcf6 100644 (file)
@@ -34,6 +34,8 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
+#include "internal.h"
+
 void *high_memory;
 struct page *mem_map;
 unsigned long max_mapnr;
@@ -128,20 +130,16 @@ unsigned int kobjsize(const void *objp)
        return PAGE_SIZE << compound_order(page);
 }
 
-/*
- * get a list of pages in an address range belonging to the specified process
- * and indicate the VMA that covers each page
- * - this is potentially dodgy as we may end incrementing the page count of a
- *   slab page or a secondary page from a compound page
- * - don't permit access to VMAs that don't support it, such as I/O mappings
- */
-int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-       unsigned long start, int len, int write, int force,
-       struct page **pages, struct vm_area_struct **vmas)
+int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+                    unsigned long start, int len, int flags,
+               struct page **pages, struct vm_area_struct **vmas)
 {
        struct vm_area_struct *vma;
        unsigned long vm_flags;
        int i;
+       int write = !!(flags & GUP_FLAGS_WRITE);
+       int force = !!(flags & GUP_FLAGS_FORCE);
+       int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
 
        /* calculate required read or write permissions.
         * - if 'force' is set, we only require the "MAY" flags.
@@ -156,7 +154,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 
                /* protect what we can, including chardevs */
                if (vma->vm_flags & (VM_IO | VM_PFNMAP) ||
-                   !(vm_flags & vma->vm_flags))
+                   (!ignore && !(vm_flags & vma->vm_flags)))
                        goto finish_or_fault;
 
                if (pages) {
@@ -174,6 +172,30 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 finish_or_fault:
        return i ? : -EFAULT;
 }
+
+
+/*
+ * get a list of pages in an address range belonging to the specified process
+ * and indicate the VMA that covers each page
+ * - this is potentially dodgy as we may end incrementing the page count of a
+ *   slab page or a secondary page from a compound page
+ * - don't permit access to VMAs that don't support it, such as I/O mappings
+ */
+int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+       unsigned long start, int len, int write, int force,
+       struct page **pages, struct vm_area_struct **vmas)
+{
+       int flags = 0;
+
+       if (write)
+               flags |= GUP_FLAGS_WRITE;
+       if (force)
+               flags |= GUP_FLAGS_FORCE;
+
+       return __get_user_pages(tsk, mm,
+                               start, len, flags,
+                               pages, vmas);
+}
 EXPORT_SYMBOL(get_user_pages);
 
 DEFINE_RWLOCK(vmlist_lock);
index b40f6d5f8fe9bc24750fb829742ef655f0f4c42c..2970e35fd03f0fb6c3f178eca78b30166eeb6450 100644 (file)
@@ -329,9 +329,7 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
                struct zone *z =
                        &NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
 
-               x += zone_page_state(z, NR_FREE_PAGES)
-                       + zone_page_state(z, NR_INACTIVE)
-                       + zone_page_state(z, NR_ACTIVE);
+               x += zone_page_state(z, NR_FREE_PAGES) + zone_lru_pages(z);
        }
        /*
         * Make sure that the number of highmem pages is never larger
@@ -355,9 +353,7 @@ unsigned long determine_dirtyable_memory(void)
 {
        unsigned long x;
 
-       x = global_page_state(NR_FREE_PAGES)
-               + global_page_state(NR_INACTIVE)
-               + global_page_state(NR_ACTIVE);
+       x = global_page_state(NR_FREE_PAGES) + global_lru_pages();
 
        if (!vm_highmem_is_dirtyable)
                x -= highmem_dirtyable_memory(x);
index 9eb9eb92828510efbc7100addea5fa766c8f48ea..d0a240fbb8bfc34f5304ee896af7964442230d4d 100644 (file)
@@ -44,7 +44,7 @@
 #include <linux/backing-dev.h>
 #include <linux/fault-inject.h>
 #include <linux/page-isolation.h>
-#include <linux/memcontrol.h>
+#include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 
 #include <asm/tlbflush.h>
@@ -223,17 +223,12 @@ static inline int bad_range(struct zone *zone, struct page *page)
 
 static void bad_page(struct page *page)
 {
-       void *pc = page_get_page_cgroup(page);
-
        printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG
                "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
                current->comm, page, (int)(2*sizeof(unsigned long)),
                (unsigned long)page->flags, page->mapping,
                page_mapcount(page), page_count(page));
-       if (pc) {
-               printk(KERN_EMERG "cgroup:%p\n", pc);
-               page_reset_bad_cgroup(page);
-       }
+
        printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
                KERN_EMERG "Backtrace:\n");
        dump_stack();
@@ -454,14 +449,16 @@ static inline void __free_one_page(struct page *page,
 
 static inline int free_pages_check(struct page *page)
 {
+       free_page_mlock(page);
        if (unlikely(page_mapcount(page) |
                (page->mapping != NULL)  |
-               (page_get_page_cgroup(page) != NULL) |
                (page_count(page) != 0)  |
                (page->flags & PAGE_FLAGS_CHECK_AT_FREE)))
                bad_page(page);
        if (PageDirty(page))
                __ClearPageDirty(page);
+       if (PageSwapBacked(page))
+               __ClearPageSwapBacked(page);
        /*
         * For now, we report if PG_reserved was found set, but do not
         * clear it, and do not free the page.  But we shall soon need
@@ -600,7 +597,6 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 {
        if (unlikely(page_mapcount(page) |
                (page->mapping != NULL)  |
-               (page_get_page_cgroup(page) != NULL) |
                (page_count(page) != 0)  |
                (page->flags & PAGE_FLAGS_CHECK_AT_PREP)))
                bad_page(page);
@@ -614,7 +610,11 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 
        page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim |
                        1 << PG_referenced | 1 << PG_arch_1 |
-                       1 << PG_owner_priv_1 | 1 << PG_mappedtodisk);
+                       1 << PG_owner_priv_1 | 1 << PG_mappedtodisk
+#ifdef CONFIG_UNEVICTABLE_LRU
+                       | 1 << PG_mlocked
+#endif
+                       );
        set_page_private(page, 0);
        set_page_refcounted(page);
 
@@ -1862,10 +1862,21 @@ void show_free_areas(void)
                }
        }
 
-       printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n"
+       printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
+               " inactive_file:%lu"
+//TODO:  check/adjust line lengths
+#ifdef CONFIG_UNEVICTABLE_LRU
+               " unevictable:%lu"
+#endif
+               " dirty:%lu writeback:%lu unstable:%lu\n"
                " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
-               global_page_state(NR_ACTIVE),
-               global_page_state(NR_INACTIVE),
+               global_page_state(NR_ACTIVE_ANON),
+               global_page_state(NR_ACTIVE_FILE),
+               global_page_state(NR_INACTIVE_ANON),
+               global_page_state(NR_INACTIVE_FILE),
+#ifdef CONFIG_UNEVICTABLE_LRU
+               global_page_state(NR_UNEVICTABLE),
+#endif
                global_page_state(NR_FILE_DIRTY),
                global_page_state(NR_WRITEBACK),
                global_page_state(NR_UNSTABLE_NFS),
@@ -1888,8 +1899,13 @@ void show_free_areas(void)
                        " min:%lukB"
                        " low:%lukB"
                        " high:%lukB"
-                       " active:%lukB"
-                       " inactive:%lukB"
+                       " active_anon:%lukB"
+                       " inactive_anon:%lukB"
+                       " active_file:%lukB"
+                       " inactive_file:%lukB"
+#ifdef CONFIG_UNEVICTABLE_LRU
+                       " unevictable:%lukB"
+#endif
                        " present:%lukB"
                        " pages_scanned:%lu"
                        " all_unreclaimable? %s"
@@ -1899,8 +1915,13 @@ void show_free_areas(void)
                        K(zone->pages_min),
                        K(zone->pages_low),
                        K(zone->pages_high),
-                       K(zone_page_state(zone, NR_ACTIVE)),
-                       K(zone_page_state(zone, NR_INACTIVE)),
+                       K(zone_page_state(zone, NR_ACTIVE_ANON)),
+                       K(zone_page_state(zone, NR_INACTIVE_ANON)),
+                       K(zone_page_state(zone, NR_ACTIVE_FILE)),
+                       K(zone_page_state(zone, NR_INACTIVE_FILE)),
+#ifdef CONFIG_UNEVICTABLE_LRU
+                       K(zone_page_state(zone, NR_UNEVICTABLE)),
+#endif
                        K(zone->present_pages),
                        zone->pages_scanned,
                        (zone_is_all_unreclaimable(zone) ? "yes" : "no")
@@ -3410,10 +3431,12 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
        pgdat->nr_zones = 0;
        init_waitqueue_head(&pgdat->kswapd_wait);
        pgdat->kswapd_max_order = 0;
+       pgdat_page_cgroup_init(pgdat);
        
        for (j = 0; j < MAX_NR_ZONES; j++) {
                struct zone *zone = pgdat->node_zones + j;
                unsigned long size, realsize, memmap_pages;
+               enum lru_list l;
 
                size = zone_spanned_pages_in_node(nid, j, zones_size);
                realsize = size - zone_absent_pages_in_node(nid, j,
@@ -3428,8 +3451,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                        PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
                if (realsize >= memmap_pages) {
                        realsize -= memmap_pages;
-                       mminit_dprintk(MMINIT_TRACE, "memmap_init",
-                               "%s zone: %lu pages used for memmap\n",
+                       printk(KERN_DEBUG
+                               "  %s zone: %lu pages used for memmap\n",
                                zone_names[j], memmap_pages);
                } else
                        printk(KERN_WARNING
@@ -3439,8 +3462,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                /* Account for reserved pages */
                if (j == 0 && realsize > dma_reserve) {
                        realsize -= dma_reserve;
-                       mminit_dprintk(MMINIT_TRACE, "memmap_init",
-                                       "%s zone: %lu pages reserved\n",
+                       printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
                                        zone_names[0], dma_reserve);
                }
 
@@ -3465,10 +3487,14 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                zone->prev_priority = DEF_PRIORITY;
 
                zone_pcp_init(zone);
-               INIT_LIST_HEAD(&zone->active_list);
-               INIT_LIST_HEAD(&zone->inactive_list);
-               zone->nr_scan_active = 0;
-               zone->nr_scan_inactive = 0;
+               for_each_lru(l) {
+                       INIT_LIST_HEAD(&zone->lru[l].list);
+                       zone->lru[l].nr_scan = 0;
+               }
+               zone->recent_rotated[0] = 0;
+               zone->recent_rotated[1] = 0;
+               zone->recent_scanned[0] = 0;
+               zone->recent_scanned[1] = 0;
                zap_zone_vm_stats(zone);
                zone->flags = 0;
                if (!size)
@@ -4210,7 +4236,7 @@ void setup_per_zone_pages_min(void)
        for_each_zone(zone) {
                u64 tmp;
 
-               spin_lock_irqsave(&zone->lru_lock, flags);
+               spin_lock_irqsave(&zone->lock, flags);
                tmp = (u64)pages_min * zone->present_pages;
                do_div(tmp, lowmem_pages);
                if (is_highmem(zone)) {
@@ -4242,13 +4268,53 @@ void setup_per_zone_pages_min(void)
                zone->pages_low   = zone->pages_min + (tmp >> 2);
                zone->pages_high  = zone->pages_min + (tmp >> 1);
                setup_zone_migrate_reserve(zone);
-               spin_unlock_irqrestore(&zone->lru_lock, flags);
+               spin_unlock_irqrestore(&zone->lock, flags);
        }
 
        /* update totalreserve_pages */
        calculate_totalreserve_pages();
 }
 
+/**
+ * setup_per_zone_inactive_ratio - called when min_free_kbytes changes.
+ *
+ * The inactive anon list should be small enough that the VM never has to
+ * do too much work, but large enough that each inactive page has a chance
+ * to be referenced again before it is swapped out.
+ *
+ * The inactive_anon ratio is the target ratio of ACTIVE_ANON to
+ * INACTIVE_ANON pages on this zone's LRU, maintained by the
+ * pageout code. A zone->inactive_ratio of 3 means 3:1 or 25% of
+ * the anonymous pages are kept on the inactive list.
+ *
+ * total     target    max
+ * memory    ratio     inactive anon
+ * -------------------------------------
+ *   10MB       1         5MB
+ *  100MB       1        50MB
+ *    1GB       3       250MB
+ *   10GB      10       0.9GB
+ *  100GB      31         3GB
+ *    1TB     101        10GB
+ *   10TB     320        32GB
+ */
+void setup_per_zone_inactive_ratio(void)
+{
+       struct zone *zone;
+
+       for_each_zone(zone) {
+               unsigned int gb, ratio;
+
+               /* Zone size in gigabytes */
+               gb = zone->present_pages >> (30 - PAGE_SHIFT);
+               ratio = int_sqrt(10 * gb);
+               if (!ratio)
+                       ratio = 1;
+
+               zone->inactive_ratio = ratio;
+       }
+}
+
 /*
  * Initialise min_free_kbytes.
  *
@@ -4286,6 +4352,7 @@ static int __init init_per_zone_pages_min(void)
                min_free_kbytes = 65536;
        setup_per_zone_pages_min();
        setup_per_zone_lowmem_reserve();
+       setup_per_zone_inactive_ratio();
        return 0;
 }
 module_init(init_per_zone_pages_min)
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
new file mode 100644 (file)
index 0000000..5d86550
--- /dev/null
@@ -0,0 +1,237 @@
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/bootmem.h>
+#include <linux/bit_spinlock.h>
+#include <linux/page_cgroup.h>
+#include <linux/hash.h>
+#include <linux/memory.h>
+
+static void __meminit
+__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
+{
+       pc->flags = 0;
+       pc->mem_cgroup = NULL;
+       pc->page = pfn_to_page(pfn);
+}
+static unsigned long total_usage;
+
+#if !defined(CONFIG_SPARSEMEM)
+
+
+void __init pgdat_page_cgroup_init(struct pglist_data *pgdat)
+{
+       pgdat->node_page_cgroup = NULL;
+}
+
+struct page_cgroup *lookup_page_cgroup(struct page *page)
+{
+       unsigned long pfn = page_to_pfn(page);
+       unsigned long offset;
+       struct page_cgroup *base;
+
+       base = NODE_DATA(page_to_nid(page))->node_page_cgroup;
+       if (unlikely(!base))
+               return NULL;
+
+       offset = pfn - NODE_DATA(page_to_nid(page))->node_start_pfn;
+       return base + offset;
+}
+
+static int __init alloc_node_page_cgroup(int nid)
+{
+       struct page_cgroup *base, *pc;
+       unsigned long table_size;
+       unsigned long start_pfn, nr_pages, index;
+
+       start_pfn = NODE_DATA(nid)->node_start_pfn;
+       nr_pages = NODE_DATA(nid)->node_spanned_pages;
+
+       table_size = sizeof(struct page_cgroup) * nr_pages;
+
+       base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
+                       table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+       if (!base)
+               return -ENOMEM;
+       for (index = 0; index < nr_pages; index++) {
+               pc = base + index;
+               __init_page_cgroup(pc, start_pfn + index);
+       }
+       NODE_DATA(nid)->node_page_cgroup = base;
+       total_usage += table_size;
+       return 0;
+}
+
+void __init page_cgroup_init(void)
+{
+
+       int nid, fail;
+
+       for_each_online_node(nid)  {
+               fail = alloc_node_page_cgroup(nid);
+               if (fail)
+                       goto fail;
+       }
+       printk(KERN_INFO "allocated %lu bytes of page_cgroup\n", total_usage);
+       printk(KERN_INFO "please try 'cgroup_disable=memory' option if you"
+       " don't want memory cgroups\n");
+       return;
+fail:
+       printk(KERN_CRIT "allocation of page_cgroup failed.\n");
+       printk(KERN_CRIT "please try 'cgroup_disable=memory' boot option\n");
+       panic("Out of memory");
+}
+
+#else /* CONFIG_SPARSEMEM */
+
+struct page_cgroup *lookup_page_cgroup(struct page *page)
+{
+       unsigned long pfn = page_to_pfn(page);
+       struct mem_section *section = __pfn_to_section(pfn);
+
+       return section->page_cgroup + pfn;
+}
+
+int __meminit init_section_page_cgroup(unsigned long pfn)
+{
+       struct mem_section *section;
+       struct page_cgroup *base, *pc;
+       unsigned long table_size;
+       int nid, index;
+
+       section = __pfn_to_section(pfn);
+
+       if (section->page_cgroup)
+               return 0;
+
+       nid = page_to_nid(pfn_to_page(pfn));
+
+       table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
+       base = kmalloc_node(table_size, GFP_KERNEL, nid);
+       if (!base)
+               base = vmalloc_node(table_size, nid);
+
+       if (!base) {
+               printk(KERN_ERR "page cgroup allocation failure\n");
+               return -ENOMEM;
+       }
+
+       for (index = 0; index < PAGES_PER_SECTION; index++) {
+               pc = base + index;
+               __init_page_cgroup(pc, pfn + index);
+       }
+
+       section = __pfn_to_section(pfn);
+       section->page_cgroup = base - pfn;
+       total_usage += table_size;
+       return 0;
+}
+#ifdef CONFIG_MEMORY_HOTPLUG
+void __free_page_cgroup(unsigned long pfn)
+{
+       struct mem_section *ms;
+       struct page_cgroup *base;
+
+       ms = __pfn_to_section(pfn);
+       if (!ms || !ms->page_cgroup)
+               return;
+       base = ms->page_cgroup + pfn;
+       ms->page_cgroup = NULL;
+       if (is_vmalloc_addr(base))
+               vfree(base);
+       else
+               kfree(base);
+}
+
+int online_page_cgroup(unsigned long start_pfn,
+                       unsigned long nr_pages,
+                       int nid)
+{
+       unsigned long start, end, pfn;
+       int fail = 0;
+
+       start = start_pfn & ~(PAGES_PER_SECTION - 1);   /* round down to section start */
+       end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
+
+       for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) {
+               if (!pfn_present(pfn))
+                       continue;
+               fail = init_section_page_cgroup(pfn);
+       }
+       if (!fail)
+               return 0;
+
+       /* rollback */
+       for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
+               __free_page_cgroup(pfn);
+
+       return -ENOMEM;
+}
+
+int offline_page_cgroup(unsigned long start_pfn,
+               unsigned long nr_pages, int nid)
+{
+       unsigned long start, end, pfn;
+
+       start = start_pfn & ~(PAGES_PER_SECTION - 1);   /* round down to section start */
+       end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
+
+       for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
+               __free_page_cgroup(pfn);
+       return 0;
+
+}
+
+static int page_cgroup_callback(struct notifier_block *self,
+                              unsigned long action, void *arg)
+{
+       struct memory_notify *mn = arg;
+       int ret = 0;
+       switch (action) {
+       case MEM_GOING_ONLINE:
+               ret = online_page_cgroup(mn->start_pfn,
+                                  mn->nr_pages, mn->status_change_nid);
+               break;
+       case MEM_CANCEL_ONLINE:
+       case MEM_OFFLINE:
+               offline_page_cgroup(mn->start_pfn,
+                               mn->nr_pages, mn->status_change_nid);
+               break;
+       case MEM_GOING_OFFLINE:
+               break;
+       case MEM_ONLINE:
+       case MEM_CANCEL_OFFLINE:
+               break;
+       }
+       ret = notifier_from_errno(ret);
+       return ret;
+}
+
+#endif
+
+void __init page_cgroup_init(void)
+{
+       unsigned long pfn;
+       int fail = 0;
+
+       for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) {
+               if (!pfn_present(pfn))
+                       continue;
+               fail = init_section_page_cgroup(pfn);
+       }
+       if (fail) {
+               printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n");
+               panic("Out of memory");
+       } else {
+               hotplug_memory_notifier(page_cgroup_callback, 0);
+       }
+       printk(KERN_INFO "allocated %lu bytes of page_cgroup\n", total_usage);
+       printk(KERN_INFO "please try 'cgroup_disable=memory' option if you"
+       " don't want memory cgroups\n");
+}
+
+void __init pgdat_page_cgroup_init(struct pglist_data *pgdat)
+{
+       return;
+}
+
+#endif
index 6cbd9a72fde2c8c1e20bd1c3da9144fa0d13d65c..bec83c15a78f61b58a1dfbb74a336b9848b5d876 100644 (file)
@@ -229,7 +229,7 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
  */
 unsigned long max_sane_readahead(unsigned long nr)
 {
-       return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE)
+       return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE_FILE)
                + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
 }
 
index 0383acfcb0681a877f15f59d4d712980c37e1811..10993942d6c989314477291520c6bb6980a0439d 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
 
 #include <asm/tlbflush.h>
 
-struct kmem_cache *anon_vma_cachep;
+#include "internal.h"
 
-/* This must be called under the mmap_sem. */
+static struct kmem_cache *anon_vma_cachep;
+
+static inline struct anon_vma *anon_vma_alloc(void)
+{
+       return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
+}
+
+static inline void anon_vma_free(struct anon_vma *anon_vma)
+{
+       kmem_cache_free(anon_vma_cachep, anon_vma);
+}
+
+/**
+ * anon_vma_prepare - attach an anon_vma to a memory region
+ * @vma: the memory region in question
+ *
+ * This makes sure the memory mapping described by 'vma' has
+ * an 'anon_vma' attached to it, so that we can associate the
+ * anonymous pages mapped into it with that anon_vma.
+ *
+ * The common case will be that we already have one, but if
+ * if not we either need to find an adjacent mapping that we
+ * can re-use the anon_vma from (very common when the only
+ * reason for splitting a vma has been mprotect()), or we
+ * allocate a new one.
+ *
+ * Anon-vma allocations are very subtle, because we may have
+ * optimistically looked up an anon_vma in page_lock_anon_vma()
+ * and that may actually touch the spinlock even in the newly
+ * allocated vma (it depends on RCU to make sure that the
+ * anon_vma isn't actually destroyed).
+ *
+ * As a result, we need to do proper anon_vma locking even
+ * for the new allocation. At the same time, we do not want
+ * to do any locking for the common case of already having
+ * an anon_vma.
+ *
+ * This must be called with the mmap_sem held for reading.
+ */
 int anon_vma_prepare(struct vm_area_struct *vma)
 {
        struct anon_vma *anon_vma = vma->anon_vma;
@@ -63,20 +101,17 @@ int anon_vma_prepare(struct vm_area_struct *vma)
        might_sleep();
        if (unlikely(!anon_vma)) {
                struct mm_struct *mm = vma->vm_mm;
-               struct anon_vma *allocated, *locked;
+               struct anon_vma *allocated;
 
                anon_vma = find_mergeable_anon_vma(vma);
-               if (anon_vma) {
-                       allocated = NULL;
-                       locked = anon_vma;
-                       spin_lock(&locked->lock);
-               } else {
+               allocated = NULL;
+               if (!anon_vma) {
                        anon_vma = anon_vma_alloc();
                        if (unlikely(!anon_vma))
                                return -ENOMEM;
                        allocated = anon_vma;
-                       locked = NULL;
                }
+               spin_lock(&anon_vma->lock);
 
                /* page_table_lock to protect against threads */
                spin_lock(&mm->page_table_lock);
@@ -87,8 +122,7 @@ int anon_vma_prepare(struct vm_area_struct *vma)
                }
                spin_unlock(&mm->page_table_lock);
 
-               if (locked)
-                       spin_unlock(&locked->lock);
+               spin_unlock(&anon_vma->lock);
                if (unlikely(allocated))
                        anon_vma_free(allocated);
        }
@@ -157,7 +191,7 @@ void __init anon_vma_init(void)
  * Getting a lock on a stable anon_vma from a page off the LRU is
  * tricky: page_lock_anon_vma rely on RCU to guard against the races.
  */
-static struct anon_vma *page_lock_anon_vma(struct page *page)
+struct anon_vma *page_lock_anon_vma(struct page *page)
 {
        struct anon_vma *anon_vma;
        unsigned long anon_mapping;
@@ -177,7 +211,7 @@ out:
        return NULL;
 }
 
-static void page_unlock_anon_vma(struct anon_vma *anon_vma)
+void page_unlock_anon_vma(struct anon_vma *anon_vma)
 {
        spin_unlock(&anon_vma->lock);
        rcu_read_unlock();
@@ -268,6 +302,32 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
        return NULL;
 }
 
+/**
+ * page_mapped_in_vma - check whether a page is really mapped in a VMA
+ * @page: the page to test
+ * @vma: the VMA to test
+ *
+ * Returns 1 if the page is mapped into the page tables of the VMA, 0
+ * if the page is not mapped into the page tables of this VMA.  Only
+ * valid for normal file or anonymous VMAs.
+ */
+static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
+{
+       unsigned long address;
+       pte_t *pte;
+       spinlock_t *ptl;
+
+       address = vma_address(page, vma);
+       if (address == -EFAULT)         /* out of vma range */
+               return 0;
+       pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
+       if (!pte)                       /* the page is not in this mm */
+               return 0;
+       pte_unmap_unlock(pte, ptl);
+
+       return 1;
+}
+
 /*
  * Subfunctions of page_referenced: page_referenced_one called
  * repeatedly from either page_referenced_anon or page_referenced_file.
@@ -289,10 +349,17 @@ static int page_referenced_one(struct page *page,
        if (!pte)
                goto out;
 
+       /*
+        * Don't want to elevate referenced for mlocked page that gets this far,
+        * in order that it progresses to try_to_unmap and is moved to the
+        * unevictable list.
+        */
        if (vma->vm_flags & VM_LOCKED) {
-               referenced++;
                *mapcount = 1;  /* break early from loop */
-       } else if (ptep_clear_flush_young_notify(vma, address, pte))
+               goto out_unmap;
+       }
+
+       if (ptep_clear_flush_young_notify(vma, address, pte))
                referenced++;
 
        /* Pretend the page is referenced if the task has the
@@ -301,6 +368,7 @@ static int page_referenced_one(struct page *page,
                        rwsem_is_locked(&mm->mmap_sem))
                referenced++;
 
+out_unmap:
        (*mapcount)--;
        pte_unmap_unlock(pte, ptl);
 out:
@@ -390,11 +458,6 @@ static int page_referenced_file(struct page *page,
                 */
                if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
                        continue;
-               if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE))
-                                 == (VM_LOCKED|VM_MAYSHARE)) {
-                       referenced++;
-                       break;
-               }
                referenced += page_referenced_one(page, vma, &mapcount);
                if (!mapcount)
                        break;
@@ -674,8 +737,8 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
                        page_clear_dirty(page);
                        set_page_dirty(page);
                }
-
-               mem_cgroup_uncharge_page(page);
+               if (PageAnon(page))
+                       mem_cgroup_uncharge_page(page);
                __dec_zone_page_state(page,
                        PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
                /*
@@ -717,11 +780,16 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         * If it's recently referenced (perhaps page_referenced
         * skipped over this mm) then we should reactivate it.
         */
-       if (!migration && ((vma->vm_flags & VM_LOCKED) ||
-                       (ptep_clear_flush_young_notify(vma, address, pte)))) {
-               ret = SWAP_FAIL;
-               goto out_unmap;
-       }
+       if (!migration) {
+               if (vma->vm_flags & VM_LOCKED) {
+                       ret = SWAP_MLOCK;
+                       goto out_unmap;
+               }
+               if (ptep_clear_flush_young_notify(vma, address, pte)) {
+                       ret = SWAP_FAIL;
+                       goto out_unmap;
+               }
+       }
 
        /* Nuke the page table entry. */
        flush_cache_page(vma, address, page_to_pfn(page));
@@ -802,12 +870,17 @@ out:
  * For very sparsely populated VMAs this is a little inefficient - chances are
  * there there won't be many ptes located within the scan cluster.  In this case
  * maybe we could scan further - to the end of the pte page, perhaps.
+ *
+ * Mlocked pages:  check VM_LOCKED under mmap_sem held for read, if we can
+ * acquire it without blocking.  If vma locked, mlock the pages in the cluster,
+ * rather than unmapping them.  If we encounter the "check_page" that vmscan is
+ * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN.
  */
 #define CLUSTER_SIZE   min(32*PAGE_SIZE, PMD_SIZE)
 #define CLUSTER_MASK   (~(CLUSTER_SIZE - 1))
 
-static void try_to_unmap_cluster(unsigned long cursor,
-       unsigned int *mapcount, struct vm_area_struct *vma)
+static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
+               struct vm_area_struct *vma, struct page *check_page)
 {
        struct mm_struct *mm = vma->vm_mm;
        pgd_t *pgd;
@@ -819,6 +892,8 @@ static void try_to_unmap_cluster(unsigned long cursor,
        struct page *page;
        unsigned long address;
        unsigned long end;
+       int ret = SWAP_AGAIN;
+       int locked_vma = 0;
 
        address = (vma->vm_start + cursor) & CLUSTER_MASK;
        end = address + CLUSTER_SIZE;
@@ -829,15 +904,26 @@ static void try_to_unmap_cluster(unsigned long cursor,
 
        pgd = pgd_offset(mm, address);
        if (!pgd_present(*pgd))
-               return;
+               return ret;
 
        pud = pud_offset(pgd, address);
        if (!pud_present(*pud))
-               return;
+               return ret;
 
        pmd = pmd_offset(pud, address);
        if (!pmd_present(*pmd))
-               return;
+               return ret;
+
+       /*
+        * MLOCK_PAGES => feature is configured.
+        * if we can acquire the mmap_sem for read, and vma is VM_LOCKED,
+        * keep the sem while scanning the cluster for mlocking pages.
+        */
+       if (MLOCK_PAGES && down_read_trylock(&vma->vm_mm->mmap_sem)) {
+               locked_vma = (vma->vm_flags & VM_LOCKED);
+               if (!locked_vma)
+                       up_read(&vma->vm_mm->mmap_sem); /* don't need it */
+       }
 
        pte = pte_offset_map_lock(mm, pmd, address, &ptl);
 
@@ -850,6 +936,13 @@ static void try_to_unmap_cluster(unsigned long cursor,
                page = vm_normal_page(vma, address, *pte);
                BUG_ON(!page || PageAnon(page));
 
+               if (locked_vma) {
+                       mlock_vma_page(page);   /* no-op if already mlocked */
+                       if (page == check_page)
+                               ret = SWAP_MLOCK;
+                       continue;       /* don't unmap */
+               }
+
                if (ptep_clear_flush_young_notify(vma, address, pte))
                        continue;
 
@@ -871,39 +964,104 @@ static void try_to_unmap_cluster(unsigned long cursor,
                (*mapcount)--;
        }
        pte_unmap_unlock(pte - 1, ptl);
+       if (locked_vma)
+               up_read(&vma->vm_mm->mmap_sem);
+       return ret;
 }
 
-static int try_to_unmap_anon(struct page *page, int migration)
+/*
+ * common handling for pages mapped in VM_LOCKED vmas
+ */
+static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma)
+{
+       int mlocked = 0;
+
+       if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
+               if (vma->vm_flags & VM_LOCKED) {
+                       mlock_vma_page(page);
+                       mlocked++;      /* really mlocked the page */
+               }
+               up_read(&vma->vm_mm->mmap_sem);
+       }
+       return mlocked;
+}
+
+/**
+ * try_to_unmap_anon - unmap or unlock anonymous page using the object-based
+ * rmap method
+ * @page: the page to unmap/unlock
+ * @unlock:  request for unlock rather than unmap [unlikely]
+ * @migration:  unmapping for migration - ignored if @unlock
+ *
+ * Find all the mappings of a page using the mapping pointer and the vma chains
+ * contained in the anon_vma struct it points to.
+ *
+ * This function is only called from try_to_unmap/try_to_munlock for
+ * anonymous pages.
+ * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * where the page was found will be held for write.  So, we won't recheck
+ * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
+ * 'LOCKED.
+ */
+static int try_to_unmap_anon(struct page *page, int unlock, int migration)
 {
        struct anon_vma *anon_vma;
        struct vm_area_struct *vma;
+       unsigned int mlocked = 0;
        int ret = SWAP_AGAIN;
 
+       if (MLOCK_PAGES && unlikely(unlock))
+               ret = SWAP_SUCCESS;     /* default for try_to_munlock() */
+
        anon_vma = page_lock_anon_vma(page);
        if (!anon_vma)
                return ret;
 
        list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
-               ret = try_to_unmap_one(page, vma, migration);
-               if (ret == SWAP_FAIL || !page_mapped(page))
-                       break;
+               if (MLOCK_PAGES && unlikely(unlock)) {
+                       if (!((vma->vm_flags & VM_LOCKED) &&
+                             page_mapped_in_vma(page, vma)))
+                               continue;  /* must visit all unlocked vmas */
+                       ret = SWAP_MLOCK;  /* saw at least one mlocked vma */
+               } else {
+                       ret = try_to_unmap_one(page, vma, migration);
+                       if (ret == SWAP_FAIL || !page_mapped(page))
+                               break;
+               }
+               if (ret == SWAP_MLOCK) {
+                       mlocked = try_to_mlock_page(page, vma);
+                       if (mlocked)
+                               break;  /* stop if actually mlocked page */
+               }
        }
 
        page_unlock_anon_vma(anon_vma);
+
+       if (mlocked)
+               ret = SWAP_MLOCK;       /* actually mlocked the page */
+       else if (ret == SWAP_MLOCK)
+               ret = SWAP_AGAIN;       /* saw VM_LOCKED vma */
+
        return ret;
 }
 
 /**
- * try_to_unmap_file - unmap file page using the object-based rmap method
- * @page: the page to unmap
- * @migration: migration flag
+ * try_to_unmap_file - unmap/unlock file page using the object-based rmap method
+ * @page: the page to unmap/unlock
+ * @unlock:  request for unlock rather than unmap [unlikely]
+ * @migration:  unmapping for migration - ignored if @unlock
  *
  * Find all the mappings of a page using the mapping pointer and the vma chains
  * contained in the address_space struct it points to.
  *
- * This function is only called from try_to_unmap for object-based pages.
+ * This function is only called from try_to_unmap/try_to_munlock for
+ * object-based pages.
+ * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * where the page was found will be held for write.  So, we won't recheck
+ * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
+ * 'LOCKED.
  */
-static int try_to_unmap_file(struct page *page, int migration)
+static int try_to_unmap_file(struct page *page, int unlock, int migration)
 {
        struct address_space *mapping = page->mapping;
        pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -914,20 +1072,44 @@ static int try_to_unmap_file(struct page *page, int migration)
        unsigned long max_nl_cursor = 0;
        unsigned long max_nl_size = 0;
        unsigned int mapcount;
+       unsigned int mlocked = 0;
+
+       if (MLOCK_PAGES && unlikely(unlock))
+               ret = SWAP_SUCCESS;     /* default for try_to_munlock() */
 
        spin_lock(&mapping->i_mmap_lock);
        vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
-               ret = try_to_unmap_one(page, vma, migration);
-               if (ret == SWAP_FAIL || !page_mapped(page))
-                       goto out;
+               if (MLOCK_PAGES && unlikely(unlock)) {
+                       if (!(vma->vm_flags & VM_LOCKED))
+                               continue;       /* must visit all vmas */
+                       ret = SWAP_MLOCK;
+               } else {
+                       ret = try_to_unmap_one(page, vma, migration);
+                       if (ret == SWAP_FAIL || !page_mapped(page))
+                               goto out;
+               }
+               if (ret == SWAP_MLOCK) {
+                       mlocked = try_to_mlock_page(page, vma);
+                       if (mlocked)
+                               break;  /* stop if actually mlocked page */
+               }
        }
 
+       if (mlocked)
+               goto out;
+
        if (list_empty(&mapping->i_mmap_nonlinear))
                goto out;
 
        list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
                                                shared.vm_set.list) {
-               if ((vma->vm_flags & VM_LOCKED) && !migration)
+               if (MLOCK_PAGES && unlikely(unlock)) {
+                       if (!(vma->vm_flags & VM_LOCKED))
+                               continue;       /* must visit all vmas */
+                       ret = SWAP_MLOCK;       /* leave mlocked == 0 */
+                       goto out;               /* no need to look further */
+               }
+               if (!MLOCK_PAGES && !migration && (vma->vm_flags & VM_LOCKED))
                        continue;
                cursor = (unsigned long) vma->vm_private_data;
                if (cursor > max_nl_cursor)
@@ -937,7 +1119,7 @@ static int try_to_unmap_file(struct page *page, int migration)
                        max_nl_size = cursor;
        }
 
-       if (max_nl_size == 0) { /* any nonlinears locked or reserved */
+       if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */
                ret = SWAP_FAIL;
                goto out;
        }
@@ -961,12 +1143,16 @@ static int try_to_unmap_file(struct page *page, int migration)
        do {
                list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
                                                shared.vm_set.list) {
-                       if ((vma->vm_flags & VM_LOCKED) && !migration)
+                       if (!MLOCK_PAGES && !migration &&
+                           (vma->vm_flags & VM_LOCKED))
                                continue;
                        cursor = (unsigned long) vma->vm_private_data;
                        while ( cursor < max_nl_cursor &&
                                cursor < vma->vm_end - vma->vm_start) {
-                               try_to_unmap_cluster(cursor, &mapcount, vma);
+                               ret = try_to_unmap_cluster(cursor, &mapcount,
+                                                               vma, page);
+                               if (ret == SWAP_MLOCK)
+                                       mlocked = 2;    /* to return below */
                                cursor += CLUSTER_SIZE;
                                vma->vm_private_data = (void *) cursor;
                                if ((int)mapcount <= 0)
@@ -987,6 +1173,10 @@ static int try_to_unmap_file(struct page *page, int migration)
                vma->vm_private_data = NULL;
 out:
        spin_unlock(&mapping->i_mmap_lock);
+       if (mlocked)
+               ret = SWAP_MLOCK;       /* actually mlocked the page */
+       else if (ret == SWAP_MLOCK)
+               ret = SWAP_AGAIN;       /* saw VM_LOCKED vma */
        return ret;
 }
 
@@ -1002,6 +1192,7 @@ out:
  * SWAP_SUCCESS        - we succeeded in removing all mappings
  * SWAP_AGAIN  - we missed a mapping, try again later
  * SWAP_FAIL   - the page is unswappable
+ * SWAP_MLOCK  - page is mlocked.
  */
 int try_to_unmap(struct page *page, int migration)
 {
@@ -1010,12 +1201,36 @@ int try_to_unmap(struct page *page, int migration)
        BUG_ON(!PageLocked(page));
 
        if (PageAnon(page))
-               ret = try_to_unmap_anon(page, migration);
+               ret = try_to_unmap_anon(page, 0, migration);
        else
-               ret = try_to_unmap_file(page, migration);
-
-       if (!page_mapped(page))
+               ret = try_to_unmap_file(page, 0, migration);
+       if (ret != SWAP_MLOCK && !page_mapped(page))
                ret = SWAP_SUCCESS;
        return ret;
 }
 
+#ifdef CONFIG_UNEVICTABLE_LRU
+/**
+ * try_to_munlock - try to munlock a page
+ * @page: the page to be munlocked
+ *
+ * Called from munlock code.  Checks all of the VMAs mapping the page
+ * to make sure nobody else has this page mlocked. The page will be
+ * returned with PG_mlocked cleared if no other vmas have it mlocked.
+ *
+ * Return values are:
+ *
+ * SWAP_SUCCESS        - no vma's holding page mlocked.
+ * SWAP_AGAIN  - page mapped in mlocked vma -- couldn't acquire mmap sem
+ * SWAP_MLOCK  - page is now mlocked.
+ */
+int try_to_munlock(struct page *page)
+{
+       VM_BUG_ON(!PageLocked(page) || PageLRU(page));
+
+       if (PageAnon(page))
+               return try_to_unmap_anon(page, 1, 0);
+       else
+               return try_to_unmap_file(page, 1, 0);
+}
+#endif
index d87958a5f03e95b2cb851722fb91d2c6e5447f66..d38d7e61fcd0bed76af19f52ac9fc03d3a9eb262 100644 (file)
@@ -199,7 +199,7 @@ static struct vm_operations_struct shmem_vm_ops;
 
 static struct backing_dev_info shmem_backing_dev_info  __read_mostly = {
        .ra_pages       = 0,    /* No readahead */
-       .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK,
+       .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
        .unplug_io_fn   = default_unplug_io_fn,
 };
 
@@ -1367,6 +1367,7 @@ repeat:
                                error = -ENOMEM;
                                goto failed;
                        }
+                       SetPageSwapBacked(filepage);
 
                        /* Precharge page while we can wait, compensate after */
                        error = mem_cgroup_cache_charge(filepage, current->mm,
@@ -1476,12 +1477,16 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
                if (!user_shm_lock(inode->i_size, user))
                        goto out_nomem;
                info->flags |= VM_LOCKED;
+               mapping_set_unevictable(file->f_mapping);
        }
        if (!lock && (info->flags & VM_LOCKED) && user) {
                user_shm_unlock(inode->i_size, user);
                info->flags &= ~VM_LOCKED;
+               mapping_clear_unevictable(file->f_mapping);
+               scan_mapping_unevictable_pages(file->f_mapping);
        }
        retval = 0;
+
 out_nomem:
        spin_unlock(&info->lock);
        return retval;
index 9e0cb3118079e6eae0cf27fed41b8f2ff59f8bc2..2152e48a7b8f0726f06a88c516d82a0becf3c19d 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
 #include <linux/backing-dev.h>
 #include <linux/memcontrol.h>
 
+#include "internal.h"
+
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
 
-static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs);
-static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs);
+static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
 static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
 
 /*
@@ -116,8 +117,9 @@ static void pagevec_move_tail(struct pagevec *pvec)
                        zone = pagezone;
                        spin_lock(&zone->lru_lock);
                }
-               if (PageLRU(page) && !PageActive(page)) {
-                       list_move_tail(&page->lru, &zone->inactive_list);
+               if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+                       int lru = page_is_file_cache(page);
+                       list_move_tail(&page->lru, &zone->lru[lru].list);
                        pgmoved++;
                }
        }
@@ -136,7 +138,7 @@ static void pagevec_move_tail(struct pagevec *pvec)
 void  rotate_reclaimable_page(struct page *page)
 {
        if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
-           PageLRU(page)) {
+           !PageUnevictable(page) && PageLRU(page)) {
                struct pagevec *pvec;
                unsigned long flags;
 
@@ -157,12 +159,19 @@ void activate_page(struct page *page)
        struct zone *zone = page_zone(page);
 
        spin_lock_irq(&zone->lru_lock);
-       if (PageLRU(page) && !PageActive(page)) {
-               del_page_from_inactive_list(zone, page);
+       if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+               int file = page_is_file_cache(page);
+               int lru = LRU_BASE + file;
+               del_page_from_lru_list(zone, page, lru);
+
                SetPageActive(page);
-               add_page_to_active_list(zone, page);
+               lru += LRU_ACTIVE;
+               add_page_to_lru_list(zone, page, lru);
                __count_vm_event(PGACTIVATE);
-               mem_cgroup_move_lists(page, true);
+               mem_cgroup_move_lists(page, lru);
+
+               zone->recent_rotated[!!file]++;
+               zone->recent_scanned[!!file]++;
        }
        spin_unlock_irq(&zone->lru_lock);
 }
@@ -176,7 +185,8 @@ void activate_page(struct page *page)
  */
 void mark_page_accessed(struct page *page)
 {
-       if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) {
+       if (!PageActive(page) && !PageUnevictable(page) &&
+                       PageReferenced(page) && PageLRU(page)) {
                activate_page(page);
                ClearPageReferenced(page);
        } else if (!PageReferenced(page)) {
@@ -186,28 +196,73 @@ void mark_page_accessed(struct page *page)
 
 EXPORT_SYMBOL(mark_page_accessed);
 
-/**
- * lru_cache_add: add a page to the page lists
- * @page: the page to add
- */
-void lru_cache_add(struct page *page)
+void __lru_cache_add(struct page *page, enum lru_list lru)
 {
-       struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);
+       struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];
 
        page_cache_get(page);
        if (!pagevec_add(pvec, page))
-               __pagevec_lru_add(pvec);
+               ____pagevec_lru_add(pvec, lru);
        put_cpu_var(lru_add_pvecs);
 }
 
-void lru_cache_add_active(struct page *page)
+/**
+ * lru_cache_add_lru - add a page to a page list
+ * @page: the page to be added to the LRU.
+ * @lru: the LRU list to which the page is added.
+ */
+void lru_cache_add_lru(struct page *page, enum lru_list lru)
 {
-       struct pagevec *pvec = &get_cpu_var(lru_add_active_pvecs);
+       if (PageActive(page)) {
+               VM_BUG_ON(PageUnevictable(page));
+               ClearPageActive(page);
+       } else if (PageUnevictable(page)) {
+               VM_BUG_ON(PageActive(page));
+               ClearPageUnevictable(page);
+       }
 
-       page_cache_get(page);
-       if (!pagevec_add(pvec, page))
-               __pagevec_lru_add_active(pvec);
-       put_cpu_var(lru_add_active_pvecs);
+       VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page));
+       __lru_cache_add(page, lru);
+}
+
+/**
+ * add_page_to_unevictable_list - add a page to the unevictable list
+ * @page:  the page to be added to the unevictable list
+ *
+ * Add page directly to its zone's unevictable list.  To avoid races with
+ * tasks that might be making the page evictable, through eg. munlock,
+ * munmap or exit, while it's not on the lru, we want to add the page
+ * while it's locked or otherwise "invisible" to other tasks.  This is
+ * difficult to do when using the pagevec cache, so bypass that.
+ */
+void add_page_to_unevictable_list(struct page *page)
+{
+       struct zone *zone = page_zone(page);
+
+       spin_lock_irq(&zone->lru_lock);
+       SetPageUnevictable(page);
+       SetPageLRU(page);
+       add_page_to_lru_list(zone, page, LRU_UNEVICTABLE);
+       spin_unlock_irq(&zone->lru_lock);
+}
+
+/**
+ * lru_cache_add_active_or_unevictable
+ * @page:  the page to be added to LRU
+ * @vma:   vma in which page is mapped for determining reclaimability
+ *
+ * place @page on active or unevictable LRU list, depending on
+ * page_evictable().  Note that if the page is not evictable,
+ * it goes directly back onto it's zone's unevictable list.  It does
+ * NOT use a per cpu pagevec.
+ */
+void lru_cache_add_active_or_unevictable(struct page *page,
+                                       struct vm_area_struct *vma)
+{
+       if (page_evictable(page, vma))
+               lru_cache_add_lru(page, LRU_ACTIVE + page_is_file_cache(page));
+       else
+               add_page_to_unevictable_list(page);
 }
 
 /*
@@ -217,15 +272,15 @@ void lru_cache_add_active(struct page *page)
  */
 static void drain_cpu_pagevecs(int cpu)
 {
+       struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu);
        struct pagevec *pvec;
+       int lru;
 
-       pvec = &per_cpu(lru_add_pvecs, cpu);
-       if (pagevec_count(pvec))
-               __pagevec_lru_add(pvec);
-
-       pvec = &per_cpu(lru_add_active_pvecs, cpu);
-       if (pagevec_count(pvec))
-               __pagevec_lru_add_active(pvec);
+       for_each_lru(lru) {
+               pvec = &pvecs[lru - LRU_BASE];
+               if (pagevec_count(pvec))
+                       ____pagevec_lru_add(pvec, lru);
+       }
 
        pvec = &per_cpu(lru_rotate_pvecs, cpu);
        if (pagevec_count(pvec)) {
@@ -244,7 +299,7 @@ void lru_add_drain(void)
        put_cpu();
 }
 
-#ifdef CONFIG_NUMA
+#if defined(CONFIG_NUMA) || defined(CONFIG_UNEVICTABLE_LRU)
 static void lru_add_drain_per_cpu(struct work_struct *dummy)
 {
        lru_add_drain();
@@ -308,6 +363,7 @@ void release_pages(struct page **pages, int nr, int cold)
 
                if (PageLRU(page)) {
                        struct zone *pagezone = page_zone(page);
+
                        if (pagezone != zone) {
                                if (zone)
                                        spin_unlock_irqrestore(&zone->lru_lock,
@@ -380,10 +436,11 @@ void __pagevec_release_nonlru(struct pagevec *pvec)
  * Add the passed pages to the LRU, then drop the caller's refcount
  * on them.  Reinitialises the caller's pagevec.
  */
-void __pagevec_lru_add(struct pagevec *pvec)
+void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
 {
        int i;
        struct zone *zone = NULL;
+       VM_BUG_ON(is_unevictable_lru(lru));
 
        for (i = 0; i < pagevec_count(pvec); i++) {
                struct page *page = pvec->pages[i];
@@ -395,9 +452,13 @@ void __pagevec_lru_add(struct pagevec *pvec)
                        zone = pagezone;
                        spin_lock_irq(&zone->lru_lock);
                }
+               VM_BUG_ON(PageActive(page));
+               VM_BUG_ON(PageUnevictable(page));
                VM_BUG_ON(PageLRU(page));
                SetPageLRU(page);
-               add_page_to_inactive_list(zone, page);
+               if (is_active_lru(lru))
+                       SetPageActive(page);
+               add_page_to_lru_list(zone, page, lru);
        }
        if (zone)
                spin_unlock_irq(&zone->lru_lock);
@@ -405,48 +466,45 @@ void __pagevec_lru_add(struct pagevec *pvec)
        pagevec_reinit(pvec);
 }
 
-EXPORT_SYMBOL(__pagevec_lru_add);
+EXPORT_SYMBOL(____pagevec_lru_add);
 
-void __pagevec_lru_add_active(struct pagevec *pvec)
+/*
+ * Try to drop buffers from the pages in a pagevec
+ */
+void pagevec_strip(struct pagevec *pvec)
 {
        int i;
-       struct zone *zone = NULL;
 
        for (i = 0; i < pagevec_count(pvec); i++) {
                struct page *page = pvec->pages[i];
-               struct zone *pagezone = page_zone(page);
 
-               if (pagezone != zone) {
-                       if (zone)
-                               spin_unlock_irq(&zone->lru_lock);
-                       zone = pagezone;
-                       spin_lock_irq(&zone->lru_lock);
+               if (PagePrivate(page) && trylock_page(page)) {
+                       if (PagePrivate(page))
+                               try_to_release_page(page, 0);
+                       unlock_page(page);
                }
-               VM_BUG_ON(PageLRU(page));
-               SetPageLRU(page);
-               VM_BUG_ON(PageActive(page));
-               SetPageActive(page);
-               add_page_to_active_list(zone, page);
        }
-       if (zone)
-               spin_unlock_irq(&zone->lru_lock);
-       release_pages(pvec->pages, pvec->nr, pvec->cold);
-       pagevec_reinit(pvec);
 }
 
-/*
- * Try to drop buffers from the pages in a pagevec
+/**
+ * pagevec_swap_free - try to free swap space from the pages in a pagevec
+ * @pvec: pagevec with swapcache pages to free the swap space of
+ *
+ * The caller needs to hold an extra reference to each page and
+ * not hold the page lock on the pages.  This function uses a
+ * trylock on the page lock so it may not always free the swap
+ * space associated with a page.
  */
-void pagevec_strip(struct pagevec *pvec)
+void pagevec_swap_free(struct pagevec *pvec)
 {
        int i;
 
        for (i = 0; i < pagevec_count(pvec); i++) {
                struct page *page = pvec->pages[i];
 
-               if (PagePrivate(page) && trylock_page(page)) {
-                       if (PagePrivate(page))
-                               try_to_release_page(page, 0);
+               if (PageSwapCache(page) && trylock_page(page)) {
+                       if (PageSwapCache(page))
+                               remove_exclusive_swap_page_ref(page);
                        unlock_page(page);
                }
        }
index 797c3831cbec7401ccfecd3ecdc98c7667c8d6bb..3353c9029cef1cdf4b3f86231bc931b0d732f540 100644 (file)
@@ -33,7 +33,7 @@ static const struct address_space_operations swap_aops = {
 };
 
 static struct backing_dev_info swap_backing_dev_info = {
-       .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK,
+       .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
        .unplug_io_fn   = swap_unplug_io_fn,
 };
 
@@ -75,6 +75,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
        BUG_ON(!PageLocked(page));
        BUG_ON(PageSwapCache(page));
        BUG_ON(PagePrivate(page));
+       BUG_ON(!PageSwapBacked(page));
        error = radix_tree_preload(gfp_mask);
        if (!error) {
                page_cache_get(page);
@@ -302,17 +303,19 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                 * re-using the just freed swap entry for an existing page.
                 * May fail (-ENOMEM) if radix-tree node allocation failed.
                 */
-               set_page_locked(new_page);
+               __set_page_locked(new_page);
+               SetPageSwapBacked(new_page);
                err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
                if (likely(!err)) {
                        /*
                         * Initiate read into locked page and return.
                         */
-                       lru_cache_add_active(new_page);
+                       lru_cache_add_anon(new_page);
                        swap_readpage(NULL, new_page);
                        return new_page;
                }
-               clear_page_locked(new_page);
+               ClearPageSwapBacked(new_page);
+               __clear_page_locked(new_page);
                swap_free(entry);
        } while (err != -ENOMEM);
 
index 1e330f2998fa259d2733f73f4c50ce0d649c5dc0..90cb67a5417cccea9f0352b6316f747bec4c608d 100644 (file)
@@ -344,7 +344,7 @@ int can_share_swap_page(struct page *page)
  * Work out if there are any other processes sharing this
  * swap cache page. Free it if you can. Return success.
  */
-int remove_exclusive_swap_page(struct page *page)
+static int remove_exclusive_swap_page_count(struct page *page, int count)
 {
        int retval;
        struct swap_info_struct * p;
@@ -357,7 +357,7 @@ int remove_exclusive_swap_page(struct page *page)
                return 0;
        if (PageWriteback(page))
                return 0;
-       if (page_count(page) != 2) /* 2: us + cache */
+       if (page_count(page) != count) /* us + cache + ptes */
                return 0;
 
        entry.val = page_private(page);
@@ -370,7 +370,7 @@ int remove_exclusive_swap_page(struct page *page)
        if (p->swap_map[swp_offset(entry)] == 1) {
                /* Recheck the page count with the swapcache lock held.. */
                spin_lock_irq(&swapper_space.tree_lock);
-               if ((page_count(page) == 2) && !PageWriteback(page)) {
+               if ((page_count(page) == count) && !PageWriteback(page)) {
                        __delete_from_swap_cache(page);
                        SetPageDirty(page);
                        retval = 1;
@@ -387,6 +387,25 @@ int remove_exclusive_swap_page(struct page *page)
        return retval;
 }
 
+/*
+ * Most of the time the page should have two references: one for the
+ * process and one for the swap cache.
+ */
+int remove_exclusive_swap_page(struct page *page)
+{
+       return remove_exclusive_swap_page_count(page, 2);
+}
+
+/*
+ * The pageout code holds an extra reference to the page.  That raises
+ * the reference count to test for to 2 for a page that is only in the
+ * swap cache plus 1 for each process that maps the page.
+ */
+int remove_exclusive_swap_page_ref(struct page *page)
+{
+       return remove_exclusive_swap_page_count(page, 2 + page_mapcount(page));
+}
+
 /*
  * Free the swap entry like above, but also try to
  * free the page cache entry if it is the last user.
@@ -403,7 +422,7 @@ void free_swap_and_cache(swp_entry_t entry)
        if (p) {
                if (swap_entry_free(p, swp_offset(entry)) == 1) {
                        page = find_get_page(&swapper_space, entry.val);
-                       if (page && unlikely(!trylock_page(page))) {
+                       if (page && !trylock_page(page)) {
                                page_cache_release(page);
                                page = NULL;
                        }
index e83e4b114ef12b78c74480cd0ddaa6b4369d3b4c..1229211104f84beb0e05e5df891f1780d392e418 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/buffer_head.h> /* grr. try_to_release_page,
                                   do_invalidatepage */
+#include "internal.h"
 
 
 /**
@@ -103,6 +104,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
 
        cancel_dirty_page(page, PAGE_CACHE_SIZE);
 
+       clear_page_mlock(page);
        remove_from_page_cache(page);
        ClearPageMappedToDisk(page);
        page_cache_release(page);       /* pagecache ref */
@@ -127,6 +129,7 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
        if (PagePrivate(page) && !try_to_release_page(page, 0))
                return 0;
 
+       clear_page_mlock(page);
        ret = remove_mapping(mapping, page);
 
        return ret;
@@ -352,6 +355,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
        if (PageDirty(page))
                goto failed;
 
+       clear_page_mlock(page);
        BUG_ON(PagePrivate(page));
        __remove_from_page_cache(page);
        spin_unlock_irq(&mapping->tree_lock);
index bba06c41fc59ed10be118f7af24ecfa71d023c33..712ae47af0bf5f23a4b32ea0497dd2f6e8272085 100644 (file)
@@ -8,6 +8,7 @@
  *  Numa awareness, Christoph Lameter, SGI, June 2005
  */
 
+#include <linux/vmalloc.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/highmem.h>
 #include <linux/debugobjects.h>
 #include <linux/vmalloc.h>
 #include <linux/kallsyms.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/radix-tree.h>
+#include <linux/rcupdate.h>
 
+#include <asm/atomic.h>
 #include <asm/uaccess.h>
 #include <asm/tlbflush.h>
 
 
-DEFINE_RWLOCK(vmlist_lock);
-struct vm_struct *vmlist;
-
-static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
-                           int node, void *caller);
+/*** Page table manipulation functions ***/
 
 static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
 {
@@ -40,8 +42,7 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
        } while (pte++, addr += PAGE_SIZE, addr != end);
 }
 
-static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr,
-                                               unsigned long end)
+static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
 {
        pmd_t *pmd;
        unsigned long next;
@@ -55,8 +56,7 @@ static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr,
        } while (pmd++, addr = next, addr != end);
 }
 
-static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr,
-                                               unsigned long end)
+static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
 {
        pud_t *pud;
        unsigned long next;
@@ -70,12 +70,10 @@ static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr,
        } while (pud++, addr = next, addr != end);
 }
 
-void unmap_kernel_range(unsigned long addr, unsigned long size)
+static void vunmap_page_range(unsigned long addr, unsigned long end)
 {
        pgd_t *pgd;
        unsigned long next;
-       unsigned long start = addr;
-       unsigned long end = addr + size;
 
        BUG_ON(addr >= end);
        pgd = pgd_offset_k(addr);
@@ -86,35 +84,36 @@ void unmap_kernel_range(unsigned long addr, unsigned long size)
                        continue;
                vunmap_pud_range(pgd, addr, next);
        } while (pgd++, addr = next, addr != end);
-       flush_tlb_kernel_range(start, end);
-}
-
-static void unmap_vm_area(struct vm_struct *area)
-{
-       unmap_kernel_range((unsigned long)area->addr, area->size);
 }
 
 static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
-                       unsigned long end, pgprot_t prot, struct page ***pages)
+               unsigned long end, pgprot_t prot, struct page **pages, int *nr)
 {
        pte_t *pte;
 
+       /*
+        * nr is a running index into the array which helps higher level
+        * callers keep track of where we're up to.
+        */
+
        pte = pte_alloc_kernel(pmd, addr);
        if (!pte)
                return -ENOMEM;
        do {
-               struct page *page = **pages;
-               WARN_ON(!pte_none(*pte));
-               if (!page)
+               struct page *page = pages[*nr];
+
+               if (WARN_ON(!pte_none(*pte)))
+                       return -EBUSY;
+               if (WARN_ON(!page))
                        return -ENOMEM;
                set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
-               (*pages)++;
+               (*nr)++;
        } while (pte++, addr += PAGE_SIZE, addr != end);
        return 0;
 }
 
-static inline int vmap_pmd_range(pud_t *pud, unsigned long addr,
-                       unsigned long end, pgprot_t prot, struct page ***pages)
+static int vmap_pmd_range(pud_t *pud, unsigned long addr,
+               unsigned long end, pgprot_t prot, struct page **pages, int *nr)
 {
        pmd_t *pmd;
        unsigned long next;
@@ -124,14 +123,14 @@ static inline int vmap_pmd_range(pud_t *pud, unsigned long addr,
                return -ENOMEM;
        do {
                next = pmd_addr_end(addr, end);
-               if (vmap_pte_range(pmd, addr, next, prot, pages))
+               if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
                        return -ENOMEM;
        } while (pmd++, addr = next, addr != end);
        return 0;
 }
 
-static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr,
-                       unsigned long end, pgprot_t prot, struct page ***pages)
+static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
+               unsigned long end, pgprot_t prot, struct page **pages, int *nr)
 {
        pud_t *pud;
        unsigned long next;
@@ -141,44 +140,49 @@ static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr,
                return -ENOMEM;
        do {
                next = pud_addr_end(addr, end);
-               if (vmap_pmd_range(pud, addr, next, prot, pages))
+               if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
                        return -ENOMEM;
        } while (pud++, addr = next, addr != end);
        return 0;
 }
 
-int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
+/*
+ * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and
+ * will have pfns corresponding to the "pages" array.
+ *
+ * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N]
+ */
+static int vmap_page_range(unsigned long addr, unsigned long end,
+                               pgprot_t prot, struct page **pages)
 {
        pgd_t *pgd;
        unsigned long next;
-       unsigned long addr = (unsigned long) area->addr;
-       unsigned long end = addr + area->size - PAGE_SIZE;
-       int err;
+       int err = 0;
+       int nr = 0;
 
        BUG_ON(addr >= end);
        pgd = pgd_offset_k(addr);
        do {
                next = pgd_addr_end(addr, end);
-               err = vmap_pud_range(pgd, addr, next, prot, pages);
+               err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
                if (err)
                        break;
        } while (pgd++, addr = next, addr != end);
-       flush_cache_vmap((unsigned long) area->addr, end);
-       return err;
+       flush_cache_vmap(addr, end);
+
+       if (unlikely(err))
+               return err;
+       return nr;
 }
-EXPORT_SYMBOL_GPL(map_vm_area);
 
 /*
- * Map a vmalloc()-space virtual address to the physical page.
+ * Walk a vmap address to the struct page it maps.
  */
 struct page *vmalloc_to_page(const void *vmalloc_addr)
 {
        unsigned long addr = (unsigned long) vmalloc_addr;
        struct page *page = NULL;
        pgd_t *pgd = pgd_offset_k(addr);
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *ptep, pte;
 
        /*
         * XXX we might need to change this if we add VIRTUAL_BUG_ON for
@@ -188,10 +192,12 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
                        !is_module_address(addr));
 
        if (!pgd_none(*pgd)) {
-               pud = pud_offset(pgd, addr);
+               pud_t *pud = pud_offset(pgd, addr);
                if (!pud_none(*pud)) {
-                       pmd = pmd_offset(pud, addr);
+                       pmd_t *pmd = pmd_offset(pud, addr);
                        if (!pmd_none(*pmd)) {
+                               pte_t *ptep, pte;
+
                                ptep = pte_offset_map(pmd, addr);
                                pte = *ptep;
                                if (pte_present(pte))
@@ -213,13 +219,751 @@ unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
 }
 EXPORT_SYMBOL(vmalloc_to_pfn);
 
-static struct vm_struct *
-__get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start,
-               unsigned long end, int node, gfp_t gfp_mask, void *caller)
+
+/*** Global kva allocator ***/
+
+/* vmap_area::flags bits */
+#define VM_LAZY_FREE   0x01
+#define VM_LAZY_FREEING        0x02
+#define VM_VM_AREA     0x04
+
+/* One allocated region of kernel virtual address space. */
+struct vmap_area {
+       unsigned long va_start;         /* start address of the range */
+       unsigned long va_end;           /* end of the range (exclusive) */
+       unsigned long flags;            /* VM_LAZY_FREE/FREEING/VM_AREA */
+       struct rb_node rb_node;         /* address sorted rbtree */
+       struct list_head list;          /* address sorted list */
+       struct list_head purge_list;    /* "lazy purge" list */
+       void *private;                  /* the vm_struct, when VM_VM_AREA */
+       struct rcu_head rcu_head;       /* deferred kfree via rcu_free_va() */
+};
+
+static DEFINE_SPINLOCK(vmap_area_lock);
+static struct rb_root vmap_area_root = RB_ROOT;
+static LIST_HEAD(vmap_area_list);
+
+/*
+ * Find the vmap_area whose va_start equals @addr exactly.
+ * Note: this matches only on the start address, not on containment;
+ * callers hand back the base address that was returned at allocation
+ * time (see free_unmap_vmap_area_addr).  Caller must hold vmap_area_lock.
+ */
+static struct vmap_area *__find_vmap_area(unsigned long addr)
+{
+       struct rb_node *n = vmap_area_root.rb_node;
+
+       while (n) {
+               struct vmap_area *va;
+
+               va = rb_entry(n, struct vmap_area, rb_node);
+               if (addr < va->va_start)
+                       n = n->rb_left;
+               else if (addr > va->va_start)
+                       n = n->rb_right;
+               else
+                       return va;
+       }
+
+       return NULL;
+}
+
+/*
+ * Link @va into the address-sorted rbtree and the RCU list.
+ * Caller must hold vmap_area_lock.
+ *
+ * NOTE(review): the descent comparisons are not a strict interval test --
+ * an area overlapping an existing node descends left instead of reaching
+ * the BUG() branch (which is only reachable for an empty/inverted range).
+ * Non-overlap is presumably guaranteed by alloc_vmap_area -- confirm.
+ * Also note the inner 'tmp' (vmap_area *) shadows the outer 'tmp'
+ * (rb_node *) used after the loop.
+ */
+static void __insert_vmap_area(struct vmap_area *va)
+{
+       struct rb_node **p = &vmap_area_root.rb_node;
+       struct rb_node *parent = NULL;
+       struct rb_node *tmp;
+
+       while (*p) {
+               struct vmap_area *tmp;
+
+               parent = *p;
+               tmp = rb_entry(parent, struct vmap_area, rb_node);
+               if (va->va_start < tmp->va_end)
+                       p = &(*p)->rb_left;
+               else if (va->va_end > tmp->va_start)
+                       p = &(*p)->rb_right;
+               else
+                       BUG();
+       }
+
+       rb_link_node(&va->rb_node, parent, p);
+       rb_insert_color(&va->rb_node, &vmap_area_root);
+
+       /* address-sort this list so it is usable like the vmlist */
+       tmp = rb_prev(&va->rb_node);
+       if (tmp) {
+               struct vmap_area *prev;
+               prev = rb_entry(tmp, struct vmap_area, rb_node);
+               list_add_rcu(&va->list, &prev->list);
+       } else
+               list_add_rcu(&va->list, &vmap_area_list);
+}
+
+static void purge_vmap_area_lazy(void);
+
+/*
+ * Allocate a region of KVA of the specified size and alignment, within the
+ * vstart and vend.
+ */
+static struct vmap_area *alloc_vmap_area(unsigned long size,
+                               unsigned long align,
+                               unsigned long vstart, unsigned long vend,
+                               int node, gfp_t gfp_mask)
+{
+       struct vmap_area *va;
+       struct rb_node *n;
+       unsigned long addr;
+       int purged = 0;
+
+       BUG_ON(size & ~PAGE_MASK);
+
+       addr = ALIGN(vstart, align);
+
+       va = kmalloc_node(sizeof(struct vmap_area),
+                       gfp_mask & GFP_RECLAIM_MASK, node);
+       if (unlikely(!va))
+               return ERR_PTR(-ENOMEM);
+
+retry:
+       spin_lock(&vmap_area_lock);
+       /* XXX: could have a last_hole cache */
+       n = vmap_area_root.rb_node;
+       if (n) {
+               struct vmap_area *first = NULL;
+
+               /* find the lowest area that could collide with [addr, addr+size) */
+               do {
+                       struct vmap_area *tmp;
+                       tmp = rb_entry(n, struct vmap_area, rb_node);
+                       if (tmp->va_end >= addr) {
+                               if (!first && tmp->va_start < addr + size)
+                                       first = tmp;
+                               n = n->rb_left;
+                       } else {
+                               first = tmp;
+                               n = n->rb_right;
+                       }
+               } while (n);
+
+               if (!first)
+                       goto found;
+
+               if (first->va_end < addr) {
+                       n = rb_next(&first->rb_node);
+                       if (n)
+                               first = rb_entry(n, struct vmap_area, rb_node);
+                       else
+                               goto found;
+               }
+
+               /* walk successors until a large enough hole opens up */
+               while (addr + size >= first->va_start && addr + size <= vend) {
+                       addr = ALIGN(first->va_end + PAGE_SIZE, align);
+
+                       n = rb_next(&first->rb_node);
+                       if (n)
+                               first = rb_entry(n, struct vmap_area, rb_node);
+                       else
+                               goto found;
+               }
+       }
+found:
+       if (addr + size > vend) {
+               spin_unlock(&vmap_area_lock);
+               if (!purged) {
+                       /* reclaim lazily freed areas, then retry once */
+                       purge_vmap_area_lazy();
+                       purged = 1;
+                       goto retry;
+               }
+               if (printk_ratelimit())
+                       printk(KERN_WARNING "vmap allocation failed: "
+                                "use vmalloc=<size> to increase size.\n");
+               kfree(va);      /* fix: va was leaked on this failure path */
+               return ERR_PTR(-EBUSY);
+       }
+
+       BUG_ON(addr & (align-1));
+
+       va->va_start = addr;
+       va->va_end = addr + size;
+       va->flags = 0;
+       __insert_vmap_area(va);
+       spin_unlock(&vmap_area_lock);
+
+       return va;
+}
+
+static void rcu_free_va(struct rcu_head *head)
+{
+       /* RCU callback: the grace period has elapsed, free the area */
+       kfree(container_of(head, struct vmap_area, rcu_head));
+}
+
+/*
+ * Unlink @va from the rbtree and the address-sorted list, and schedule
+ * its kfree after an RCU grace period (RCU list walkers may still see it).
+ * Caller must hold vmap_area_lock.
+ */
+static void __free_vmap_area(struct vmap_area *va)
+{
+       BUG_ON(RB_EMPTY_NODE(&va->rb_node));
+       rb_erase(&va->rb_node, &vmap_area_root);
+       RB_CLEAR_NODE(&va->rb_node);
+       list_del_rcu(&va->list);
+
+       call_rcu(&va->rcu_head, rcu_free_va);
+}
+
+/*
+ * Free a region of KVA allocated by alloc_vmap_area
+ */
+/* Locked wrapper around __free_vmap_area(); takes vmap_area_lock itself. */
+static void free_vmap_area(struct vmap_area *va)
+{
+       spin_lock(&vmap_area_lock);
+       __free_vmap_area(va);
+       spin_unlock(&vmap_area_lock);
+}
+
+/*
+ * Clear the pagetable entries of a given vmap_area
+ */
+/* Clear the page table entries of @va's range (callers do the TLB flush). */
+static void unmap_vmap_area(struct vmap_area *va)
+{
+       vunmap_page_range(va->va_start, va->va_end);
+}
+
+/*
+ * lazy_max_pages is the maximum amount of virtual address space we gather up
+ * before attempting to purge with a TLB flush.
+ *
+ * There is a tradeoff here: a larger number will cover more kernel page tables
+ * and take slightly longer to purge, but it will linearly reduce the number of
+ * global TLB flushes that must be performed. It would seem natural to scale
+ * this number up linearly with the number of CPUs (because vmapping activity
+ * could also scale linearly with the number of CPUs), however it is likely
+ * that in practice, workloads might be constrained in other ways that mean
+ * vmap activity will not scale linearly with CPUs. Also, I want to be
+ * conservative and not introduce a big latency on huge systems, so go with
+ * a less aggressive log scale. It will still be an improvement over the old
+ * code, and it will be simple to change the scale factor if we find that it
+ * becomes a problem on bigger systems.
+ */
+static unsigned long lazy_max_pages(void)
+{
+       /* 32MB worth of pages, scaled by log2 of the online CPU count
+        * (see the rationale in the comment block above) */
+       unsigned int log = fls(num_online_cpus());
+
+       return log * (32UL * 1024 * 1024 / PAGE_SIZE);
+}
+
+static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
+
+/*
+ * Purges all lazily-freed vmap areas.
+ *
+ * If sync is 0 then don't purge if there is already a purge in progress.
+ * If force_flush is 1, then flush kernel TLBs between *start and *end even
+ * if we found no lazy vmap areas to unmap (callers can use this to optimise
+ * their own TLB flushing).
+ * Returns with *start = min(*start, lowest purged address)
+ *              *end = max(*end, highest purged address)
+ */
+static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
+                                       int sync, int force_flush)
+{
+       static DEFINE_SPINLOCK(purge_lock);
+       LIST_HEAD(valist);
+       struct vmap_area *va;
+       int nr = 0;     /* pages purged, for the vmap_lazy_nr accounting */
+
+       /*
+        * If sync is 0 but force_flush is 1, we'll go sync anyway but callers
+        * should not expect such behaviour. This just simplifies locking for
+        * the case that isn't actually used at the moment anyway.
+        */
+       if (!sync && !force_flush) {
+               if (!spin_trylock(&purge_lock))
+                       return;
+       } else
+               spin_lock(&purge_lock);
+
+       /* gather lazily freed areas, clearing their PTEs as we go */
+       rcu_read_lock();
+       list_for_each_entry_rcu(va, &vmap_area_list, list) {
+               if (va->flags & VM_LAZY_FREE) {
+                       if (va->va_start < *start)
+                               *start = va->va_start;
+                       if (va->va_end > *end)
+                               *end = va->va_end;
+                       nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
+                       unmap_vmap_area(va);
+                       list_add_tail(&va->purge_list, &valist);
+                       va->flags |= VM_LAZY_FREEING;
+                       va->flags &= ~VM_LAZY_FREE;
+               }
+       }
+       rcu_read_unlock();
+
+       if (nr) {
+               BUG_ON(nr > atomic_read(&vmap_lazy_nr));
+               atomic_sub(nr, &vmap_lazy_nr);
+       }
+
+       /* one global flush covering every area purged above */
+       if (nr || force_flush)
+               flush_tlb_kernel_range(*start, *end);
+
+       /* now unmapped and flushed: unlink and free under vmap_area_lock */
+       if (nr) {
+               spin_lock(&vmap_area_lock);
+               list_for_each_entry(va, &valist, purge_list)
+                       __free_vmap_area(va);
+               spin_unlock(&vmap_area_lock);
+       }
+       spin_unlock(&purge_lock);
+}
+
+/*
+ * Kick off a purge of the outstanding lazy areas.
+ */
+static void purge_vmap_area_lazy(void)
+{
+       /* min/max seeds: __purge_vmap_area_lazy() narrows these down to
+        * the range actually purged before flushing */
+       unsigned long start = ULONG_MAX, end = 0;
+
+       __purge_vmap_area_lazy(&start, &end, 0, 0);
+}
+
+/*
+ * Free and unmap a vmap area
+ */
+static void free_unmap_vmap_area(struct vmap_area *va)
+{
+       /* mark lazily freed; the unmap/flush/free happens at purge time */
+       va->flags |= VM_LAZY_FREE;
+       atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
+       if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
+               purge_vmap_area_lazy();
+}
+
+/* Locked lookup of the vmap_area starting exactly at @addr. */
+static struct vmap_area *find_vmap_area(unsigned long addr)
+{
+       struct vmap_area *ret;
+
+       spin_lock(&vmap_area_lock);
+       ret = __find_vmap_area(addr);
+       spin_unlock(&vmap_area_lock);
+       return ret;
+}
+
+/* Lazily free the vmap_area that starts at @addr; BUG if none exists. */
+static void free_unmap_vmap_area_addr(unsigned long addr)
+{
+       struct vmap_area *va;
+
+       va = find_vmap_area(addr);
+       BUG_ON(!va);
+       free_unmap_vmap_area(va);
+}
+
+
+/*** Per cpu kva allocator ***/
+
+/*
+ * vmap space is limited especially on 32 bit architectures. Ensure there is
+ * room for at least 16 percpu vmap blocks per CPU.
+ */
+/*
+ * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able
+ * to #define VMALLOC_SPACE            (VMALLOC_END-VMALLOC_START). Guess
+ * instead (we just need a rough idea)
+ */
+#if BITS_PER_LONG == 32
+#define VMALLOC_SPACE          (128UL*1024*1024)
+#else
+#define VMALLOC_SPACE          (128UL*1024*1024*1024)
+#endif
+
+#define VMALLOC_PAGES          (VMALLOC_SPACE / PAGE_SIZE)
+#define VMAP_MAX_ALLOC         BITS_PER_LONG   /* 256K with 4K pages */
+#define VMAP_BBMAP_BITS_MAX    1024    /* 4MB with 4K pages */
+#define VMAP_BBMAP_BITS_MIN    (VMAP_MAX_ALLOC*2)
+#define VMAP_MIN(x, y)         ((x) < (y) ? (x) : (y)) /* can't use min() */
+#define VMAP_MAX(x, y)         ((x) > (y) ? (x) : (y)) /* can't use max() */
+/* pages per vmap_block: share of VMALLOC space, clamped to [MIN, MAX] */
+#define VMAP_BBMAP_BITS                VMAP_MIN(VMAP_BBMAP_BITS_MAX,           \
+                                       VMAP_MAX(VMAP_BBMAP_BITS_MIN,   \
+                                               VMALLOC_PAGES / NR_CPUS / 16))
+
+#define VMAP_BLOCK_SIZE                (VMAP_BBMAP_BITS * PAGE_SIZE)
+
+/* Per-CPU queues of vmap_blocks with free and with dirty space. */
+struct vmap_block_queue {
+       spinlock_t lock;                /* protects both lists below */
+       struct list_head free;          /* blocks with allocatable space */
+       struct list_head dirty;         /* blocks holding freed (dirty) space */
+       unsigned int nr_dirty;          /* NOTE(review): zeroed at init but
+                                        * never updated in this file's
+                                        * visible code -- verify */
+};
+
+/* A VMAP_BLOCK_SIZE chunk of KVA carved up for small vm_map_ram allocs. */
+struct vmap_block {
+       spinlock_t lock;                /* protects counts and bitmaps */
+       struct vmap_area *va;           /* the backing KVA range */
+       struct vmap_block_queue *vbq;   /* home CPU's queue */
+       unsigned long free, dirty;      /* page counts: allocatable / freed */
+       DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);     /* allocated pages */
+       DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);     /* freed, not yet flushed */
+       union {
+               struct {
+                       struct list_head free_list;     /* on vbq->free */
+                       struct list_head dirty_list;    /* on vbq->dirty */
+               };
+               /* reused for deferred kfree once off both lists */
+               struct rcu_head rcu_head;
+       };
+};
+
+/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
+static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
+
+/*
+ * Radix tree of vmap blocks, indexed by address, to quickly find a vmap block
+ * in the free path. Could get rid of this if we change the API to return a
+ * "cookie" from alloc, to be passed to free. But no big deal yet.
+ */
+static DEFINE_SPINLOCK(vmap_block_tree_lock);
+static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);
+
+/*
+ * We should probably have a fallback mechanism to allocate virtual memory
+ * out of partially filled vmap blocks. However vmap block sizing should be
+ * fairly reasonable according to the vmalloc size, so it shouldn't be a
+ * big problem.
+ */
+
+/* Index of the VMAP_BLOCK_SIZE-aligned block containing @addr. */
+static unsigned long addr_to_vb_idx(unsigned long addr)
+{
+       unsigned long base = VMALLOC_START & ~(VMAP_BLOCK_SIZE - 1);
+
+       return (addr - base) / VMAP_BLOCK_SIZE;
+}
+
+/*
+ * Allocate a fresh vmap_block: reserve VMAP_BLOCK_SIZE of KVA, register
+ * the block in the radix tree and put it on this CPU's free queue.
+ * Returns the block, or an ERR_PTR on failure.
+ */
+static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
+{
+       struct vmap_block_queue *vbq;
+       struct vmap_block *vb;
+       struct vmap_area *va;
+       unsigned long vb_idx;
+       int node, err;
+
+       node = numa_node_id();
+
+       vb = kmalloc_node(sizeof(struct vmap_block),
+                       gfp_mask & GFP_RECLAIM_MASK, node);
+       if (unlikely(!vb))
+               return ERR_PTR(-ENOMEM);
+
+       va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
+                                       VMALLOC_START, VMALLOC_END,
+                                       node, gfp_mask);
+       if (unlikely(IS_ERR(va))) {
+               kfree(vb);
+               return ERR_PTR(PTR_ERR(va));
+       }
+
+       err = radix_tree_preload(gfp_mask);
+       if (unlikely(err)) {
+               kfree(vb);
+               free_vmap_area(va);
+               return ERR_PTR(err);
+       }
+
+       spin_lock_init(&vb->lock);
+       vb->va = va;
+       vb->free = VMAP_BBMAP_BITS;
+       vb->dirty = 0;
+       bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS);
+       bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
+       INIT_LIST_HEAD(&vb->free_list);
+       INIT_LIST_HEAD(&vb->dirty_list);
+
+       vb_idx = addr_to_vb_idx(va->va_start);
+       spin_lock(&vmap_block_tree_lock);
+       err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
+       spin_unlock(&vmap_block_tree_lock);
+       BUG_ON(err);    /* the preload guarantees the insert cannot fail */
+       radix_tree_preload_end();
+
+       vbq = &get_cpu_var(vmap_block_queue);
+       vb->vbq = vbq;
+       spin_lock(&vbq->lock);
+       list_add(&vb->free_list, &vbq->free);
+       spin_unlock(&vbq->lock);
+       /* fix: must name the same per-cpu variable as get_cpu_var() above;
+        * 'vmap_cpu_blocks' does not exist */
+       put_cpu_var(vmap_block_queue);
+
+       return vb;
+}
+
+static void rcu_free_vb(struct rcu_head *head)
+{
+       /* RCU callback: no reader can still see the block, free it */
+       kfree(container_of(head, struct vmap_block, rcu_head));
+}
+
+/*
+ * Tear down a fully-dirty vmap_block: take it off its queue lists and
+ * the radix tree, lazily free its KVA, and kfree the block itself after
+ * an RCU grace period (free-list walkers may still hold a reference).
+ */
+static void free_vmap_block(struct vmap_block *vb)
+{
+       struct vmap_block *tmp;
+       unsigned long vb_idx;
+
+       spin_lock(&vb->vbq->lock);
+       if (!list_empty(&vb->free_list))
+               list_del(&vb->free_list);
+       if (!list_empty(&vb->dirty_list))
+               list_del(&vb->dirty_list);
+       spin_unlock(&vb->vbq->lock);
+
+       vb_idx = addr_to_vb_idx(vb->va->va_start);
+       spin_lock(&vmap_block_tree_lock);
+       tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
+       spin_unlock(&vmap_block_tree_lock);
+       BUG_ON(tmp != vb);      /* the tree must have pointed at this block */
+
+       free_unmap_vmap_area(vb->va);
+       call_rcu(&vb->rcu_head, rcu_free_vb);
+}
+
+/*
+ * Allocate @size bytes (a page multiple, at most VMAP_MAX_ALLOC pages)
+ * of KVA out of one of this CPU's vmap_blocks, creating a new block when
+ * none has room.  Returns the address, or an ERR_PTR on failure.
+ */
+static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
+{
+       struct vmap_block_queue *vbq;
+       struct vmap_block *vb;
+       unsigned long addr = 0;
+       unsigned int order;
+
+       BUG_ON(size & ~PAGE_MASK);
+       BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
+       order = get_order(size);
+
+again:
+       rcu_read_lock();
+       vbq = &get_cpu_var(vmap_block_queue);
+       list_for_each_entry_rcu(vb, &vbq->free, free_list) {
+               int i;
+
+               spin_lock(&vb->lock);
+               i = bitmap_find_free_region(vb->alloc_map,
+                                               VMAP_BBMAP_BITS, order);
+
+               if (i >= 0) {
+                       addr = vb->va->va_start + (i << PAGE_SHIFT);
+                       BUG_ON(addr_to_vb_idx(addr) !=
+                                       addr_to_vb_idx(vb->va->va_start));
+                       vb->free -= 1UL << order;
+                       if (vb->free == 0) {
+                               /* block exhausted: drop it from the free queue */
+                               spin_lock(&vbq->lock);
+                               list_del_init(&vb->free_list);
+                               spin_unlock(&vbq->lock);
+                       }
+                       spin_unlock(&vb->lock);
+                       break;
+               }
+               spin_unlock(&vb->lock);
+       }
+       /* fix: must name the same per-cpu variable as get_cpu_var() above;
+        * 'vmap_cpu_blocks' does not exist */
+       put_cpu_var(vmap_block_queue);
+       rcu_read_unlock();
+
+       if (!addr) {
+               vb = new_vmap_block(gfp_mask);
+               if (IS_ERR(vb))
+                       return vb;
+               goto again;
+       }
+
+       return (void *)addr;
+}
+
+/*
+ * Return @size bytes at @addr to the owning vmap_block.  The space is
+ * only marked dirty (it stays mapped until a lazy flush); a block that
+ * becomes fully dirty is torn down entirely.
+ */
+static void vb_free(const void *addr, unsigned long size)
+{
+       unsigned long offset;
+       unsigned long vb_idx;
+       unsigned int order;
+       struct vmap_block *vb;
+
+       BUG_ON(size & ~PAGE_MASK);
+       BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
+       order = get_order(size);
+
+       offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
+
+       vb_idx = addr_to_vb_idx((unsigned long)addr);
+       rcu_read_lock();
+       vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
+       rcu_read_unlock();
+       BUG_ON(!vb);
+
+       spin_lock(&vb->lock);
+       /* mark the freed pages dirty in the block's dirty bitmap */
+       bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order);
+       if (!vb->dirty) {
+               /* first dirty space in this block: queue it for flushing */
+               spin_lock(&vb->vbq->lock);
+               list_add(&vb->dirty_list, &vb->vbq->dirty);
+               spin_unlock(&vb->vbq->lock);
+       }
+       vb->dirty += 1UL << order;
+       if (vb->dirty == VMAP_BBMAP_BITS) {
+               BUG_ON(vb->free || !list_empty(&vb->free_list));
+               spin_unlock(&vb->lock);
+               free_vmap_block(vb);
+       } else
+               spin_unlock(&vb->lock);
+}
+
+/**
+ * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
+ *
+ * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily
+ * to amortize TLB flushing overheads. What this means is that any page you
+ * have now, may, in a former life, have been mapped into kernel virtual
+ * address by the vmap layer and so there might be some CPUs with TLB entries
+ * still referencing that page (additional to the regular 1:1 kernel mapping).
+ *
+ * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can
+ * be sure that none of the pages we have control over will have any aliases
+ * from the vmap layer.
+ */
+void vm_unmap_aliases(void)
+{
+       unsigned long start = ULONG_MAX, end = 0;
+       int cpu;
+       int flush = 0;
+
+       for_each_possible_cpu(cpu) {
+               struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
+               struct vmap_block *vb;
+
+               rcu_read_lock();
+               list_for_each_entry_rcu(vb, &vbq->free, free_list) {
+                       int i;
+
+                       spin_lock(&vb->lock);
+                       i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
+                       while (i < VMAP_BBMAP_BITS) {
+                               unsigned long s, e;
+                               int j;
+                               /* [i, j) is one contiguous run of dirty pages */
+                               j = find_next_zero_bit(vb->dirty_map,
+                                       VMAP_BBMAP_BITS, i);
+
+                               s = vb->va->va_start + (i << PAGE_SHIFT);
+                               e = vb->va->va_start + (j << PAGE_SHIFT);
+                               vunmap_page_range(s, e);
+                               flush = 1;
+
+                               if (s < start)
+                                       start = s;
+                               if (e > end)
+                                       end = e;
+
+                               /* jump to the next dirty run (folds the
+                                * redundant 'i = j;' into the search start) */
+                               i = find_next_bit(vb->dirty_map,
+                                                       VMAP_BBMAP_BITS, j);
+                       }
+                       spin_unlock(&vb->lock);
+               }
+               rcu_read_unlock();
+       }
+
+       __purge_vmap_area_lazy(&start, &end, 1, flush);
+}
+EXPORT_SYMBOL_GPL(vm_unmap_aliases);
+
+/**
+ * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
+ * @mem: the pointer returned by vm_map_ram
+ * @count: the count passed to that vm_map_ram call (cannot unmap partial)
+ */
+void vm_unmap_ram(const void *mem, unsigned int count)
+{
+       unsigned long size = count << PAGE_SHIFT;
+       unsigned long addr = (unsigned long)mem;
+
+       BUG_ON(!addr);
+       BUG_ON(addr < VMALLOC_START);
+       /* NOTE(review): only the start address is range-checked here;
+        * addr + size could still run past VMALLOC_END -- confirm intent */
+       BUG_ON(addr > VMALLOC_END);
+       BUG_ON(addr & (PAGE_SIZE-1));
+
+       debug_check_no_locks_freed(mem, size);
+
+       /* small allocations came from a vmap_block, large ones from a
+        * dedicated vmap_area -- mirror the split made in vm_map_ram() */
+       if (likely(count <= VMAP_MAX_ALLOC))
+               vb_free(mem, size);
+       else
+               free_unmap_vmap_area_addr(addr);
+}
+EXPORT_SYMBOL(vm_unmap_ram);
+
+/**
+ * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
+ * @pages: an array of pointers to the pages to be mapped
+ * @count: number of pages
+ * @node: prefer to allocate data structures on this node
+ * @prot: memory protection to use. PAGE_KERNEL for regular RAM
+ * @returns: a pointer to the address that has been mapped, or NULL on failure
+ */
+void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
+{
+       unsigned long size = count << PAGE_SHIFT;
        unsigned long addr;
+       void *mem;
+
+       if (likely(count <= VMAP_MAX_ALLOC)) {
+               /* small request: carve KVA out of a per-cpu vmap_block */
+               mem = vb_alloc(size, GFP_KERNEL);
+               if (IS_ERR(mem))
+                       return NULL;
+               addr = (unsigned long)mem;
+       } else {
+               /* large request: give it a dedicated vmap_area */
+               struct vmap_area *va;
+               va = alloc_vmap_area(size, PAGE_SIZE,
+                               VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
+               if (IS_ERR(va))
+                       return NULL;
+
+               addr = va->va_start;
+               mem = (void *)addr;
+       }
+       /* back the range with the caller's pages; undo on failure */
+       if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
+               vm_unmap_ram(mem, count);
+               return NULL;
+       }
+       return mem;
+}
+EXPORT_SYMBOL(vm_map_ram);
+
+/* Initialise each possible CPU's vmap block queue. */
+void __init vmalloc_init(void)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
+
+               spin_lock_init(&vbq->lock);
+               INIT_LIST_HEAD(&vbq->free);
+               INIT_LIST_HEAD(&vbq->dirty);
+               vbq->nr_dirty = 0;
+       }
+}
+
+/* Unmap and TLB-flush the kernel virtual range [addr, addr + size). */
+void unmap_kernel_range(unsigned long addr, unsigned long size)
+{
+       vunmap_page_range(addr, addr + size);
+       flush_tlb_kernel_range(addr, addr + size);
+}
+
+/*
+ * Map the pages in *@pages into @area's address range (excluding the
+ * trailing guard page).  On success returns 0 and advances *@pages past
+ * the pages consumed; vmap_page_range's positive return is the count of
+ * pages it mapped.
+ */
+int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
+{
+       unsigned long addr = (unsigned long)area->addr;
+       unsigned long end = addr + area->size - PAGE_SIZE;
+       int err;
+
+       err = vmap_page_range(addr, end, prot, *pages);
+       if (err > 0) {
+               *pages += err;
+               err = 0;
+       }
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(map_vm_area);
+
+/*** Old vmalloc interfaces ***/
+/* Legacy address-sorted list of vm_structs, protected by vmlist_lock. */
+DEFINE_RWLOCK(vmlist_lock);
+struct vm_struct *vmlist;
+
+static struct vm_struct *__get_vm_area_node(unsigned long size,
+               unsigned long flags, unsigned long start, unsigned long end,
+               int node, gfp_t gfp_mask, void *caller)
+{
+       /* NOTE(review): 'static' here makes va a single shared variable
+        * across all concurrent callers -- it is assigned before use, so it
+        * "works", but it is racy-looking and surely a typo; this should
+        * almost certainly be a plain automatic variable. */
+       static struct vmap_area *va;
+       struct vm_struct *area;
+       struct vm_struct *tmp, **p;
+       unsigned long align = 1;
 
        BUG_ON(in_interrupt());
        if (flags & VM_IOREMAP) {
@@ -232,13 +976,12 @@ __get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start,
 
                align = 1ul << bit;
        }
-       addr = ALIGN(start, align);
+
        size = PAGE_ALIGN(size);
        if (unlikely(!size))
                return NULL;
 
        area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
-
        if (unlikely(!area))
                return NULL;
 
@@ -247,48 +990,32 @@ __get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start,
         */
        size += PAGE_SIZE;
 
-       write_lock(&vmlist_lock);
-       for (p = &vmlist; (tmp = *p) != NULL ;p = &tmp->next) {
-               if ((unsigned long)tmp->addr < addr) {
-                       if((unsigned long)tmp->addr + tmp->size >= addr)
-                               addr = ALIGN(tmp->size + 
-                                            (unsigned long)tmp->addr, align);
-                       continue;
-               }
-               if ((size + addr) < addr)
-                       goto out;
-               if (size + addr <= (unsigned long)tmp->addr)
-                       goto found;
-               addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align);
-               if (addr > end - size)
-                       goto out;
+       /* the KVA search/reservation is now delegated to the new allocator */
+       va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
+       if (IS_ERR(va)) {
+               kfree(area);
+               return NULL;
        }
-       if ((size + addr) < addr)
-               goto out;
-       if (addr > end - size)
-               goto out;
-
-found:
-       area->next = *p;
-       *p = area;
 
        area->flags = flags;
-       area->addr = (void *)addr;
+       area->addr = (void *)va->va_start;
        area->size = size;
        area->pages = NULL;
        area->nr_pages = 0;
        area->phys_addr = 0;
        area->caller = caller;
+       va->private = area;
+       va->flags |= VM_VM_AREA;
+
+       /* keep the legacy vmlist address-sorted for its remaining users */
+       write_lock(&vmlist_lock);
+       for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
+               if (tmp->addr >= area->addr)
+                       break;
+       }
+       area->next = *p;
+       *p = area;
        write_unlock(&vmlist_lock);
 
        return area;
-
-out:
-       write_unlock(&vmlist_lock);
-       kfree(area);
-       if (printk_ratelimit())
-               printk(KERN_WARNING "allocation failed: out of vmalloc space - use vmalloc=<size> to increase size.\n");
-       return NULL;
 }
 
 struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
@@ -328,39 +1055,15 @@ struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
                                  gfp_mask, __builtin_return_address(0));
 }
 
-/* Caller must hold vmlist_lock */
-static struct vm_struct *__find_vm_area(const void *addr)
+static struct vm_struct *find_vm_area(const void *addr)
 {
-       struct vm_struct *tmp;
+       struct vmap_area *va;
 
-       for (tmp = vmlist; tmp != NULL; tmp = tmp->next) {
-                if (tmp->addr == addr)
-                       break;
-       }
-
-       return tmp;
-}
-
-/* Caller must hold vmlist_lock */
-static struct vm_struct *__remove_vm_area(const void *addr)
-{
-       struct vm_struct **p, *tmp;
+       va = find_vmap_area((unsigned long)addr);
+       if (va && va->flags & VM_VM_AREA)
+               return va->private;
 
-       for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) {
-                if (tmp->addr == addr)
-                        goto found;
-       }
        return NULL;
-
-found:
-       unmap_vm_area(tmp);
-       *p = tmp->next;
-
-       /*
-        * Remove the guard page.
-        */
-       tmp->size -= PAGE_SIZE;
-       return tmp;
 }
 
 /**
@@ -373,11 +1076,24 @@ found:
  */
 struct vm_struct *remove_vm_area(const void *addr)
 {
-       struct vm_struct *v;
-       write_lock(&vmlist_lock);
-       v = __remove_vm_area(addr);
-       write_unlock(&vmlist_lock);
-       return v;
+       struct vmap_area *va;
+
+       va = find_vmap_area((unsigned long)addr);
+       if (va && va->flags & VM_VM_AREA) {
+               struct vm_struct *vm = va->private;
+               struct vm_struct *tmp, **p;
+               free_unmap_vmap_area(va);
+               vm->size -= PAGE_SIZE;
+
+               write_lock(&vmlist_lock);
+               for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
+                       ;
+               *p = tmp->next;
+               write_unlock(&vmlist_lock);
+
+               return vm;
+       }
+       return NULL;
 }
 
 static void __vunmap(const void *addr, int deallocate_pages)
@@ -487,6 +1203,8 @@ void *vmap(struct page **pages, unsigned int count,
 }
 EXPORT_SYMBOL(vmap);
 
+static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
+                           int node, void *caller);
 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
                                 pgprot_t prot, int node, void *caller)
 {
@@ -613,10 +1331,8 @@ void *vmalloc_user(unsigned long size)
 
        ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
        if (ret) {
-               write_lock(&vmlist_lock);
-               area = __find_vm_area(ret);
+               area = find_vm_area(ret);
                area->flags |= VM_USERMAP;
-               write_unlock(&vmlist_lock);
        }
        return ret;
 }
@@ -696,10 +1412,8 @@ void *vmalloc_32_user(unsigned long size)
 
        ret = __vmalloc(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL);
        if (ret) {
-               write_lock(&vmlist_lock);
-               area = __find_vm_area(ret);
+               area = find_vm_area(ret);
                area->flags |= VM_USERMAP;
-               write_unlock(&vmlist_lock);
        }
        return ret;
 }
@@ -800,26 +1514,25 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
        struct vm_struct *area;
        unsigned long uaddr = vma->vm_start;
        unsigned long usize = vma->vm_end - vma->vm_start;
-       int ret;
 
        if ((PAGE_SIZE-1) & (unsigned long)addr)
                return -EINVAL;
 
-       read_lock(&vmlist_lock);
-       area = __find_vm_area(addr);
+       area = find_vm_area(addr);
        if (!area)
-               goto out_einval_locked;
+               return -EINVAL;
 
        if (!(area->flags & VM_USERMAP))
-               goto out_einval_locked;
+               return -EINVAL;
 
        if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
-               goto out_einval_locked;
-       read_unlock(&vmlist_lock);
+               return -EINVAL;
 
        addr += pgoff << PAGE_SHIFT;
        do {
                struct page *page = vmalloc_to_page(addr);
+               int ret;
+
                ret = vm_insert_page(vma, uaddr, page);
                if (ret)
                        return ret;
@@ -832,11 +1545,7 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
        /* Prevent "things" like memory migration? VM_flags need a cleanup... */
        vma->vm_flags |= VM_RESERVED;
 
-       return ret;
-
-out_einval_locked:
-       read_unlock(&vmlist_lock);
-       return -EINVAL;
+       return 0;
 }
 EXPORT_SYMBOL(remap_vmalloc_range);
 
index 1ff1a58e7c1075fffecda7eb9ce5c360e8369a17..3b5860294bb6654a7b6765a327cef83a9952a29d 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/freezer.h>
 #include <linux/memcontrol.h>
 #include <linux/delayacct.h>
+#include <linux/sysctl.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -78,7 +79,7 @@ struct scan_control {
        unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
                        unsigned long *scanned, int order, int mode,
                        struct zone *z, struct mem_cgroup *mem_cont,
-                       int active);
+                       int active, int file);
 };
 
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
@@ -470,6 +471,85 @@ int remove_mapping(struct address_space *mapping, struct page *page)
        return 0;
 }
 
+/**
+ * putback_lru_page - put previously isolated page onto appropriate LRU list
+ * @page: page to be put back to appropriate lru list
+ *
+ * Add previously isolated @page to appropriate LRU list.
+ * Page may still be unevictable for other reasons.
+ *
+ * lru_lock must not be held, interrupts must be enabled.
+ */
+#ifdef CONFIG_UNEVICTABLE_LRU
+void putback_lru_page(struct page *page)
+{
+       int lru;
+       int active = !!TestClearPageActive(page);
+       int was_unevictable = PageUnevictable(page);
+
+       VM_BUG_ON(PageLRU(page));
+
+redo:
+       ClearPageUnevictable(page);
+
+       if (page_evictable(page, NULL)) {
+               /*
+                * For evictable pages, we can use the cache.
+                * In event of a race, worst case is we end up with an
+                * unevictable page on [in]active list.
+                * We know how to handle that.
+                */
+               lru = active + page_is_file_cache(page);
+               lru_cache_add_lru(page, lru);
+       } else {
+               /*
+                * Put unevictable pages directly on zone's unevictable
+                * list.
+                */
+               lru = LRU_UNEVICTABLE;
+               add_page_to_unevictable_list(page);
+       }
+       mem_cgroup_move_lists(page, lru);
+
+       /*
+        * page's status can change while we move it among lru. If an evictable
+        * page is on unevictable list, it never be freed. To avoid that,
+        * check after we added it to the list, again.
+        */
+       if (lru == LRU_UNEVICTABLE && page_evictable(page, NULL)) {
+               if (!isolate_lru_page(page)) {
+                       put_page(page);
+                       goto redo;
+               }
+               /* This means someone else dropped this page from LRU
+                * So, it will be freed or putback to LRU again. There is
+                * nothing to do here.
+                */
+       }
+
+       if (was_unevictable && lru != LRU_UNEVICTABLE)
+               count_vm_event(UNEVICTABLE_PGRESCUED);
+       else if (!was_unevictable && lru == LRU_UNEVICTABLE)
+               count_vm_event(UNEVICTABLE_PGCULLED);
+
+       put_page(page);         /* drop ref from isolate */
+}
+
+#else /* CONFIG_UNEVICTABLE_LRU */
+
+void putback_lru_page(struct page *page)
+{
+       int lru;
+       VM_BUG_ON(PageLRU(page));
+
+       lru = !!TestClearPageActive(page) + page_is_file_cache(page);
+       lru_cache_add_lru(page, lru);
+       mem_cgroup_move_lists(page, lru);
+       put_page(page);
+}
+#endif /* CONFIG_UNEVICTABLE_LRU */
+
+
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -503,6 +583,9 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
                sc->nr_scanned++;
 
+               if (unlikely(!page_evictable(page, NULL)))
+                       goto cull_mlocked;
+
                if (!sc->may_swap && page_mapped(page))
                        goto keep_locked;
 
@@ -539,9 +622,19 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                 * Anonymous process memory has backing store?
                 * Try to allocate it some swap space here.
                 */
-               if (PageAnon(page) && !PageSwapCache(page))
+               if (PageAnon(page) && !PageSwapCache(page)) {
+                       switch (try_to_munlock(page)) {
+                       case SWAP_FAIL:         /* shouldn't happen */
+                       case SWAP_AGAIN:
+                               goto keep_locked;
+                       case SWAP_MLOCK:
+                               goto cull_mlocked;
+                       case SWAP_SUCCESS:
+                               ; /* fall thru'; add to swap cache */
+                       }
                        if (!add_to_swap(page, GFP_ATOMIC))
                                goto activate_locked;
+               }
 #endif /* CONFIG_SWAP */
 
                mapping = page_mapping(page);
@@ -556,6 +649,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                                goto activate_locked;
                        case SWAP_AGAIN:
                                goto keep_locked;
+                       case SWAP_MLOCK:
+                               goto cull_mlocked;
                        case SWAP_SUCCESS:
                                ; /* try to free the page below */
                        }
@@ -602,7 +697,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                 * possible for a page to have PageDirty set, but it is actually
                 * clean (all its buffers are clean).  This happens if the
                 * buffers were written out directly, with submit_bh(). ext3
-                * will do this, as well as the blockdev mapping. 
+                * will do this, as well as the blockdev mapping.
                 * try_to_release_page() will discover that cleanness and will
                 * drop the buffers and mark the page clean - it can be freed.
                 *
@@ -637,7 +732,14 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                if (!mapping || !__remove_mapping(mapping, page))
                        goto keep_locked;
 
-               unlock_page(page);
+               /*
+                * At this point, we have no other references and there is
+                * no way to pick any more up (removed from LRU, removed
+                * from pagecache). Can use non-atomic bitops now (and
+                * we obviously don't have to worry about waking up a process
+                * waiting on the page lock, because there are no references.
+                */
+               __clear_page_locked(page);
 free_it:
                nr_reclaimed++;
                if (!pagevec_add(&freed_pvec, page)) {
@@ -646,14 +748,23 @@ free_it:
                }
                continue;
 
+cull_mlocked:
+               unlock_page(page);
+               putback_lru_page(page);
+               continue;
+
 activate_locked:
+               /* Not a candidate for swapping, so reclaim swap space. */
+               if (PageSwapCache(page) && vm_swap_full())
+                       remove_exclusive_swap_page_ref(page);
+               VM_BUG_ON(PageActive(page));
                SetPageActive(page);
                pgactivate++;
 keep_locked:
                unlock_page(page);
 keep:
                list_add(&page->lru, &ret_pages);
-               VM_BUG_ON(PageLRU(page));
+               VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
        }
        list_splice(&ret_pages, page_list);
        if (pagevec_count(&freed_pvec))
@@ -677,7 +788,7 @@ keep:
  *
  * returns 0 on success, -ve errno on failure.
  */
-int __isolate_lru_page(struct page *page, int mode)
+int __isolate_lru_page(struct page *page, int mode, int file)
 {
        int ret = -EINVAL;
 
@@ -693,6 +804,17 @@ int __isolate_lru_page(struct page *page, int mode)
        if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
                return ret;
 
+       if (mode != ISOLATE_BOTH && (!page_is_file_cache(page) != !file))
+               return ret;
+
+       /*
+        * When this function is being called for lumpy reclaim, we
+        * initially look into all LRU pages, active, inactive and
+        * unevictable; only give shrink_page_list evictable pages.
+        */
+       if (PageUnevictable(page))
+               return ret;
+
        ret = -EBUSY;
        if (likely(get_page_unless_zero(page))) {
                /*
@@ -723,12 +845,13 @@ int __isolate_lru_page(struct page *page, int mode)
  * @scanned:   The number of pages that were scanned.
  * @order:     The caller's attempted allocation order
  * @mode:      One of the LRU isolation modes
+ * @file:      True [1] if isolating file [!anon] pages
  *
  * returns how many pages were moved onto *@dst.
  */
 static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                struct list_head *src, struct list_head *dst,
-               unsigned long *scanned, int order, int mode)
+               unsigned long *scanned, int order, int mode, int file)
 {
        unsigned long nr_taken = 0;
        unsigned long scan;
@@ -745,7 +868,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
                VM_BUG_ON(!PageLRU(page));
 
-               switch (__isolate_lru_page(page, mode)) {
+               switch (__isolate_lru_page(page, mode, file)) {
                case 0:
                        list_move(&page->lru, dst);
                        nr_taken++;
@@ -788,10 +911,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                                break;
 
                        cursor_page = pfn_to_page(pfn);
+
                        /* Check that we have not crossed a zone boundary. */
                        if (unlikely(page_zone_id(cursor_page) != zone_id))
                                continue;
-                       switch (__isolate_lru_page(cursor_page, mode)) {
+                       switch (__isolate_lru_page(cursor_page, mode, file)) {
                        case 0:
                                list_move(&cursor_page->lru, dst);
                                nr_taken++;
@@ -802,7 +926,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                                /* else it is being freed elsewhere */
                                list_move(&cursor_page->lru, src);
                        default:
-                               break;
+                               break;  /* ! on LRU or wrong list */
                        }
                }
        }
@@ -816,40 +940,93 @@ static unsigned long isolate_pages_global(unsigned long nr,
                                        unsigned long *scanned, int order,
                                        int mode, struct zone *z,
                                        struct mem_cgroup *mem_cont,
-                                       int active)
+                                       int active, int file)
 {
+       int lru = LRU_BASE;
        if (active)
-               return isolate_lru_pages(nr, &z->active_list, dst,
-                                               scanned, order, mode);
-       else
-               return isolate_lru_pages(nr, &z->inactive_list, dst,
-                                               scanned, order, mode);
+               lru += LRU_ACTIVE;
+       if (file)
+               lru += LRU_FILE;
+       return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
+                                                               mode, !!file);
 }
 
 /*
  * clear_active_flags() is a helper for shrink_active_list(), clearing
  * any active bits from the pages in the list.
  */
-static unsigned long clear_active_flags(struct list_head *page_list)
+static unsigned long clear_active_flags(struct list_head *page_list,
+                                       unsigned int *count)
 {
        int nr_active = 0;
+       int lru;
        struct page *page;
 
-       list_for_each_entry(page, page_list, lru)
+       list_for_each_entry(page, page_list, lru) {
+               lru = page_is_file_cache(page);
                if (PageActive(page)) {
+                       lru += LRU_ACTIVE;
                        ClearPageActive(page);
                        nr_active++;
                }
+               count[lru]++;
+       }
 
        return nr_active;
 }
 
+/**
+ * isolate_lru_page - tries to isolate a page from its LRU list
+ * @page: page to isolate from its LRU list
+ *
+ * Isolates a @page from an LRU list, clears PageLRU and adjusts the
+ * vmstat statistic corresponding to whatever LRU list the page was on.
+ *
+ * Returns 0 if the page was removed from an LRU list.
+ * Returns -EBUSY if the page was not on an LRU list.
+ *
+ * The returned page will have PageLRU() cleared.  If it was found on
+ * the active list, it will have PageActive set.  If it was found on
+ * the unevictable list, it will have the PageUnevictable bit set. That flag
+ * may need to be cleared by the caller before letting the page go.
+ *
+ * The vmstat statistic corresponding to the list on which the page was
+ * found will be decremented.
+ *
+ * Restrictions:
+ * (1) Must be called with an elevated refcount on the page. This is a
+ *     fundamentnal difference from isolate_lru_pages (which is called
+ *     without a stable reference).
+ * (2) the lru_lock must not be held.
+ * (3) interrupts must be enabled.
+ */
+int isolate_lru_page(struct page *page)
+{
+       int ret = -EBUSY;
+
+       if (PageLRU(page)) {
+               struct zone *zone = page_zone(page);
+
+               spin_lock_irq(&zone->lru_lock);
+               if (PageLRU(page) && get_page_unless_zero(page)) {
+                       int lru = page_lru(page);
+                       ret = 0;
+                       ClearPageLRU(page);
+
+                       del_page_from_lru_list(zone, page, lru);
+               }
+               spin_unlock_irq(&zone->lru_lock);
+       }
+       return ret;
+}
+
 /*
  * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
  * of reclaimed pages
  */
 static unsigned long shrink_inactive_list(unsigned long max_scan,
-                               struct zone *zone, struct scan_control *sc)
+                       struct zone *zone, struct scan_control *sc,
+                       int priority, int file)
 {
        LIST_HEAD(page_list);
        struct pagevec pvec;
@@ -866,20 +1043,43 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
                unsigned long nr_scan;
                unsigned long nr_freed;
                unsigned long nr_active;
+               unsigned int count[NR_LRU_LISTS] = { 0, };
+               int mode = ISOLATE_INACTIVE;
+
+               /*
+                * If we need a large contiguous chunk of memory, or have
+                * trouble getting a small set of contiguous pages, we
+                * will reclaim both active and inactive pages.
+                *
+                * We use the same threshold as pageout congestion_wait below.
+                */
+               if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+                       mode = ISOLATE_BOTH;
+               else if (sc->order && priority < DEF_PRIORITY - 2)
+                       mode = ISOLATE_BOTH;
 
                nr_taken = sc->isolate_pages(sc->swap_cluster_max,
-                            &page_list, &nr_scan, sc->order,
-                            (sc->order > PAGE_ALLOC_COSTLY_ORDER)?
-                                            ISOLATE_BOTH : ISOLATE_INACTIVE,
-                               zone, sc->mem_cgroup, 0);
-               nr_active = clear_active_flags(&page_list);
+                            &page_list, &nr_scan, sc->order, mode,
+                               zone, sc->mem_cgroup, 0, file);
+               nr_active = clear_active_flags(&page_list, count);
                __count_vm_events(PGDEACTIVATE, nr_active);
 
-               __mod_zone_page_state(zone, NR_ACTIVE, -nr_active);
-               __mod_zone_page_state(zone, NR_INACTIVE,
-                                               -(nr_taken - nr_active));
-               if (scan_global_lru(sc))
+               __mod_zone_page_state(zone, NR_ACTIVE_FILE,
+                                               -count[LRU_ACTIVE_FILE]);
+               __mod_zone_page_state(zone, NR_INACTIVE_FILE,
+                                               -count[LRU_INACTIVE_FILE]);
+               __mod_zone_page_state(zone, NR_ACTIVE_ANON,
+                                               -count[LRU_ACTIVE_ANON]);
+               __mod_zone_page_state(zone, NR_INACTIVE_ANON,
+                                               -count[LRU_INACTIVE_ANON]);
+
+               if (scan_global_lru(sc)) {
                        zone->pages_scanned += nr_scan;
+                       zone->recent_scanned[0] += count[LRU_INACTIVE_ANON];
+                       zone->recent_scanned[0] += count[LRU_ACTIVE_ANON];
+                       zone->recent_scanned[1] += count[LRU_INACTIVE_FILE];
+                       zone->recent_scanned[1] += count[LRU_ACTIVE_FILE];
+               }
                spin_unlock_irq(&zone->lru_lock);
 
                nr_scanned += nr_scan;
@@ -899,7 +1099,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
                         * The attempt at page out may have made some
                         * of the pages active, mark them inactive again.
                         */
-                       nr_active = clear_active_flags(&page_list);
+                       nr_active = clear_active_flags(&page_list, count);
                        count_vm_events(PGDEACTIVATE, nr_active);
 
                        nr_freed += shrink_page_list(&page_list, sc,
@@ -924,14 +1124,24 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
                 * Put back any unfreeable pages.
                 */
                while (!list_empty(&page_list)) {
+                       int lru;
                        page = lru_to_page(&page_list);
                        VM_BUG_ON(PageLRU(page));
-                       SetPageLRU(page);
                        list_del(&page->lru);
-                       if (PageActive(page))
-                               add_page_to_active_list(zone, page);
-                       else
-                               add_page_to_inactive_list(zone, page);
+                       if (unlikely(!page_evictable(page, NULL))) {
+                               spin_unlock_irq(&zone->lru_lock);
+                               putback_lru_page(page);
+                               spin_lock_irq(&zone->lru_lock);
+                               continue;
+                       }
+                       SetPageLRU(page);
+                       lru = page_lru(page);
+                       add_page_to_lru_list(zone, page, lru);
+                       mem_cgroup_move_lists(page, lru);
+                       if (PageActive(page) && scan_global_lru(sc)) {
+                               int file = !!page_is_file_cache(page);
+                               zone->recent_rotated[file]++;
+                       }
                        if (!pagevec_add(&pvec, page)) {
                                spin_unlock_irq(&zone->lru_lock);
                                __pagevec_release(&pvec);
@@ -962,115 +1172,7 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority)
 
 static inline int zone_is_near_oom(struct zone *zone)
 {
-       return zone->pages_scanned >= (zone_page_state(zone, NR_ACTIVE)
-                               + zone_page_state(zone, NR_INACTIVE))*3;
-}
-
-/*
- * Determine we should try to reclaim mapped pages.
- * This is called only when sc->mem_cgroup is NULL.
- */
-static int calc_reclaim_mapped(struct scan_control *sc, struct zone *zone,
-                               int priority)
-{
-       long mapped_ratio;
-       long distress;
-       long swap_tendency;
-       long imbalance;
-       int reclaim_mapped = 0;
-       int prev_priority;
-
-       if (scan_global_lru(sc) && zone_is_near_oom(zone))
-               return 1;
-       /*
-        * `distress' is a measure of how much trouble we're having
-        * reclaiming pages.  0 -> no problems.  100 -> great trouble.
-        */
-       if (scan_global_lru(sc))
-               prev_priority = zone->prev_priority;
-       else
-               prev_priority = mem_cgroup_get_reclaim_priority(sc->mem_cgroup);
-
-       distress = 100 >> min(prev_priority, priority);
-
-       /*
-        * The point of this algorithm is to decide when to start
-        * reclaiming mapped memory instead of just pagecache.  Work out
-        * how much memory
-        * is mapped.
-        */
-       if (scan_global_lru(sc))
-               mapped_ratio = ((global_page_state(NR_FILE_MAPPED) +
-                               global_page_state(NR_ANON_PAGES)) * 100) /
-                                       vm_total_pages;
-       else
-               mapped_ratio = mem_cgroup_calc_mapped_ratio(sc->mem_cgroup);
-
-       /*
-        * Now decide how much we really want to unmap some pages.  The
-        * mapped ratio is downgraded - just because there's a lot of
-        * mapped memory doesn't necessarily mean that page reclaim
-        * isn't succeeding.
-        *
-        * The distress ratio is important - we don't want to start
-        * going oom.
-        *
-        * A 100% value of vm_swappiness overrides this algorithm
-        * altogether.
-        */
-       swap_tendency = mapped_ratio / 2 + distress + sc->swappiness;
-
-       /*
-        * If there's huge imbalance between active and inactive
-        * (think active 100 times larger than inactive) we should
-        * become more permissive, or the system will take too much
-        * cpu before it start swapping during memory pressure.
-        * Distress is about avoiding early-oom, this is about
-        * making swappiness graceful despite setting it to low
-        * values.
-        *
-        * Avoid div by zero with nr_inactive+1, and max resulting
-        * value is vm_total_pages.
-        */
-       if (scan_global_lru(sc)) {
-               imbalance  = zone_page_state(zone, NR_ACTIVE);
-               imbalance /= zone_page_state(zone, NR_INACTIVE) + 1;
-       } else
-               imbalance = mem_cgroup_reclaim_imbalance(sc->mem_cgroup);
-
-       /*
-        * Reduce the effect of imbalance if swappiness is low,
-        * this means for a swappiness very low, the imbalance
-        * must be much higher than 100 for this logic to make
-        * the difference.
-        *
-        * Max temporary value is vm_total_pages*100.
-        */
-       imbalance *= (vm_swappiness + 1);
-       imbalance /= 100;
-
-       /*
-        * If not much of the ram is mapped, makes the imbalance
-        * less relevant, it's high priority we refill the inactive
-        * list with mapped pages only in presence of high ratio of
-        * mapped pages.
-        *
-        * Max temporary value is vm_total_pages*100.
-        */
-       imbalance *= mapped_ratio;
-       imbalance /= 100;
-
-       /* apply imbalance feedback to swap_tendency */
-       swap_tendency += imbalance;
-
-       /*
-        * Now use this metric to decide whether to start moving mapped
-        * memory onto the inactive list.
-        */
-       if (swap_tendency >= 100)
-               reclaim_mapped = 1;
-
-       return reclaim_mapped;
+       return zone->pages_scanned >= (zone_lru_pages(zone) * 3);
 }
 
 /*
@@ -1093,53 +1195,71 @@ static int calc_reclaim_mapped(struct scan_control *sc, struct zone *zone,
 
 
 static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
-                               struct scan_control *sc, int priority)
+                       struct scan_control *sc, int priority, int file)
 {
        unsigned long pgmoved;
        int pgdeactivate = 0;
        unsigned long pgscanned;
        LIST_HEAD(l_hold);      /* The pages which were snipped off */
-       LIST_HEAD(l_inactive);  /* Pages to go onto the inactive_list */
-       LIST_HEAD(l_active);    /* Pages to go onto the active_list */
+       LIST_HEAD(l_inactive);
        struct page *page;
        struct pagevec pvec;
-       int reclaim_mapped = 0;
-
-       if (sc->may_swap)
-               reclaim_mapped = calc_reclaim_mapped(sc, zone, priority);
+       enum lru_list lru;
 
        lru_add_drain();
        spin_lock_irq(&zone->lru_lock);
        pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
                                        ISOLATE_ACTIVE, zone,
-                                       sc->mem_cgroup, 1);
+                                       sc->mem_cgroup, 1, file);
        /*
         * zone->pages_scanned is used for detect zone's oom
         * mem_cgroup remembers nr_scan by itself.
         */
-       if (scan_global_lru(sc))
+       if (scan_global_lru(sc)) {
                zone->pages_scanned += pgscanned;
+               zone->recent_scanned[!!file] += pgmoved;
+       }
 
-       __mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);
+       if (file)
+               __mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
+       else
+               __mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
        spin_unlock_irq(&zone->lru_lock);
 
+       pgmoved = 0;
        while (!list_empty(&l_hold)) {
                cond_resched();
                page = lru_to_page(&l_hold);
                list_del(&page->lru);
-               if (page_mapped(page)) {
-                       if (!reclaim_mapped ||
-                           (total_swap_pages == 0 && PageAnon(page)) ||
-                           page_referenced(page, 0, sc->mem_cgroup)) {
-                               list_add(&page->lru, &l_active);
-                               continue;
-                       }
+
+               if (unlikely(!page_evictable(page, NULL))) {
+                       putback_lru_page(page);
+                       continue;
                }
+
+               /* page_referenced clears PageReferenced */
+               if (page_mapping_inuse(page) &&
+                   page_referenced(page, 0, sc->mem_cgroup))
+                       pgmoved++;
+
                list_add(&page->lru, &l_inactive);
        }
 
+       /*
+        * Count referenced pages from currently used mappings as
+        * rotated, even though they are moved to the inactive list.
+        * This helps balance scan pressure between file and anonymous
+        * pages in get_scan_ratio.
+        */
+       zone->recent_rotated[!!file] += pgmoved;
+
+       /*
+        * Move the pages to the [file or anon] inactive list.
+        */
        pagevec_init(&pvec, 1);
+
        pgmoved = 0;
+       lru = LRU_BASE + file * LRU_FILE;
        spin_lock_irq(&zone->lru_lock);
        while (!list_empty(&l_inactive)) {
                page = lru_to_page(&l_inactive);
@@ -1149,11 +1269,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
                VM_BUG_ON(!PageActive(page));
                ClearPageActive(page);
 
-               list_move(&page->lru, &zone->inactive_list);
-               mem_cgroup_move_lists(page, false);
+               list_move(&page->lru, &zone->lru[lru].list);
+               mem_cgroup_move_lists(page, lru);
                pgmoved++;
                if (!pagevec_add(&pvec, page)) {
-                       __mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
+                       __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
                        spin_unlock_irq(&zone->lru_lock);
                        pgdeactivate += pgmoved;
                        pgmoved = 0;
@@ -1163,104 +1283,189 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
                        spin_lock_irq(&zone->lru_lock);
                }
        }
-       __mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
+       __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
        pgdeactivate += pgmoved;
        if (buffer_heads_over_limit) {
                spin_unlock_irq(&zone->lru_lock);
                pagevec_strip(&pvec);
                spin_lock_irq(&zone->lru_lock);
        }
-
-       pgmoved = 0;
-       while (!list_empty(&l_active)) {
-               page = lru_to_page(&l_active);
-               prefetchw_prev_lru_page(page, &l_active, flags);
-               VM_BUG_ON(PageLRU(page));
-               SetPageLRU(page);
-               VM_BUG_ON(!PageActive(page));
-
-               list_move(&page->lru, &zone->active_list);
-               mem_cgroup_move_lists(page, true);
-               pgmoved++;
-               if (!pagevec_add(&pvec, page)) {
-                       __mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
-                       pgmoved = 0;
-                       spin_unlock_irq(&zone->lru_lock);
-                       __pagevec_release(&pvec);
-                       spin_lock_irq(&zone->lru_lock);
-               }
-       }
-       __mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
-
        __count_zone_vm_events(PGREFILL, zone, pgscanned);
        __count_vm_events(PGDEACTIVATE, pgdeactivate);
        spin_unlock_irq(&zone->lru_lock);
+       if (vm_swap_full())
+               pagevec_swap_free(&pvec);
 
        pagevec_release(&pvec);
 }
 
+static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
+       struct zone *zone, struct scan_control *sc, int priority)
+{
+       int file = is_file_lru(lru);
+
+       if (lru == LRU_ACTIVE_FILE) {
+               shrink_active_list(nr_to_scan, zone, sc, priority, file);
+               return 0;
+       }
+
+       if (lru == LRU_ACTIVE_ANON &&
+           (!scan_global_lru(sc) || inactive_anon_is_low(zone))) {
+               shrink_active_list(nr_to_scan, zone, sc, priority, file);
+               return 0;
+       }
+       return shrink_inactive_list(nr_to_scan, zone, sc, priority, file);
+}
+
+/*
+ * Determine how aggressively the anon and file LRU lists should be
+ * scanned.  The relative value of each set of LRU lists is determined
+ * by looking at the fraction of the pages scanned we did rotate back
+ * onto the active list instead of evict.
+ *
+ * percent[0] specifies how much pressure to put on ram/swap backed
+ * memory, while percent[1] determines pressure on the file LRUs.
+ */
+static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
+                                       unsigned long *percent)
+{
+       unsigned long anon, file, free;
+       unsigned long anon_prio, file_prio;
+       unsigned long ap, fp;
+
+       anon  = zone_page_state(zone, NR_ACTIVE_ANON) +
+               zone_page_state(zone, NR_INACTIVE_ANON);
+       file  = zone_page_state(zone, NR_ACTIVE_FILE) +
+               zone_page_state(zone, NR_INACTIVE_FILE);
+       free  = zone_page_state(zone, NR_FREE_PAGES);
+
+       /* If we have no swap space, do not bother scanning anon pages. */
+       if (nr_swap_pages <= 0) {
+               percent[0] = 0;
+               percent[1] = 100;
+               return;
+       }
+
+       /* If we have very few page cache pages, force-scan anon pages. */
+       if (unlikely(file + free <= zone->pages_high)) {
+               percent[0] = 100;
+               percent[1] = 0;
+               return;
+       }
+
+       /*
+        * OK, so we have swap space and a fair amount of page cache
+        * pages.  We use the recently rotated / recently scanned
+        * ratios to determine how valuable each cache is.
+        *
+        * Because workloads change over time (and to avoid overflow)
+        * we keep these statistics as a floating average, which ends
+        * up weighing recent references more than old ones.
+        *
+        * anon in [0], file in [1]
+        */
+       if (unlikely(zone->recent_scanned[0] > anon / 4)) {
+               spin_lock_irq(&zone->lru_lock);
+               zone->recent_scanned[0] /= 2;
+               zone->recent_rotated[0] /= 2;
+               spin_unlock_irq(&zone->lru_lock);
+       }
+
+       if (unlikely(zone->recent_scanned[1] > file / 4)) {
+               spin_lock_irq(&zone->lru_lock);
+               zone->recent_scanned[1] /= 2;
+               zone->recent_rotated[1] /= 2;
+               spin_unlock_irq(&zone->lru_lock);
+       }
+
+       /*
+        * With swappiness at 100, anonymous and file have the same priority.
+        * This scanning priority is essentially the inverse of IO cost.
+        */
+       anon_prio = sc->swappiness;
+       file_prio = 200 - sc->swappiness;
+
+       /*
+        *                  anon       recent_rotated[0]
+        * %anon = 100 * ----------- / ----------------- * IO cost
+        *               anon + file      rotate_sum
+        */
+       ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1);
+       ap /= zone->recent_rotated[0] + 1;
+
+       fp = (file_prio + 1) * (zone->recent_scanned[1] + 1);
+       fp /= zone->recent_rotated[1] + 1;
+
+       /* Normalize to percentages */
+       percent[0] = 100 * ap / (ap + fp + 1);
+       percent[1] = 100 - percent[0];
+}
+
+
 /*
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
  */
 static unsigned long shrink_zone(int priority, struct zone *zone,
                                struct scan_control *sc)
 {
-       unsigned long nr_active;
-       unsigned long nr_inactive;
+       unsigned long nr[NR_LRU_LISTS];
        unsigned long nr_to_scan;
        unsigned long nr_reclaimed = 0;
+       unsigned long percent[2];       /* anon @ 0; file @ 1 */
+       enum lru_list l;
 
-       if (scan_global_lru(sc)) {
-               /*
-                * Add one to nr_to_scan just to make sure that the kernel
-                * will slowly sift through the active list.
-                */
-               zone->nr_scan_active +=
-                       (zone_page_state(zone, NR_ACTIVE) >> priority) + 1;
-               nr_active = zone->nr_scan_active;
-               zone->nr_scan_inactive +=
-                       (zone_page_state(zone, NR_INACTIVE) >> priority) + 1;
-               nr_inactive = zone->nr_scan_inactive;
-               if (nr_inactive >= sc->swap_cluster_max)
-                       zone->nr_scan_inactive = 0;
-               else
-                       nr_inactive = 0;
-
-               if (nr_active >= sc->swap_cluster_max)
-                       zone->nr_scan_active = 0;
-               else
-                       nr_active = 0;
-       } else {
-               /*
-                * This reclaim occurs not because zone memory shortage but
-                * because memory controller hits its limit.
-                * Then, don't modify zone reclaim related data.
-                */
-               nr_active = mem_cgroup_calc_reclaim_active(sc->mem_cgroup,
-                                       zone, priority);
-
-               nr_inactive = mem_cgroup_calc_reclaim_inactive(sc->mem_cgroup,
-                                       zone, priority);
-       }
+       get_scan_ratio(zone, sc, percent);
 
+       for_each_evictable_lru(l) {
+               if (scan_global_lru(sc)) {
+                       int file = is_file_lru(l);
+                       int scan;
 
-       while (nr_active || nr_inactive) {
-               if (nr_active) {
-                       nr_to_scan = min(nr_active,
-                                       (unsigned long)sc->swap_cluster_max);
-                       nr_active -= nr_to_scan;
-                       shrink_active_list(nr_to_scan, zone, sc, priority);
+                       scan = zone_page_state(zone, NR_LRU_BASE + l);
+                       if (priority) {
+                               scan >>= priority;
+                               scan = (scan * percent[file]) / 100;
+                       }
+                       zone->lru[l].nr_scan += scan;
+                       nr[l] = zone->lru[l].nr_scan;
+                       if (nr[l] >= sc->swap_cluster_max)
+                               zone->lru[l].nr_scan = 0;
+                       else
+                               nr[l] = 0;
+               } else {
+                       /*
+                        * This reclaim occurs not because zone memory shortage
+                        * but because memory controller hits its limit.
+                        * Don't modify zone reclaim related data.
+                        */
+                       nr[l] = mem_cgroup_calc_reclaim(sc->mem_cgroup, zone,
+                                                               priority, l);
                }
+       }
 
-               if (nr_inactive) {
-                       nr_to_scan = min(nr_inactive,
+       while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
+                                       nr[LRU_INACTIVE_FILE]) {
+               for_each_evictable_lru(l) {
+                       if (nr[l]) {
+                               nr_to_scan = min(nr[l],
                                        (unsigned long)sc->swap_cluster_max);
-                       nr_inactive -= nr_to_scan;
-                       nr_reclaimed += shrink_inactive_list(nr_to_scan, zone,
-                                                               sc);
+                               nr[l] -= nr_to_scan;
+
+                               nr_reclaimed += shrink_list(l, nr_to_scan,
+                                                       zone, sc, priority);
+                       }
                }
        }
 
+       /*
+        * Even if we did not try to evict anon pages at all, we want to
+        * rebalance the anon lru active/inactive ratio.
+        */
+       if (!scan_global_lru(sc) || inactive_anon_is_low(zone))
+               shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+       else if (!scan_global_lru(sc))
+               shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+
        throttle_vm_writeout(sc->gfp_mask);
        return nr_reclaimed;
 }
@@ -1321,7 +1526,7 @@ static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
 
        return nr_reclaimed;
 }
+
 /*
  * This is the main entry point to direct page reclaim.
  *
@@ -1364,8 +1569,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                        if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
                                continue;
 
-                       lru_pages += zone_page_state(zone, NR_ACTIVE)
-                                       + zone_page_state(zone, NR_INACTIVE);
+                       lru_pages += zone_lru_pages(zone);
                }
        }
 
@@ -1555,6 +1759,14 @@ loop_again:
                            priority != DEF_PRIORITY)
                                continue;
 
+                       /*
+                        * Do some background aging of the anon list, to give
+                        * pages a chance to be referenced before reclaiming.
+                        */
+                       if (inactive_anon_is_low(zone))
+                               shrink_active_list(SWAP_CLUSTER_MAX, zone,
+                                                       &sc, priority, 0);
+
                        if (!zone_watermark_ok(zone, order, zone->pages_high,
                                               0, 0)) {
                                end_zone = i;
@@ -1567,8 +1779,7 @@ loop_again:
                for (i = 0; i <= end_zone; i++) {
                        struct zone *zone = pgdat->node_zones + i;
 
-                       lru_pages += zone_page_state(zone, NR_ACTIVE)
-                                       + zone_page_state(zone, NR_INACTIVE);
+                       lru_pages += zone_lru_pages(zone);
                }
 
                /*
@@ -1612,8 +1823,7 @@ loop_again:
                        if (zone_is_all_unreclaimable(zone))
                                continue;
                        if (nr_slab == 0 && zone->pages_scanned >=
-                               (zone_page_state(zone, NR_ACTIVE)
-                               + zone_page_state(zone, NR_INACTIVE)) * 6)
+                                               (zone_lru_pages(zone) * 6))
                                        zone_set_flag(zone,
                                                      ZONE_ALL_UNRECLAIMABLE);
                        /*
@@ -1667,7 +1877,7 @@ out:
 
 /*
  * The background pageout daemon, started as a kernel thread
- * from the init process. 
+ * from the init process.
  *
  * This basically trickles out pages so that we have _some_
  * free memory available even if there is no other activity
@@ -1761,6 +1971,14 @@ void wakeup_kswapd(struct zone *zone, int order)
        wake_up_interruptible(&pgdat->kswapd_wait);
 }
 
+unsigned long global_lru_pages(void)
+{
+       return global_page_state(NR_ACTIVE_ANON)
+               + global_page_state(NR_ACTIVE_FILE)
+               + global_page_state(NR_INACTIVE_ANON)
+               + global_page_state(NR_INACTIVE_FILE);
+}
+
 #ifdef CONFIG_PM
 /*
  * Helper function for shrink_all_memory().  Tries to reclaim 'nr_pages' pages
@@ -1774,6 +1992,7 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
 {
        struct zone *zone;
        unsigned long nr_to_scan, ret = 0;
+       enum lru_list l;
 
        for_each_zone(zone) {
 
@@ -1783,38 +2002,31 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
                if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)
                        continue;
 
-               /* For pass = 0 we don't shrink the active list */
-               if (pass > 0) {
-                       zone->nr_scan_active +=
-                               (zone_page_state(zone, NR_ACTIVE) >> prio) + 1;
-                       if (zone->nr_scan_active >= nr_pages || pass > 3) {
-                               zone->nr_scan_active = 0;
+               for_each_evictable_lru(l) {
+                       /* For pass = 0, we don't shrink the active list */
+                       if (pass == 0 &&
+                               (l == LRU_ACTIVE || l == LRU_ACTIVE_FILE))
+                               continue;
+
+                       zone->lru[l].nr_scan +=
+                               (zone_page_state(zone, NR_LRU_BASE + l)
+                                                               >> prio) + 1;
+                       if (zone->lru[l].nr_scan >= nr_pages || pass > 3) {
+                               zone->lru[l].nr_scan = 0;
                                nr_to_scan = min(nr_pages,
-                                       zone_page_state(zone, NR_ACTIVE));
-                               shrink_active_list(nr_to_scan, zone, sc, prio);
+                                       zone_page_state(zone,
+                                                       NR_LRU_BASE + l));
+                               ret += shrink_list(l, nr_to_scan, zone,
+                                                               sc, prio);
+                               if (ret >= nr_pages)
+                                       return ret;
                        }
                }
-
-               zone->nr_scan_inactive +=
-                       (zone_page_state(zone, NR_INACTIVE) >> prio) + 1;
-               if (zone->nr_scan_inactive >= nr_pages || pass > 3) {
-                       zone->nr_scan_inactive = 0;
-                       nr_to_scan = min(nr_pages,
-                               zone_page_state(zone, NR_INACTIVE));
-                       ret += shrink_inactive_list(nr_to_scan, zone, sc);
-                       if (ret >= nr_pages)
-                               return ret;
-               }
        }
 
        return ret;
 }
 
-static unsigned long count_lru_pages(void)
-{
-       return global_page_state(NR_ACTIVE) + global_page_state(NR_INACTIVE);
-}
-
 /*
  * Try to free `nr_pages' of memory, system-wide, and return the number of
  * freed pages.
@@ -1840,7 +2052,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 
        current->reclaim_state = &reclaim_state;
 
-       lru_pages = count_lru_pages();
+       lru_pages = global_lru_pages();
        nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
        /* If slab caches are huge, it's better to hit them first */
        while (nr_slab >= lru_pages) {
@@ -1883,7 +2095,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 
                        reclaim_state.reclaimed_slab = 0;
                        shrink_slab(sc.nr_scanned, sc.gfp_mask,
-                                       count_lru_pages());
+                                       global_lru_pages());
                        ret += reclaim_state.reclaimed_slab;
                        if (ret >= nr_pages)
                                goto out;
@@ -1900,7 +2112,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
        if (!ret) {
                do {
                        reclaim_state.reclaimed_slab = 0;
-                       shrink_slab(nr_pages, sc.gfp_mask, count_lru_pages());
+                       shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages());
                        ret += reclaim_state.reclaimed_slab;
                } while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
        }
@@ -2128,3 +2340,285 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
        return ret;
 }
 #endif
+
+#ifdef CONFIG_UNEVICTABLE_LRU
+/*
+ * page_evictable - test whether a page is evictable
+ * @page: the page to test
+ * @vma: the VMA in which the page is or will be mapped, may be NULL
+ *
+ * Test whether page is evictable--i.e., should be placed on active/inactive
+ * lists vs unevictable list.  The vma argument is !NULL when called from the
+ * fault path to determine how to instantate a new page.
+ *
+ * Reasons page might not be evictable:
+ * (1) page's mapping marked unevictable
+ * (2) page is part of an mlocked VMA
+ *
+ */
+int page_evictable(struct page *page, struct vm_area_struct *vma)
+{
+
+       if (mapping_unevictable(page_mapping(page)))
+               return 0;
+
+       if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page)))
+               return 0;
+
+       return 1;
+}
+
+static void show_page_path(struct page *page)
+{
+       char buf[256];
+       if (page_is_file_cache(page)) {
+               struct address_space *mapping = page->mapping;
+               struct dentry *dentry;
+               pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+
+               spin_lock(&mapping->i_mmap_lock);
+               dentry = d_find_alias(mapping->host);
+               printk(KERN_INFO "rescued: %s %lu\n",
+                      dentry_path(dentry, buf, 256), pgoff);
+               spin_unlock(&mapping->i_mmap_lock);
+       } else {
+#if defined(CONFIG_MM_OWNER) && defined(CONFIG_MMU)
+               struct anon_vma *anon_vma;
+               struct vm_area_struct *vma;
+
+               anon_vma = page_lock_anon_vma(page);
+               if (!anon_vma)
+                       return;
+
+               list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+                       printk(KERN_INFO "rescued: anon %s\n",
+                              vma->vm_mm->owner->comm);
+                       break;
+               }
+               page_unlock_anon_vma(anon_vma);
+#endif
+       }
+}
+
+
+/**
+ * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
+ * @page: page to check evictability and move to appropriate lru list
+ * @zone: zone page is in
+ *
+ * Checks a page for evictability and moves the page to the appropriate
+ * zone lru list.
+ *
+ * Restrictions: zone->lru_lock must be held, page must be on LRU and must
+ * have PageUnevictable set.
+ */
+static void check_move_unevictable_page(struct page *page, struct zone *zone)
+{
+       VM_BUG_ON(PageActive(page));
+
+retry:
+       ClearPageUnevictable(page);
+       if (page_evictable(page, NULL)) {
+               enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page);
+
+               show_page_path(page);
+
+               __dec_zone_state(zone, NR_UNEVICTABLE);
+               list_move(&page->lru, &zone->lru[l].list);
+               __inc_zone_state(zone, NR_INACTIVE_ANON + l);
+               __count_vm_event(UNEVICTABLE_PGRESCUED);
+       } else {
+               /*
+                * rotate unevictable list
+                */
+               SetPageUnevictable(page);
+               list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
+               if (page_evictable(page, NULL))
+                       goto retry;
+       }
+}
+
+/**
+ * scan_mapping_unevictable_pages - scan an address space for evictable pages
+ * @mapping: struct address_space to scan for evictable pages
+ *
+ * Scan all pages in mapping.  Check unevictable pages for
+ * evictability and move them to the appropriate zone lru list.
+ */
+void scan_mapping_unevictable_pages(struct address_space *mapping)
+{
+       pgoff_t next = 0;
+       pgoff_t end   = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >>
+                        PAGE_CACHE_SHIFT;
+       struct zone *zone;
+       struct pagevec pvec;
+
+       if (mapping->nrpages == 0)
+               return;
+
+       pagevec_init(&pvec, 0);
+       while (next < end &&
+               pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+               int i;
+               int pg_scanned = 0;
+
+               zone = NULL;
+
+               for (i = 0; i < pagevec_count(&pvec); i++) {
+                       struct page *page = pvec.pages[i];
+                       pgoff_t page_index = page->index;
+                       struct zone *pagezone = page_zone(page);
+
+                       pg_scanned++;
+                       if (page_index > next)
+                               next = page_index;
+                       next++;
+
+                       if (pagezone != zone) {
+                               if (zone)
+                                       spin_unlock_irq(&zone->lru_lock);
+                               zone = pagezone;
+                               spin_lock_irq(&zone->lru_lock);
+                       }
+
+                       if (PageLRU(page) && PageUnevictable(page))
+                               check_move_unevictable_page(page, zone);
+               }
+               if (zone)
+                       spin_unlock_irq(&zone->lru_lock);
+               pagevec_release(&pvec);
+
+               count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
+       }
+
+}
+
+/**
+ * scan_zone_unevictable_pages - check unevictable list for evictable pages
+ * @zone - zone of which to scan the unevictable list
+ *
+ * Scan @zone's unevictable LRU lists to check for pages that have become
+ * evictable.  Move those that have to @zone's inactive list where they
+ * become candidates for reclaim, unless shrink_inactive_zone() decides
+ * to reactivate them.  Pages that are still unevictable are rotated
+ * back onto @zone's unevictable list.
+ */
+#define SCAN_UNEVICTABLE_BATCH_SIZE 16UL /* arbitrary lock hold batch size */
+void scan_zone_unevictable_pages(struct zone *zone)
+{
+       struct list_head *l_unevictable = &zone->lru[LRU_UNEVICTABLE].list;
+       unsigned long scan;
+       unsigned long nr_to_scan = zone_page_state(zone, NR_UNEVICTABLE);
+
+       while (nr_to_scan > 0) {
+               unsigned long batch_size = min(nr_to_scan,
+                                               SCAN_UNEVICTABLE_BATCH_SIZE);
+
+               spin_lock_irq(&zone->lru_lock);
+               for (scan = 0;  scan < batch_size; scan++) {
+                       struct page *page = lru_to_page(l_unevictable);
+
+                       if (!trylock_page(page))
+                               continue;
+
+                       prefetchw_prev_lru_page(page, l_unevictable, flags);
+
+                       if (likely(PageLRU(page) && PageUnevictable(page)))
+                               check_move_unevictable_page(page, zone);
+
+                       unlock_page(page);
+               }
+               spin_unlock_irq(&zone->lru_lock);
+
+               nr_to_scan -= batch_size;
+       }
+}
+
+
+/**
+ * scan_all_zones_unevictable_pages - scan all unevictable lists for evictable pages
+ *
+ * A really big hammer:  scan all zones' unevictable LRU lists to check for
+ * pages that have become evictable.  Move those back to the zones'
+ * inactive list where they become candidates for reclaim.
+ * This occurs when, e.g., we have unswappable pages on the unevictable lists,
+ * and we add swap to the system.  As such, it runs in the context of a task
+ * that has possibly/probably made some previously unevictable pages
+ * evictable.
+ */
+void scan_all_zones_unevictable_pages(void)
+{
+       struct zone *zone;
+
+       for_each_zone(zone) {
+               scan_zone_unevictable_pages(zone);
+       }
+}
+
+/*
+ * scan_unevictable_pages [vm] sysctl handler.  On demand re-scan of
+ * all nodes' unevictable lists for evictable pages
+ */
+unsigned long scan_unevictable_pages;
+
+int scan_unevictable_handler(struct ctl_table *table, int write,
+                          struct file *file, void __user *buffer,
+                          size_t *length, loff_t *ppos)
+{
+       proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+
+       if (write && *(unsigned long *)table->data)
+               scan_all_zones_unevictable_pages();
+
+       scan_unevictable_pages = 0;
+       return 0;
+}
+
+/*
+ * per node 'scan_unevictable_pages' attribute.  On demand re-scan of
+ * a specified node's per zone unevictable lists for evictable pages.
+ */
+
+static ssize_t read_scan_unevictable_node(struct sys_device *dev,
+                                         struct sysdev_attribute *attr,
+                                         char *buf)
+{
+       return sprintf(buf, "0\n");     /* always zero; should fit... */
+}
+
+static ssize_t write_scan_unevictable_node(struct sys_device *dev,
+                                          struct sysdev_attribute *attr,
+                                       const char *buf, size_t count)
+{
+       struct zone *node_zones = NODE_DATA(dev->id)->node_zones;
+       struct zone *zone;
+       unsigned long res;
+       unsigned long req = strict_strtoul(buf, 10, &res);
+
+       if (!req)
+               return 1;       /* zero is no-op */
+
+       for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
+               if (!populated_zone(zone))
+                       continue;
+               scan_zone_unevictable_pages(zone);
+       }
+       return 1;
+}
+
+
+static SYSDEV_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
+                       read_scan_unevictable_node,
+                       write_scan_unevictable_node);
+
+int scan_unevictable_register_node(struct node *node)
+{
+       return sysdev_create_file(&node->sysdev, &attr_scan_unevictable_pages);
+}
+
+void scan_unevictable_unregister_node(struct node *node)
+{
+       sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
+}
+
+#endif
index d7826af2fb073b15e2488470a67ad5a965fa7518..9343227c5c60b6dc3a1b7e09c64143a53a407003 100644 (file)
@@ -619,8 +619,14 @@ const struct seq_operations pagetypeinfo_op = {
 static const char * const vmstat_text[] = {
        /* Zoned VM counters */
        "nr_free_pages",
-       "nr_inactive",
-       "nr_active",
+       "nr_inactive_anon",
+       "nr_active_anon",
+       "nr_inactive_file",
+       "nr_active_file",
+#ifdef CONFIG_UNEVICTABLE_LRU
+       "nr_unevictable",
+       "nr_mlock",
+#endif
        "nr_anon_pages",
        "nr_mapped",
        "nr_file_pages",
@@ -675,6 +681,16 @@ static const char * const vmstat_text[] = {
        "htlb_buddy_alloc_success",
        "htlb_buddy_alloc_fail",
 #endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+       "unevictable_pgs_culled",
+       "unevictable_pgs_scanned",
+       "unevictable_pgs_rescued",
+       "unevictable_pgs_mlocked",
+       "unevictable_pgs_munlocked",
+       "unevictable_pgs_cleared",
+       "unevictable_pgs_stranded",
+       "unevictable_pgs_mlockfreed",
+#endif
 #endif
 };
 
@@ -688,7 +704,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
                   "\n        min      %lu"
                   "\n        low      %lu"
                   "\n        high     %lu"
-                  "\n        scanned  %lu (a: %lu i: %lu)"
+                  "\n        scanned  %lu (aa: %lu ia: %lu af: %lu if: %lu)"
                   "\n        spanned  %lu"
                   "\n        present  %lu",
                   zone_page_state(zone, NR_FREE_PAGES),
@@ -696,7 +712,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
                   zone->pages_low,
                   zone->pages_high,
                   zone->pages_scanned,
-                  zone->nr_scan_active, zone->nr_scan_inactive,
+                  zone->lru[LRU_ACTIVE_ANON].nr_scan,
+                  zone->lru[LRU_INACTIVE_ANON].nr_scan,
+                  zone->lru[LRU_ACTIVE_FILE].nr_scan,
+                  zone->lru[LRU_INACTIVE_FILE].nr_scan,
                   zone->spanned_pages,
                   zone->present_pages);
 
@@ -733,10 +752,12 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
        seq_printf(m,
                   "\n  all_unreclaimable: %u"
                   "\n  prev_priority:     %i"
-                  "\n  start_pfn:         %lu",
+                  "\n  start_pfn:         %lu"
+                  "\n  inactive_ratio:    %u",
                           zone_is_all_unreclaimable(zone),
                   zone->prev_priority,
-                  zone->zone_start_pfn);
+                  zone->zone_start_pfn,
+                  zone->inactive_ratio);
        seq_putc(m, '\n');
 }
 
index a4abed5b4c442cba6bb8e5b5f878c8f063f73b49..fa5cda4e552ab0088901a2820f4267d31b303030 100644 (file)
@@ -719,7 +719,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
                return NF_ACCEPT;
        }
        *d = (struct net_device *)in;
-       NF_HOOK(NF_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in,
+       NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in,
                (struct net_device *)out, br_nf_forward_finish);
 
        return NF_STOLEN;
index 868ec0ba8b77dc7088c28816238b2ee557316e01..b8a4fd0806af3fca72224828293a497f13281831 100644 (file)
@@ -924,10 +924,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
                strlcpy(dev->name, newname, IFNAMSIZ);
 
 rollback:
-       err = device_rename(&dev->dev, dev->name);
-       if (err) {
+       ret = device_rename(&dev->dev, dev->name);
+       if (ret) {
                memcpy(dev->name, oldname, IFNAMSIZ);
-               return err;
+               return ret;
        }
 
        write_lock_bh(&dev_base_lock);
index 11062780bb02b84990aa4c89e40d3815940ecec7..d4ce1224e008f13ee4da674cec0c6047a3a9f25f 100644 (file)
@@ -259,7 +259,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
        fl.fl6_flowlabel = 0;
        fl.oif = ireq6->iif;
        fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-       fl.fl_ip_sport = inet_sk(sk)->sport;
+       fl.fl_ip_sport = inet_rsk(req)->loc_port;
        security_req_classify_flow(req, &fl);
 
        opt = np->opt;
@@ -558,7 +558,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
                ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
                fl.oif = sk->sk_bound_dev_if;
                fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-               fl.fl_ip_sport = inet_sk(sk)->sport;
+               fl.fl_ip_sport = inet_rsk(req)->loc_port;
                security_sk_classify_flow(sk, &fl);
 
                if (ip6_dst_lookup(sk, &dst, &fl))
index b2804e2d1b8cadbc26e266fea24d7e525a89e888..e6bf99e3e41a169aec04880e52d60adb41dc8b62 100644 (file)
@@ -309,6 +309,7 @@ void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
        struct dccp_request_sock *dreq = dccp_rsk(req);
 
        inet_rsk(req)->rmt_port   = dccp_hdr(skb)->dccph_sport;
+       inet_rsk(req)->loc_port   = dccp_hdr(skb)->dccph_dport;
        inet_rsk(req)->acked      = 0;
        req->rcv_wnd              = sysctl_dccp_feat_sequence_window;
        dreq->dreq_timestamp_echo = 0;
index d06945c7d3dfc3e1201a00840516a457e9773303..809d803d5006ec6614821b9d0d48b63cf131a31f 100644 (file)
@@ -347,7 +347,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
        /* Build and checksum header */
        dh = dccp_zeroed_hdr(skb, dccp_header_size);
 
-       dh->dccph_sport = inet_sk(sk)->sport;
+       dh->dccph_sport = inet_rsk(req)->loc_port;
        dh->dccph_dport = inet_rsk(req)->rmt_port;
        dh->dccph_doff  = (dccp_header_size +
                           DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
index b043eda60b04277734d79112e8357078e66553dc..1a9dd66511fccfadf9e1240334e7dcc2a3f073b0 100644 (file)
@@ -663,7 +663,7 @@ out:
 void arp_xmit(struct sk_buff *skb)
 {
        /* Send it off, maybe filter it using firewalling first.  */
-       NF_HOOK(NF_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
+       NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
 }
 
 /*
@@ -928,7 +928,7 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
 
        memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
 
-       return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
+       return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
 
 freeskb:
        kfree_skb(skb);
index ffeaffc3fffe6c31c78e858b9a1fa61fe71ba03e..8303e4b406c05b15245d7f89d8e0c2d16f50b992 100644 (file)
@@ -742,6 +742,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
                        *obj = kmalloc(sizeof(struct snmp_object) + len,
                                       GFP_ATOMIC);
                        if (*obj == NULL) {
+                               kfree(p);
                                kfree(id);
                                if (net_ratelimit())
                                        printk("OOM in bsalg (%d)\n", __LINE__);
index ec394cf5a19bf80b480264199a224be336ee2cea..676c80b5b14b8d13a518ce3a7d7b5f0c48208c5b 100644 (file)
@@ -204,6 +204,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
        req->mss = mss;
        ireq->rmt_port = th->source;
+       ireq->loc_port = th->dest;
        ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
        ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
        if (ipv6_opt_accepted(sk, skb) ||
index e5310c9b84dcb2216f1db0b21ab92c3ed7a222b7..b6b356b7912a57c298a542731c2c9daf24a77bbc 100644 (file)
@@ -476,7 +476,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
        fl.fl6_flowlabel = 0;
        fl.oif = treq->iif;
        fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-       fl.fl_ip_sport = inet_sk(sk)->sport;
+       fl.fl_ip_sport = inet_rsk(req)->loc_port;
        security_req_classify_flow(req, &fl);
 
        opt = np->opt;
@@ -1309,7 +1309,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
                ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
                fl.oif = sk->sk_bound_dev_if;
                fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-               fl.fl_ip_sport = inet_sk(sk)->sport;
+               fl.fl_ip_sport = inet_rsk(req)->loc_port;
                security_req_classify_flow(req, &fl);
 
                if (ip6_dst_lookup(sk, &dst, &fl))
@@ -1865,7 +1865,7 @@ static void get_openreq6(struct seq_file *seq,
                   i,
                   src->s6_addr32[0], src->s6_addr32[1],
                   src->s6_addr32[2], src->s6_addr32[3],
-                  ntohs(inet_sk(sk)->sport),
+                  ntohs(inet_rsk(req)->loc_port),
                   dest->s6_addr32[0], dest->s6_addr32[1],
                   dest->s6_addr32[2], dest->s6_addr32[3],
                   ntohs(inet_rsk(req)->rmt_port),
index 78892cf2b021c0d4a4bedf05075535f44c125f82..25dcef9f219493a597511c157efc9c2fe4f343e6 100644 (file)
@@ -271,7 +271,6 @@ config NF_CONNTRACK_TFTP
 config NF_CT_NETLINK
        tristate 'Connection tracking netlink interface'
        select NETFILTER_NETLINK
-       depends on NF_NAT=n || NF_NAT
        default m if NETFILTER_ADVANCED=n
        help
          This option enables support for a netlink-based userspace interface
index 05048e4032661d7b96477db4b9ed6501713797cd..79a69805221889c91578ef8f45bf6a8313a3420e 100644 (file)
@@ -25,11 +25,13 @@ menuconfig IP_VS
 if IP_VS
 
 config IP_VS_IPV6
-       bool "IPv6 support for IPVS (DANGEROUS)"
+       bool "IPv6 support for IPVS"
        depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6)
        ---help---
          Add IPv6 support to IPVS. This is incomplete and might be dangerous.
 
+         See http://www.mindbasket.com/ipvs for more information.
+
          Say N if unsure.
 
 config IP_VS_DEBUG
index 2e4ad9671e19c0203915a13e7b7108aa65f34a54..a040d46f85d6c8967f88af6c549f23206e6416c7 100644 (file)
@@ -813,6 +813,7 @@ out:
        return err;
 }
 
+#ifdef CONFIG_NF_NAT_NEEDED
 static int
 ctnetlink_parse_nat_setup(struct nf_conn *ct,
                          enum nf_nat_manip_type manip,
@@ -840,6 +841,7 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
 
        return parse_nat_setup(ct, manip, attr);
 }
+#endif
 
 static int
 ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
index 2cc1fff493071c61a119cc9c93a1d20a03483b62..f9977b3311f75692239c2f7c32ca7a417ed3eddb 100644 (file)
@@ -48,7 +48,7 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
        },
        {
                .name           = "NFQUEUE",
-               .family         = NF_ARP,
+               .family         = NFPROTO_ARP,
                .target         = nfqueue_tg,
                .targetsize     = sizeof(struct xt_NFQ_info),
                .me             = THIS_MODULE,
index 6f62c36948d94ef4c25f143334f7b6884fc1d1ae..7ac54eab0b00a6a0249afcc43ca6afea807410a0 100644 (file)
@@ -61,7 +61,7 @@ iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par)
        if (info->flags & IPRANGE_SRC) {
                m  = ntohl(iph->saddr) < ntohl(info->src_min.ip);
                m |= ntohl(iph->saddr) > ntohl(info->src_max.ip);
-               m ^= info->flags & IPRANGE_SRC_INV;
+               m ^= !!(info->flags & IPRANGE_SRC_INV);
                if (m) {
                        pr_debug("src IP " NIPQUAD_FMT " NOT in range %s"
                                 NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
@@ -75,7 +75,7 @@ iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par)
        if (info->flags & IPRANGE_DST) {
                m  = ntohl(iph->daddr) < ntohl(info->dst_min.ip);
                m |= ntohl(iph->daddr) > ntohl(info->dst_max.ip);
-               m ^= info->flags & IPRANGE_DST_INV;
+               m ^= !!(info->flags & IPRANGE_DST_INV);
                if (m) {
                        pr_debug("dst IP " NIPQUAD_FMT " NOT in range %s"
                                 NIPQUAD_FMT "-" NIPQUAD_FMT "\n",
@@ -114,14 +114,14 @@ iprange_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
        if (info->flags & IPRANGE_SRC) {
                m  = iprange_ipv6_sub(&iph->saddr, &info->src_min.in6) < 0;
                m |= iprange_ipv6_sub(&iph->saddr, &info->src_max.in6) > 0;
-               m ^= info->flags & IPRANGE_SRC_INV;
+               m ^= !!(info->flags & IPRANGE_SRC_INV);
                if (m)
                        return false;
        }
        if (info->flags & IPRANGE_DST) {
                m  = iprange_ipv6_sub(&iph->daddr, &info->dst_min.in6) < 0;
                m |= iprange_ipv6_sub(&iph->daddr, &info->dst_max.in6) > 0;
-               m ^= info->flags & IPRANGE_DST_INV;
+               m ^= !!(info->flags & IPRANGE_DST_INV);
                if (m)
                        return false;
        }
index 4ebd4ca9a991bf871a23bf672ea2b30d5a9095d7..280c471bcdf444fdc2228a46895c2b44baf8d549 100644 (file)
@@ -318,15 +318,15 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
        for (i = 0; i < ip_list_hash_size; i++)
                INIT_LIST_HEAD(&t->iphash[i]);
 #ifdef CONFIG_PROC_FS
-       t->proc = proc_create(t->name, ip_list_perms, recent_proc_dir,
-                 &recent_mt_fops);
+       t->proc = proc_create_data(t->name, ip_list_perms, recent_proc_dir,
+                 &recent_mt_fops, t);
        if (t->proc == NULL) {
                kfree(t);
                goto out;
        }
 #ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
-       t->proc_old = proc_create(t->name, ip_list_perms, proc_old_dir,
-                     &recent_old_fops);
+       t->proc_old = proc_create_data(t->name, ip_list_perms, proc_old_dir,
+                     &recent_old_fops, t);
        if (t->proc_old == NULL) {
                remove_proc_entry(t->name, proc_old_dir);
                kfree(t);
@@ -334,11 +334,9 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
        }
        t->proc_old->uid   = ip_list_uid;
        t->proc_old->gid   = ip_list_gid;
-       t->proc_old->data  = t;
 #endif
        t->proc->uid       = ip_list_uid;
        t->proc->gid       = ip_list_gid;
-       t->proc->data      = t;
 #endif
        spin_lock_bh(&recent_lock);
        list_add_tail(&t->list, &tables);
index 7b5572d6beb59b561e92ad6d030d801f5fd0c72f..93cd30ce65011d2b84541277c15664ed6287fc3d 100644 (file)
@@ -326,6 +326,7 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = {
 
 static struct netdev_queue noop_netdev_queue = {
        .qdisc          =       &noop_qdisc,
+       .qdisc_sleeping =       &noop_qdisc,
 };
 
 struct Qdisc noop_qdisc = {
@@ -352,6 +353,7 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
 static struct Qdisc noqueue_qdisc;
 static struct netdev_queue noqueue_netdev_queue = {
        .qdisc          =       &noqueue_qdisc,
+       .qdisc_sleeping =       &noqueue_qdisc,
 };
 
 static struct Qdisc noqueue_qdisc = {
index 46f23971f7e4208d9487cbb9cf477790a8fb378a..5ba78701adc3ab868079ee180b1fc2e4ffc3b95f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * dev_cgroup.c - device cgroup subsystem
+ * device_cgroup.c - device cgroup subsystem
  *
  * Copyright 2007 IBM Corp
  */
@@ -10,6 +10,7 @@
 #include <linux/list.h>
 #include <linux/uaccess.h>
 #include <linux/seq_file.h>
+#include <linux/rcupdate.h>
 
 #define ACC_MKNOD 1
 #define ACC_READ  2
 
 /*
  * whitelist locking rules:
- * cgroup_lock() cannot be taken under dev_cgroup->lock.
- * dev_cgroup->lock can be taken with or without cgroup_lock().
- *
- * modifications always require cgroup_lock
- * modifications to a list which is visible require the
- *   dev_cgroup->lock *and* cgroup_lock()
- * walking the list requires dev_cgroup->lock or cgroup_lock().
- *
- * reasoning: dev_whitelist_copy() needs to kmalloc, so needs
- *   a mutex, which the cgroup_lock() is.  Since modifying
- *   a visible list requires both locks, either lock can be
- *   taken for walking the list.
+ * hold cgroup_lock() for update/read.
+ * hold rcu_read_lock() for read.
  */
 
 struct dev_whitelist_item {
@@ -47,7 +38,6 @@ struct dev_whitelist_item {
 struct dev_cgroup {
        struct cgroup_subsys_state css;
        struct list_head whitelist;
-       spinlock_t lock;
 };
 
 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
@@ -84,13 +74,9 @@ static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig)
        struct dev_whitelist_item *wh, *tmp, *new;
 
        list_for_each_entry(wh, orig, list) {
-               new = kmalloc(sizeof(*wh), GFP_KERNEL);
+               new = kmemdup(wh, sizeof(*wh), GFP_KERNEL);
                if (!new)
                        goto free_and_exit;
-               new->major = wh->major;
-               new->minor = wh->minor;
-               new->type = wh->type;
-               new->access = wh->access;
                list_add_tail(&new->list, dest);
        }
 
@@ -107,19 +93,16 @@ free_and_exit:
 /* Stupid prototype - don't bother combining existing entries */
 /*
  * called under cgroup_lock()
- * since the list is visible to other tasks, we need the spinlock also
  */
 static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
                        struct dev_whitelist_item *wh)
 {
        struct dev_whitelist_item *whcopy, *walk;
 
-       whcopy = kmalloc(sizeof(*whcopy), GFP_KERNEL);
+       whcopy = kmemdup(wh, sizeof(*wh), GFP_KERNEL);
        if (!whcopy)
                return -ENOMEM;
 
-       memcpy(whcopy, wh, sizeof(*whcopy));
-       spin_lock(&dev_cgroup->lock);
        list_for_each_entry(walk, &dev_cgroup->whitelist, list) {
                if (walk->type != wh->type)
                        continue;
@@ -135,7 +118,6 @@ static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
 
        if (whcopy != NULL)
                list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist);
-       spin_unlock(&dev_cgroup->lock);
        return 0;
 }
 
@@ -149,14 +131,12 @@ static void whitelist_item_free(struct rcu_head *rcu)
 
 /*
  * called under cgroup_lock()
- * since the list is visible to other tasks, we need the spinlock also
  */
 static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup,
                        struct dev_whitelist_item *wh)
 {
        struct dev_whitelist_item *walk, *tmp;
 
-       spin_lock(&dev_cgroup->lock);
        list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) {
                if (walk->type == DEV_ALL)
                        goto remove;
@@ -174,7 +154,6 @@ remove:
                        call_rcu(&walk->rcu, whitelist_item_free);
                }
        }
-       spin_unlock(&dev_cgroup->lock);
 }
 
 /*
@@ -214,7 +193,6 @@ static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss,
                }
        }
 
-       spin_lock_init(&dev_cgroup->lock);
        return &dev_cgroup->css;
 }
 
@@ -330,15 +308,11 @@ static int parent_has_perm(struct dev_cgroup *childcg,
 {
        struct cgroup *pcg = childcg->css.cgroup->parent;
        struct dev_cgroup *parent;
-       int ret;
 
        if (!pcg)
                return 1;
        parent = cgroup_to_devcgroup(pcg);
-       spin_lock(&parent->lock);
-       ret = may_access_whitelist(parent, wh);
-       spin_unlock(&parent->lock);
-       return ret;
+       return may_access_whitelist(parent, wh);
 }
 
 /*
@@ -357,17 +331,14 @@ static int parent_has_perm(struct dev_cgroup *childcg,
 static int devcgroup_update_access(struct dev_cgroup *devcgroup,
                                   int filetype, const char *buffer)
 {
-       struct dev_cgroup *cur_devcgroup;
        const char *b;
        char *endp;
-       int retval = 0, count;
+       int count;
        struct dev_whitelist_item wh;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       cur_devcgroup = task_devcgroup(current);
-
        memset(&wh, 0, sizeof(wh));
        b = buffer;
 
@@ -437,7 +408,6 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
        }
 
 handle:
-       retval = 0;
        switch (filetype) {
        case DEVCG_ALLOW:
                if (!parent_has_perm(devcgroup, &wh))
index 89b7f549bebd772bb62378510a541bb0b052e967..ea2bf82c93730505768c28f00174d6484ac2bcf9 100644 (file)
@@ -319,6 +319,7 @@ EXPORT_SYMBOL(snd_pcm_format_physical_width);
 /**
  * snd_pcm_format_size - return the byte size of samples on the given format
  * @format: the format to check
+ * @samples: sampling rate
  *
  * Returns the byte size of the given samples for the format, or a
  * negative error code if unknown format.
index e5e749f3e0ef08df9fea76571c6a552a2724e388..73be7e14a603d56208aec9d9bd741b48c5444750 100644 (file)
@@ -51,7 +51,7 @@ static int emu10k1_playback_constraints(struct snd_pcm_runtime *runtime)
        if (err < 0)
                return err;
        err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, 256, UINT_MAX);
-       if (err) < 0)
+       if (err < 0)
                return err;
        return 0;
 }
index a7d89662acf66e200c5305ac409fd3bfd7188500..88fbf285d2b74ea6cf1ccc701398d0e730784840 100644 (file)
@@ -759,7 +759,6 @@ static int snd_ca0106_pcm_prepare_playback(struct snd_pcm_substream *substream)
                               SPCS_CHANNELNUM_LEFT | SPCS_SOURCENUM_UNSPEC |
                               SPCS_GENERATIONSTATUS | 0x00001200 |
                               0x00000000 | SPCS_EMPHASIS_NONE | SPCS_COPYRIGHT );
-       }
 #endif
 
        return 0;
index 20d0e328288ab6ae4ea24abe202b6a3d0dd3d442..8f9e3859c37ca70f802fdb5c3f8f25ed75731b8d 100644 (file)
@@ -666,6 +666,7 @@ static int snd_ps3_init_avsetting(struct snd_ps3_card_info *card)
        card->avs.avs_audio_width = PS3AV_CMD_AUDIO_WORD_BITS_16;
        card->avs.avs_audio_format = PS3AV_CMD_AUDIO_FORMAT_PCM;
        card->avs.avs_audio_source = PS3AV_CMD_AUDIO_SOURCE_SERIAL;
+       memcpy(card->avs.avs_cs_info, ps3av_mode_cs_info, 8);
 
        ret = snd_ps3_change_avsetting(card);
 
@@ -685,6 +686,7 @@ static int snd_ps3_set_avsetting(struct snd_pcm_substream *substream)
 {
        struct snd_ps3_card_info *card = snd_pcm_substream_chip(substream);
        struct snd_ps3_avsetting_info avs;
+       int ret;
 
        avs = card->avs;
 
@@ -729,19 +731,92 @@ static int snd_ps3_set_avsetting(struct snd_pcm_substream *substream)
                return 1;
        }
 
-       if ((card->avs.avs_audio_width != avs.avs_audio_width) ||
-           (card->avs.avs_audio_rate != avs.avs_audio_rate)) {
-               card->avs = avs;
-               snd_ps3_change_avsetting(card);
+       memcpy(avs.avs_cs_info, ps3av_mode_cs_info, 8);
 
+       if (memcmp(&card->avs, &avs, sizeof(avs))) {
                pr_debug("%s: after freq=%d width=%d\n", __func__,
                         card->avs.avs_audio_rate, card->avs.avs_audio_width);
 
-               return 0;
+               card->avs = avs;
+               snd_ps3_change_avsetting(card);
+               ret = 0;
        } else
+               ret = 1;
+
+       /* check CS non-audio bit and mute accordingly */
+       if (avs.avs_cs_info[0] & 0x02)
+               ps3av_audio_mute_analog(1); /* mute if non-audio */
+       else
+               ps3av_audio_mute_analog(0);
+
+       return ret;
+}
+
+/*
+ * SPDIF status bits controls
+ */
+static int snd_ps3_spdif_mask_info(struct snd_kcontrol *kcontrol,
+                                  struct snd_ctl_elem_info *uinfo)
+{
+       uinfo->type = SNDRV_CTL_ELEM_TYPE_IEC958;
+       uinfo->count = 1;
+       return 0;
+}
+
+/* FIXME: ps3av_set_audio_mode() assumes only consumer mode */
+static int snd_ps3_spdif_cmask_get(struct snd_kcontrol *kcontrol,
+                                  struct snd_ctl_elem_value *ucontrol)
+{
+       memset(ucontrol->value.iec958.status, 0xff, 8);
+       return 0;
+}
+
+static int snd_ps3_spdif_pmask_get(struct snd_kcontrol *kcontrol,
+                                  struct snd_ctl_elem_value *ucontrol)
+{
+       return 0;
+}
+
+static int snd_ps3_spdif_default_get(struct snd_kcontrol *kcontrol,
+                                    struct snd_ctl_elem_value *ucontrol)
+{
+       memcpy(ucontrol->value.iec958.status, ps3av_mode_cs_info, 8);
+       return 0;
+}
+
+static int snd_ps3_spdif_default_put(struct snd_kcontrol *kcontrol,
+                                    struct snd_ctl_elem_value *ucontrol)
+{
+       if (memcmp(ps3av_mode_cs_info, ucontrol->value.iec958.status, 8)) {
+               memcpy(ps3av_mode_cs_info, ucontrol->value.iec958.status, 8);
                return 1;
+       }
+       return 0;
 }
 
+static struct snd_kcontrol_new spdif_ctls[] = {
+       {
+               .access = SNDRV_CTL_ELEM_ACCESS_READ,
+               .iface = SNDRV_CTL_ELEM_IFACE_PCM,
+               .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,CON_MASK),
+               .info = snd_ps3_spdif_mask_info,
+               .get = snd_ps3_spdif_cmask_get,
+       },
+       {
+               .access = SNDRV_CTL_ELEM_ACCESS_READ,
+               .iface = SNDRV_CTL_ELEM_IFACE_PCM,
+               .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,PRO_MASK),
+               .info = snd_ps3_spdif_mask_info,
+               .get = snd_ps3_spdif_pmask_get,
+       },
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_PCM,
+               .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT),
+               .info = snd_ps3_spdif_mask_info,
+               .get = snd_ps3_spdif_default_get,
+               .put = snd_ps3_spdif_default_put,
+       },
+};
 
 
 static int snd_ps3_map_mmio(void)
@@ -842,7 +917,7 @@ static void snd_ps3_audio_set_base_addr(uint64_t ioaddr_start)
 
 static int __init snd_ps3_driver_probe(struct ps3_system_bus_device *dev)
 {
-       int ret;
+       int i, ret;
        u64 lpar_addr, lpar_size;
 
        BUG_ON(!firmware_has_feature(FW_FEATURE_PS3_LV1));
@@ -903,6 +978,15 @@ static int __init snd_ps3_driver_probe(struct ps3_system_bus_device *dev)
        strcpy(the_card.card->driver, "PS3");
        strcpy(the_card.card->shortname, "PS3");
        strcpy(the_card.card->longname, "PS3 sound");
+
+       /* create control elements */
+       for (i = 0; i < ARRAY_SIZE(spdif_ctls); i++) {
+               ret = snd_ctl_add(the_card.card,
+                                 snd_ctl_new1(&spdif_ctls[i], &the_card));
+               if (ret < 0)
+                       goto clean_card;
+       }
+
        /* create PCM devices instance */
        /* NOTE:this driver works assuming pcm:substream = 1:1 */
        ret = snd_pcm_new(the_card.card,
index 4b7e6fbbe5005769b6dbd059d1181ec5f5d44873..326fb29e82d842e43f4159fe94372b49f49b03b8 100644 (file)
@@ -51,6 +51,7 @@ struct snd_ps3_avsetting_info {
        uint32_t avs_audio_width;
        uint32_t avs_audio_format; /* fixed */
        uint32_t avs_audio_source; /* fixed */
+       unsigned char avs_cs_info[8];
 };
 /*
  * PS3 audio 'card' instance
index 0a063a98a6613820295419a5cab10bd09a8dc338..853b33ae3435297f414eaee086b31bdf6a4b1333 100644 (file)
@@ -43,6 +43,7 @@
 struct omap_mcbsp_data {
        unsigned int                    bus_id;
        struct omap_mcbsp_reg_cfg       regs;
+       unsigned int                    fmt;
        /*
         * Flags indicating is the bus already activated and configured by
         * another substream
@@ -200,6 +201,7 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream,
        struct omap_mcbsp_data *mcbsp_data = to_mcbsp(cpu_dai->private_data);
        struct omap_mcbsp_reg_cfg *regs = &mcbsp_data->regs;
        int dma, bus_id = mcbsp_data->bus_id, id = cpu_dai->id;
+       int wlen;
        unsigned long port;
 
        if (cpu_class_is_omap1()) {
@@ -244,19 +246,29 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream,
        switch (params_format(params)) {
        case SNDRV_PCM_FORMAT_S16_LE:
                /* Set word lengths */
+               wlen = 16;
                regs->rcr2      |= RWDLEN2(OMAP_MCBSP_WORD_16);
                regs->rcr1      |= RWDLEN1(OMAP_MCBSP_WORD_16);
                regs->xcr2      |= XWDLEN2(OMAP_MCBSP_WORD_16);
                regs->xcr1      |= XWDLEN1(OMAP_MCBSP_WORD_16);
-               /* Set FS period and length in terms of bit clock periods */
-               regs->srgr2     |= FPER(16 * 2 - 1);
-               regs->srgr1     |= FWID(16 - 1);
                break;
        default:
                /* Unsupported PCM format */
                return -EINVAL;
        }
 
+       /* Set FS period and length in terms of bit clock periods */
+       switch (mcbsp_data->fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+       case SND_SOC_DAIFMT_I2S:
+               regs->srgr2     |= FPER(wlen * 2 - 1);
+               regs->srgr1     |= FWID(wlen - 1);
+               break;
+       case SND_SOC_DAIFMT_DSP_A:
+               regs->srgr2     |= FPER(wlen * 2 - 1);
+               regs->srgr1     |= FWID(0);
+               break;
+       }
+
        omap_mcbsp_config(bus_id, &mcbsp_data->regs);
        mcbsp_data->configured = 1;
 
@@ -272,10 +284,12 @@ static int omap_mcbsp_dai_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 {
        struct omap_mcbsp_data *mcbsp_data = to_mcbsp(cpu_dai->private_data);
        struct omap_mcbsp_reg_cfg *regs = &mcbsp_data->regs;
+       unsigned int temp_fmt = fmt;
 
        if (mcbsp_data->configured)
                return 0;
 
+       mcbsp_data->fmt = fmt;
        memset(regs, 0, sizeof(*regs));
        /* Generic McBSP register settings */
        regs->spcr2     |= XINTM(3) | FREE;
@@ -293,6 +307,8 @@ static int omap_mcbsp_dai_set_dai_fmt(struct snd_soc_dai *cpu_dai,
                /* 0-bit data delay */
                regs->rcr2      |= RDATDLY(0);
                regs->xcr2      |= XDATDLY(0);
+               /* Invert bit clock and FS polarity configuration for DSP_A */
+               temp_fmt ^= SND_SOC_DAIFMT_IB_IF;
                break;
        default:
                /* Unsupported data format */
@@ -316,7 +332,7 @@ static int omap_mcbsp_dai_set_dai_fmt(struct snd_soc_dai *cpu_dai,
        }
 
        /* Set bit clock (CLKX/CLKR) and FS polarities */
-       switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+       switch (temp_fmt & SND_SOC_DAIFMT_INV_MASK) {
        case SND_SOC_DAIFMT_NB_NF:
                /*
                 * Normal BCLK + FS.