Merge branch 'apei-release' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb...
Linus Torvalds [Thu, 4 Aug 2011 07:53:27 +0000 (21:53 -1000)]
* 'apei-release' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6:
  ACPI, APEI, EINJ Param support is disabled by default
  APEI GHES: 32-bit buildfix
  ACPI: APEI build fix
  ACPI, APEI, GHES: Add hardware memory error recovery support
  HWPoison: add memory_failure_queue()
  ACPI, APEI, GHES, Error records content based throttle
  ACPI, APEI, GHES, printk support for recoverable error via NMI
  lib, Make gen_pool memory allocator lockless
  lib, Add lock-less NULL terminated single list
  Add Kconfig option ARCH_HAVE_NMI_SAFE_CMPXCHG
  ACPI, APEI, Add WHEA _OSC support
  ACPI, APEI, Add APEI bit support in generic _OSC call
  ACPI, APEI, GHES, Support disable GHES at boot time
  ACPI, APEI, GHES, Prevent GHES to be built as module
  ACPI, APEI, Use apei_exec_run_optional in APEI EINJ and ERST
  ACPI, APEI, Add apei_exec_run_optional
  ACPI, APEI, GHES, Do not ratelimit fatal error printk before panic
  ACPI, APEI, ERST, Fix erst-dbg long record reading issue
  ACPI, APEI, ERST, Prevent erst_dbg from loading if ERST is disabled

183 files changed:
Documentation/device-mapper/dm-crypt.txt
Documentation/device-mapper/dm-flakey.txt
Documentation/device-mapper/dm-raid.txt
Documentation/fault-injection/fault-injection.txt
Documentation/feature-removal-schedule.txt
Documentation/frv/booting.txt
Documentation/ioctl/ioctl-number.txt
Documentation/kernel-parameters.txt
Documentation/m68k/kernel-options.txt
MAINTAINERS
arch/cris/arch-v10/drivers/sync_serial.c
arch/cris/arch-v10/kernel/irq.c
arch/cris/include/asm/thread_info.h
arch/tile/include/asm/Kbuild
arch/tile/include/asm/bug.h [deleted file]
arch/tile/include/asm/bugs.h [deleted file]
arch/tile/include/asm/cputime.h [deleted file]
arch/tile/include/asm/device.h [deleted file]
arch/tile/include/asm/div64.h [deleted file]
arch/tile/include/asm/emergency-restart.h [deleted file]
arch/tile/include/asm/errno.h [deleted file]
arch/tile/include/asm/fb.h [deleted file]
arch/tile/include/asm/fcntl.h [deleted file]
arch/tile/include/asm/fixmap.h
arch/tile/include/asm/ioctl.h [deleted file]
arch/tile/include/asm/ioctls.h [deleted file]
arch/tile/include/asm/ipc.h [deleted file]
arch/tile/include/asm/ipcbuf.h [deleted file]
arch/tile/include/asm/irq_regs.h [deleted file]
arch/tile/include/asm/kdebug.h [deleted file]
arch/tile/include/asm/local.h [deleted file]
arch/tile/include/asm/module.h [deleted file]
arch/tile/include/asm/msgbuf.h [deleted file]
arch/tile/include/asm/mutex.h [deleted file]
arch/tile/include/asm/param.h [deleted file]
arch/tile/include/asm/parport.h [deleted file]
arch/tile/include/asm/poll.h [deleted file]
arch/tile/include/asm/posix_types.h [deleted file]
arch/tile/include/asm/resource.h [deleted file]
arch/tile/include/asm/scatterlist.h [deleted file]
arch/tile/include/asm/sembuf.h [deleted file]
arch/tile/include/asm/serial.h [deleted file]
arch/tile/include/asm/shmbuf.h [deleted file]
arch/tile/include/asm/shmparam.h [deleted file]
arch/tile/include/asm/socket.h [deleted file]
arch/tile/include/asm/sockios.h [deleted file]
arch/tile/include/asm/statfs.h [deleted file]
arch/tile/include/asm/termbits.h [deleted file]
arch/tile/include/asm/termios.h [deleted file]
arch/tile/include/asm/types.h [deleted file]
arch/tile/include/asm/ucontext.h [deleted file]
arch/tile/include/asm/xor.h [deleted file]
arch/tile/include/hv/drv_srom_intf.h [new file with mode: 0644]
arch/tile/kernel/time.c
arch/tile/mm/init.c
block/blk-core.c
block/blk-timeout.c
drivers/acpi/acpica/acglobal.h
drivers/acpi/acpica/aclocal.h
drivers/acpi/acpica/acpredef.h
drivers/acpi/acpica/nspredef.c
drivers/acpi/acpica/nsrepair2.c
drivers/acpi/acpica/tbinstal.c
drivers/acpi/battery.c
drivers/acpi/dock.c
drivers/acpi/ec_sys.c
drivers/acpi/fan.c
drivers/acpi/osl.c
drivers/acpi/pci_irq.c
drivers/acpi/pci_root.c
drivers/acpi/processor_thermal.c
drivers/acpi/sbs.c
drivers/acpi/sleep.c
drivers/acpi/sysfs.c
drivers/acpi/thermal.c
drivers/acpi/video.c
drivers/ata/libata-acpi.c
drivers/char/Kconfig
drivers/char/Makefile
drivers/char/ramoops.c
drivers/char/tile-srom.c [new file with mode: 0644]
drivers/char/tpm/tpm_tis.c
drivers/firmware/efivars.c
drivers/input/serio/xilinx_ps2.c
drivers/md/Kconfig
drivers/md/dm-crypt.c
drivers/md/dm-flakey.c
drivers/md/dm-io.c
drivers/md/dm-ioctl.c
drivers/md/dm-kcopyd.c
drivers/md/dm-log-userspace-base.c
drivers/md/dm-log.c
drivers/md/dm-mpath.c
drivers/md/dm-raid.c
drivers/md/dm-snap-persistent.c
drivers/md/dm-snap.c
drivers/md/dm-table.c
drivers/md/dm.c
drivers/md/dm.h
drivers/of/address.c
drivers/of/base.c
drivers/of/fdt.c
drivers/pci/hotplug/acpiphp_glue.c
drivers/rtc/rtc-omap.c
drivers/spi/spi-pl022.c
drivers/target/iscsi/Kconfig
drivers/target/iscsi/iscsi_target.c
drivers/target/iscsi/iscsi_target_configfs.c
drivers/target/iscsi/iscsi_target_nego.c
drivers/target/target_core_transport.c
drivers/target/tcm_fc/tcm_fc.h
drivers/target/tcm_fc/tfc_cmd.c
drivers/target/tcm_fc/tfc_io.c
drivers/thermal/Kconfig
drivers/thermal/thermal_sys.c
drivers/video/backlight/Kconfig
drivers/video/backlight/aat2870_bl.c
drivers/watchdog/Kconfig
drivers/watchdog/nv_tco.c
drivers/watchdog/shwdt.c
fs/Kconfig
fs/btrfs/Makefile
fs/btrfs/acl.c
fs/btrfs/compression.c
fs/btrfs/ctree.h
fs/btrfs/dir-item.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/extent_map.c
fs/btrfs/file-item.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/ref-cache.c [deleted file]
fs/btrfs/ref-cache.h [deleted file]
fs/btrfs/root-tree.c
fs/btrfs/transaction.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/dcache.c
fs/ext4/super.c
fs/stack.c
include/acpi/acpi_drivers.h
include/acpi/acpixf.h
include/acpi/processor.h
include/linux/acpi.h
include/linux/device-mapper.h
include/linux/dm-ioctl.h
include/linux/dm-kcopyd.h
include/linux/fault-inject.h
include/linux/gfp.h
include/linux/idr.h
include/linux/memcontrol.h
include/linux/mfd/aat2870.h
include/linux/of.h
include/linux/of_fdt.h
include/linux/radix-tree.h
include/linux/shmem_fs.h
include/linux/swapops.h
include/linux/thermal.h
init/main.c
ipc/shm.c
kernel/taskstats.c
lib/fault-inject.c
lib/idr.c
lib/radix-tree.c
mm/failslab.c
mm/filemap.c
mm/memcontrol.c
mm/mincore.c
mm/page_alloc.c
mm/shmem.c
mm/swapfile.c
mm/truncate.c
sound/core/pcm_compat.c
sound/core/rtctimer.c
sound/pci/asihpi/hpidspcd.c
sound/pci/asihpi/hpioctl.c
sound/pci/rme9652/hdspm.c
sound/soc/txx9/txx9aclc.c
tools/power/x86/turbostat/turbostat.c
tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c

index 6b5c42d..2c656ae 100644 (file)
@@ -4,7 +4,8 @@ dm-crypt
 Device-Mapper's "crypt" target provides transparent encryption of block devices
 using the kernel crypto API.
 
-Parameters: <cipher> <key> <iv_offset> <device path> <offset>
+Parameters: <cipher> <key> <iv_offset> <device path> \
+             <offset> [<#opt_params> <opt_params>]
 
 <cipher>
     Encryption cipher and an optional IV generation mode.
@@ -37,6 +38,24 @@ Parameters: <cipher> <key> <iv_offset> <device path> <offset>
 <offset>
     Starting sector within the device where the encrypted data begins.
 
+<#opt_params>
+    Number of optional parameters. If there are no optional parameters,
+    the optional paramaters section can be skipped or #opt_params can be zero.
+    Otherwise #opt_params is the number of following arguments.
+
+    Example of optional parameters section:
+        1 allow_discards
+
+allow_discards
+    Block discard requests (a.k.a. TRIM) are passed through the crypt device.
+    The default is to ignore discard requests.
+
+    WARNING: Assess the specific security risks carefully before enabling this
+    option.  For example, allowing discards on encrypted devices may lead to
+    the leak of information about the ciphertext device (filesystem type,
+    used space etc.) if the discarded blocks can be located easily on the
+    device later.
+
 Example scripts
 ===============
 LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
index c8efdfd..6ff5c23 100644 (file)
@@ -1,17 +1,53 @@
 dm-flakey
 =========
 
-This target is the same as the linear target except that it returns I/O
-errors periodically.  It's been found useful in simulating failing
-devices for testing purposes.
+This target is the same as the linear target except that it exhibits
+unreliable behaviour periodically.  It's been found useful in simulating
+failing devices for testing purposes.
 
 Starting from the time the table is loaded, the device is available for
-<up interval> seconds, then returns errors for <down interval> seconds,
-and then this cycle repeats.
+<up interval> seconds, then exhibits unreliable behaviour for <down
+interval> seconds, and then this cycle repeats.
 
-Parameters: <dev path> <offset> <up interval> <down interval>
+Also, consider using this in combination with the dm-delay target too,
+which can delay reads and writes and/or send them to different
+underlying devices.
+
+Table parameters
+----------------
+  <dev path> <offset> <up interval> <down interval> \
+    [<num_features> [<feature arguments>]]
+
+Mandatory parameters:
     <dev path>: Full pathname to the underlying block-device, or a
                 "major:minor" device-number.
     <offset>: Starting sector within the device.
     <up interval>: Number of seconds device is available.
     <down interval>: Number of seconds device returns errors.
+
+Optional feature parameters:
+  If no feature parameters are present, during the periods of
+  unreliability, all I/O returns errors.
+
+  drop_writes:
+       All write I/O is silently ignored.
+       Read I/O is handled correctly.
+
+  corrupt_bio_byte <Nth_byte> <direction> <value> <flags>:
+       During <down interval>, replace <Nth_byte> of the data of
+       each matching bio with <value>.
+
+    <Nth_byte>: The offset of the byte to replace.
+               Counting starts at 1, to replace the first byte.
+    <direction>: Either 'r' to corrupt reads or 'w' to corrupt writes.
+                'w' is incompatible with drop_writes.
+    <value>: The value (from 0-255) to write.
+    <flags>: Perform the replacement only if bio->bi_rw has all the
+            selected flags set.
+
+Examples:
+  corrupt_bio_byte 32 r 1 0
+       - replaces the 32nd byte of READ bios with the value 1
+
+  corrupt_bio_byte 224 w 0 32
+       - replaces the 224th byte of REQ_META (=32) bios with the value 0
index 33b6b70..2a8c113 100644 (file)
-Device-mapper RAID (dm-raid) is a bridge from DM to MD.  It
-provides a way to use device-mapper interfaces to access the MD RAID
-drivers.
+dm-raid
+-------
 
-As with all device-mapper targets, the nominal public interfaces are the
-constructor (CTR) tables and the status outputs (both STATUSTYPE_INFO
-and STATUSTYPE_TABLE).  The CTR table looks like the following:
+The device-mapper RAID (dm-raid) target provides a bridge from DM to MD.
+It allows the MD RAID drivers to be accessed using a device-mapper
+interface.
 
-1: <s> <l> raid \
-2:      <raid_type> <#raid_params> <raid_params> \
-3:      <#raid_devs> <meta_dev1> <dev1> .. <meta_devN> <devN>
-
-Line 1 contains the standard first three arguments to any device-mapper
-target - the start, length, and target type fields.  The target type in
-this case is "raid".
-
-Line 2 contains the arguments that define the particular raid
-type/personality/level, the required arguments for that raid type, and
-any optional arguments.  Possible raid types include: raid4, raid5_la,
-raid5_ls, raid5_rs, raid6_zr, raid6_nr, and raid6_nc.  (raid1 is
-planned for the future.)  The list of required and optional parameters
-is the same for all the current raid types.  The required parameters are
-positional, while the optional parameters are given as key/value pairs.
-The possible parameters are as follows:
- <chunk_size>           Chunk size in sectors.
- [[no]sync]             Force/Prevent RAID initialization
- [rebuild <idx>]        Rebuild the drive indicated by the index
- [daemon_sleep <ms>]    Time between bitmap daemon work to clear bits
- [min_recovery_rate <kB/sec/disk>]      Throttle RAID initialization
- [max_recovery_rate <kB/sec/disk>]      Throttle RAID initialization
- [max_write_behind <sectors>]           See '-write-behind=' (man mdadm)
- [stripe_cache <sectors>]               Stripe cache size for higher RAIDs
-
-Line 3 contains the list of devices that compose the array in
-metadata/data device pairs.  If the metadata is stored separately, a '-'
-is given for the metadata device position.  If a drive has failed or is
-missing at creation time, a '-' can be given for both the metadata and
-data drives for a given position.
-
-NB. Currently all metadata devices must be specified as '-'.
-
-Examples:
-# RAID4 - 4 data drives, 1 parity
+The target is named "raid" and it accepts the following parameters:
+
+  <raid_type> <#raid_params> <raid_params> \
+    <#raid_devs> <metadata_dev0> <dev0> [.. <metadata_devN> <devN>]
+
+<raid_type>:
+  raid1                RAID1 mirroring
+  raid4                RAID4 dedicated parity disk
+  raid5_la     RAID5 left asymmetric
+               - rotating parity 0 with data continuation
+  raid5_ra     RAID5 right asymmetric
+               - rotating parity N with data continuation
+  raid5_ls     RAID5 left symmetric
+               - rotating parity 0 with data restart
+  raid5_rs     RAID5 right symmetric
+               - rotating parity N with data restart
+  raid6_zr     RAID6 zero restart
+               - rotating parity zero (left-to-right) with data restart
+  raid6_nr     RAID6 N restart
+               - rotating parity N (right-to-left) with data restart
+  raid6_nc     RAID6 N continue
+               - rotating parity N (right-to-left) with data continuation
+
+  Refererence: Chapter 4 of
+  http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf
+
+<#raid_params>: The number of parameters that follow.
+
+<raid_params> consists of
+    Mandatory parameters:
+        <chunk_size>: Chunk size in sectors.  This parameter is often known as
+                     "stripe size".  It is the only mandatory parameter and
+                     is placed first.
+
+    followed by optional parameters (in any order):
+       [sync|nosync]   Force or prevent RAID initialization.
+
+       [rebuild <idx>] Rebuild drive number idx (first drive is 0).
+
+       [daemon_sleep <ms>]
+               Interval between runs of the bitmap daemon that
+               clear bits.  A longer interval means less bitmap I/O but
+               resyncing after a failure is likely to take longer.
+
+       [min_recovery_rate <kB/sec/disk>]  Throttle RAID initialization
+       [max_recovery_rate <kB/sec/disk>]  Throttle RAID initialization
+       [write_mostly <idx>]               Drive index is write-mostly
+       [max_write_behind <sectors>]       See '-write-behind=' (man mdadm)
+       [stripe_cache <sectors>]           Stripe cache size (higher RAIDs only)
+       [region_size <sectors>]
+               The region_size multiplied by the number of regions is the
+               logical size of the array.  The bitmap records the device
+               synchronisation state for each region.
+
+<#raid_devs>: The number of devices composing the array.
+       Each device consists of two entries.  The first is the device
+       containing the metadata (if any); the second is the one containing the
+       data.
+
+       If a drive has failed or is missing at creation time, a '-' can be
+       given for both the metadata and data drives for a given position.
+
+
+Example tables
+--------------
+# RAID4 - 4 data drives, 1 parity (no metadata devices)
 # No metadata devices specified to hold superblock/bitmap info
 # Chunk size of 1MiB
 # (Lines separated for easy reading)
+
 0 1960893648 raid \
         raid4 1 2048 \
         5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
 
-# RAID4 - 4 data drives, 1 parity (no metadata devices)
+# RAID4 - 4 data drives, 1 parity (with metadata devices)
 # Chunk size of 1MiB, force RAID initialization,
 #       min recovery rate at 20 kiB/sec/disk
+
 0 1960893648 raid \
-        raid4 4 2048 min_recovery_rate 20 sync\
-        5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
+        raid4 4 2048 sync min_recovery_rate 20 \
+        5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82
 
-Performing a 'dmsetup table' should display the CTR table used to
-construct the mapping (with possible reordering of optional
-parameters).
+'dmsetup table' displays the table used to construct the mapping.
+The optional parameters are always printed in the order listed
+above with "sync" or "nosync" always output ahead of the other
+arguments, regardless of the order used when originally loading the table.
+Arguments that can be repeated are ordered by value.
 
-Performing a 'dmsetup status' will yield information on the state and
-health of the array.  The output is as follows:
+'dmsetup status' yields information on the state and health of the
+array.
+The output is as follows:
 1: <s> <l> raid \
 2:      <raid_type> <#devices> <1 health char for each dev> <resync_ratio>
 
-Line 1 is standard DM output.  Line 2 is best shown by example:
+Line 1 is the standard output produced by device-mapper.
+Line 2 is produced by the raid target, and best explained by example:
         0 1960893648 raid raid4 5 AAAAA 2/490221568
 Here we can see the RAID type is raid4, there are 5 devices - all of
 which are 'A'live, and the array is 2/490221568 complete with recovery.
+Faulty or missing devices are marked 'D'.  Devices that are out-of-sync
+are marked 'a'.
index 7be15e4..82a5d25 100644 (file)
@@ -143,8 +143,7 @@ o provide a way to configure fault attributes
   failslab, fail_page_alloc, and fail_make_request use this way.
   Helper functions:
 
-       init_fault_attr_dentries(entries, attr, name);
-       void cleanup_fault_attr_dentries(entries);
+       fault_create_debugfs_attr(name, parent, attr);
 
 - module parameters
 
index ea0bace..43f4809 100644 (file)
@@ -296,15 +296,6 @@ Who:       Ravikiran Thirumalai <kiran@scalex86.org>
 
 ---------------------------
 
-What:  CONFIG_THERMAL_HWMON
-When:  January 2009
-Why:   This option was introduced just to allow older lm-sensors userspace
-       to keep working over the upgrade to 2.6.26. At the scheduled time of
-       removal fixed lm-sensors (2.x or 3.x) should be readily available.
-Who:   Rene Herman <rene.herman@gmail.com>
-
----------------------------
-
 What:  Code that is now under CONFIG_WIRELESS_EXT_SYSFS
        (in net/core/net-sysfs.c)
 When:  After the only user (hal) has seen a release with the patches
index ace200b..37c4d84 100644 (file)
@@ -106,13 +106,20 @@ separated by spaces:
       To use the first on-chip serial port at baud rate 115200, no parity, 8
       bits, and no flow control.
 
-  (*) root=/dev/<xxxx>
+  (*) root=<xxxx>
 
-      This specifies the device upon which the root filesystem resides. For
-      example:
+      This specifies the device upon which the root filesystem resides. It
+      may be specified by major and minor number, device path, or even
+      partition uuid, if supported.  For example:
 
        /dev/nfs        NFS root filesystem
        /dev/mtdblock3  Fourth RedBoot partition on the System Flash
+       PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=1
+               first partition after the partition with the given UUID
+       253:0           Device with major 253 and minor 0
+
+      Authoritative information can be found in
+      "Documentation/kernel-parameters.txt".
 
   (*) rw
 
index 72ba8d5..845a191 100644 (file)
@@ -292,6 +292,7 @@ Code  Seq#(hex)     Include File            Comments
                                        <mailto:buk@buks.ipn.de>
 0xA0   all     linux/sdp/sdp.h         Industrial Device Project
                                        <mailto:kenji@bitgate.com>
+0xA2   00-0F   arch/tile/include/asm/hardwall.h
 0xA3   80-8F   Port ACL                in development:
                                        <mailto:tlewis@mindspring.com>
 0xA3   90-9F   linux/dtlk.h
index 26a8374..865e39f 100644 (file)
@@ -163,6 +163,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
                        See also Documentation/power/pm.txt, pci=noacpi
 
+       acpi_rsdp=      [ACPI,EFI,KEXEC]
+                       Pass the RSDP address to the kernel, mostly used
+                       on machines running EFI runtime service to boot the
+                       second kernel for kdump.
+
        acpi_apic_instance=     [ACPI, IOAPIC]
                        Format: <int>
                        2: use 2nd APIC table, if available
@@ -2240,6 +2245,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
        ro              [KNL] Mount root device read-only on boot
 
        root=           [KNL] Root filesystem
+                       See name_to_dev_t comment in init/do_mounts.c.
 
        rootdelay=      [KNL] Delay (in seconds) to pause before attempting to
                        mount the root filesystem
index c93bed6..97d45f2 100644 (file)
@@ -129,6 +129,20 @@ decimal 11 is the major of SCSI CD-ROMs, and the minor 0 stands for
 the first of these. You can find out all valid major numbers by
 looking into include/linux/major.h.
 
+In addition to major and minor numbers, if the device containing your
+root partition uses a partition table format with unique partition
+identifiers, then you may use them.  For instance,
+"root=PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF".  It is also
+possible to reference another partition on the same device using a
+known partition UUID as the starting point.  For example,
+if partition 5 of the device has the UUID of
+00112233-4455-6677-8899-AABBCCDDEEFF then partition 3 may be found as
+follows:
+  PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=-2
+
+Authoritative information can be found in
+"Documentation/kernel-parameters.txt".
+
 
 2.2) ro, rw
 -----------
index c9c6324..6c84a21 100644 (file)
@@ -4722,6 +4722,7 @@ S:        Maintained
 F:     drivers/of
 F:     include/linux/of*.h
 K:     of_get_property
+K:     of_match_table
 
 OPENRISC ARCHITECTURE
 M:     Jonas Bonn <jonas@southpole.se>
@@ -6318,6 +6319,7 @@ F:        include/linux/sysv_fs.h
 TARGET SUBSYSTEM
 M:     Nicholas A. Bellinger <nab@linux-iscsi.org>
 L:     linux-scsi@vger.kernel.org
+L:     target-devel@vger.kernel.org
 L:     http://groups.google.com/group/linux-iscsi-target-dev
 W:     http://www.linux-iscsi.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/nab/lio-core-2.6.git master
index 8502653..466af40 100644 (file)
@@ -158,7 +158,7 @@ static int sync_serial_open(struct inode *inode, struct file *file);
 static int sync_serial_release(struct inode *inode, struct file *file);
 static unsigned int sync_serial_poll(struct file *filp, poll_table *wait);
 
-static int sync_serial_ioctl(struct file *file,
+static long sync_serial_ioctl(struct file *file,
        unsigned int cmd, unsigned long arg);
 static ssize_t sync_serial_write(struct file *file, const char *buf,
        size_t count, loff_t *ppos);
@@ -625,11 +625,11 @@ static int sync_serial_open(struct inode *inode, struct file *file)
                        *R_IRQ_MASK1_SET = 1 << port->data_avail_bit;
                DEBUG(printk(KERN_DEBUG "sser%d rec started\n", dev));
        }
-       ret = 0;
+       err = 0;
        
 out:
        mutex_unlock(&sync_serial_mutex);
-       return ret;
+       return err;
 }
 
 static int sync_serial_release(struct inode *inode, struct file *file)
index 907cfb5..ba0e596 100644 (file)
@@ -20,6 +20,9 @@
 #define crisv10_mask_irq(irq_nr) (*R_VECT_MASK_CLR = 1 << (irq_nr));
 #define crisv10_unmask_irq(irq_nr) (*R_VECT_MASK_SET = 1 << (irq_nr));
 
+extern void kgdb_init(void);
+extern void breakpoint(void);
+
 /* don't use set_int_vector, it bypasses the linux interrupt handlers. it is
  * global just so that the kernel gdb can use it.
  */
index 29b74a1..332f19c 100644 (file)
@@ -11,8 +11,6 @@
 
 #ifdef __KERNEL__
 
-#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
-
 #ifndef __ASSEMBLY__
 #include <asm/types.h>
 #include <asm/processor.h>
@@ -67,8 +65,10 @@ struct thread_info {
 
 #define init_thread_info       (init_thread_union.thread_info)
 
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
 /* thread information allocation */
-#define alloc_thread_info(tsk, node) ((struct thread_info *) __get_free_pages(GFP_KERNEL,1))
+#define alloc_thread_info_node(tsk, node)      \
+       ((struct thread_info *) __get_free_pages(GFP_KERNEL, 1))
 #define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
 
 #endif /* !__ASSEMBLY__ */
index 849ab2f..aec60dc 100644 (file)
@@ -2,3 +2,41 @@ include include/asm-generic/Kbuild.asm
 
 header-y += ucontext.h
 header-y += hardwall.h
+
+generic-y += bug.h
+generic-y += bugs.h
+generic-y += cputime.h
+generic-y += device.h
+generic-y += div64.h
+generic-y += emergency-restart.h
+generic-y += errno.h
+generic-y += fb.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipc.h
+generic-y += ipcbuf.h
+generic-y += irq_regs.h
+generic-y += kdebug.h
+generic-y += local.h
+generic-y += module.h
+generic-y += msgbuf.h
+generic-y += mutex.h
+generic-y += param.h
+generic-y += parport.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += scatterlist.h
+generic-y += sembuf.h
+generic-y += serial.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h
+generic-y += xor.h
diff --git a/arch/tile/include/asm/bug.h b/arch/tile/include/asm/bug.h
deleted file mode 100644 (file)
index b12fd89..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bug.h>
diff --git a/arch/tile/include/asm/bugs.h b/arch/tile/include/asm/bugs.h
deleted file mode 100644 (file)
index 61791e1..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bugs.h>
diff --git a/arch/tile/include/asm/cputime.h b/arch/tile/include/asm/cputime.h
deleted file mode 100644 (file)
index 6d68ad7..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/cputime.h>
diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h
deleted file mode 100644 (file)
index f0a4c25..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/device.h>
diff --git a/arch/tile/include/asm/div64.h b/arch/tile/include/asm/div64.h
deleted file mode 100644 (file)
index 6cd978c..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/arch/tile/include/asm/emergency-restart.h b/arch/tile/include/asm/emergency-restart.h
deleted file mode 100644 (file)
index 3711bd9..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/emergency-restart.h>
diff --git a/arch/tile/include/asm/errno.h b/arch/tile/include/asm/errno.h
deleted file mode 100644 (file)
index 4c82b50..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/errno.h>
diff --git a/arch/tile/include/asm/fb.h b/arch/tile/include/asm/fb.h
deleted file mode 100644 (file)
index 3a4988e..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/fb.h>
diff --git a/arch/tile/include/asm/fcntl.h b/arch/tile/include/asm/fcntl.h
deleted file mode 100644 (file)
index 46ab12d..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/fcntl.h>
index 51537ff..c66f793 100644 (file)
@@ -75,12 +75,6 @@ extern void __set_fixmap(enum fixed_addresses idx,
 
 #define set_fixmap(idx, phys) \
                __set_fixmap(idx, phys, PAGE_KERNEL)
-/*
- * Some hardware wants to get fixmapped without caching.
- */
-#define set_fixmap_nocache(idx, phys) \
-               __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
-
 #define clear_fixmap(idx) \
                __set_fixmap(idx, 0, __pgprot(0))
 
diff --git a/arch/tile/include/asm/ioctl.h b/arch/tile/include/asm/ioctl.h
deleted file mode 100644 (file)
index b279fe0..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctl.h>
diff --git a/arch/tile/include/asm/ioctls.h b/arch/tile/include/asm/ioctls.h
deleted file mode 100644 (file)
index ec34c76..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctls.h>
diff --git a/arch/tile/include/asm/ipc.h b/arch/tile/include/asm/ipc.h
deleted file mode 100644 (file)
index a46e3d9..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipc.h>
diff --git a/arch/tile/include/asm/ipcbuf.h b/arch/tile/include/asm/ipcbuf.h
deleted file mode 100644 (file)
index 84c7e51..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipcbuf.h>
diff --git a/arch/tile/include/asm/irq_regs.h b/arch/tile/include/asm/irq_regs.h
deleted file mode 100644 (file)
index 3dd9c0b..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/tile/include/asm/kdebug.h b/arch/tile/include/asm/kdebug.h
deleted file mode 100644 (file)
index 6ece1b0..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kdebug.h>
diff --git a/arch/tile/include/asm/local.h b/arch/tile/include/asm/local.h
deleted file mode 100644 (file)
index c11c530..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local.h>
diff --git a/arch/tile/include/asm/module.h b/arch/tile/include/asm/module.h
deleted file mode 100644 (file)
index 1e4b79f..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/module.h>
diff --git a/arch/tile/include/asm/msgbuf.h b/arch/tile/include/asm/msgbuf.h
deleted file mode 100644 (file)
index 809134c..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/msgbuf.h>
diff --git a/arch/tile/include/asm/mutex.h b/arch/tile/include/asm/mutex.h
deleted file mode 100644 (file)
index ff6101a..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/mutex-dec.h>
diff --git a/arch/tile/include/asm/param.h b/arch/tile/include/asm/param.h
deleted file mode 100644 (file)
index 965d454..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/param.h>
diff --git a/arch/tile/include/asm/parport.h b/arch/tile/include/asm/parport.h
deleted file mode 100644 (file)
index cf252af..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/parport.h>
diff --git a/arch/tile/include/asm/poll.h b/arch/tile/include/asm/poll.h
deleted file mode 100644 (file)
index c98509d..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/poll.h>
diff --git a/arch/tile/include/asm/posix_types.h b/arch/tile/include/asm/posix_types.h
deleted file mode 100644 (file)
index 22cae62..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/posix_types.h>
diff --git a/arch/tile/include/asm/resource.h b/arch/tile/include/asm/resource.h
deleted file mode 100644 (file)
index 04bc4db..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/resource.h>
diff --git a/arch/tile/include/asm/scatterlist.h b/arch/tile/include/asm/scatterlist.h
deleted file mode 100644 (file)
index 35d786f..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/scatterlist.h>
diff --git a/arch/tile/include/asm/sembuf.h b/arch/tile/include/asm/sembuf.h
deleted file mode 100644 (file)
index 7673b83..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/sembuf.h>
diff --git a/arch/tile/include/asm/serial.h b/arch/tile/include/asm/serial.h
deleted file mode 100644 (file)
index a0cb0ca..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/serial.h>
diff --git a/arch/tile/include/asm/shmbuf.h b/arch/tile/include/asm/shmbuf.h
deleted file mode 100644 (file)
index 83c05fc..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/shmbuf.h>
diff --git a/arch/tile/include/asm/shmparam.h b/arch/tile/include/asm/shmparam.h
deleted file mode 100644 (file)
index 93f30de..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/shmparam.h>
diff --git a/arch/tile/include/asm/socket.h b/arch/tile/include/asm/socket.h
deleted file mode 100644 (file)
index 6b71384..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/socket.h>
diff --git a/arch/tile/include/asm/sockios.h b/arch/tile/include/asm/sockios.h
deleted file mode 100644 (file)
index def6d47..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/sockios.h>
diff --git a/arch/tile/include/asm/statfs.h b/arch/tile/include/asm/statfs.h
deleted file mode 100644 (file)
index 0b91fe1..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/statfs.h>
diff --git a/arch/tile/include/asm/termbits.h b/arch/tile/include/asm/termbits.h
deleted file mode 100644 (file)
index 3935b10..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/termbits.h>
diff --git a/arch/tile/include/asm/termios.h b/arch/tile/include/asm/termios.h
deleted file mode 100644 (file)
index 280d78a..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/termios.h>
diff --git a/arch/tile/include/asm/types.h b/arch/tile/include/asm/types.h
deleted file mode 100644 (file)
index b9e79bc..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/types.h>
diff --git a/arch/tile/include/asm/ucontext.h b/arch/tile/include/asm/ucontext.h
deleted file mode 100644 (file)
index 9bc07b9..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ucontext.h>
diff --git a/arch/tile/include/asm/xor.h b/arch/tile/include/asm/xor.h
deleted file mode 100644 (file)
index c82eb12..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/xor.h>
diff --git a/arch/tile/include/hv/drv_srom_intf.h b/arch/tile/include/hv/drv_srom_intf.h
new file mode 100644 (file)
index 0000000..6395faa
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file drv_srom_intf.h
+ * Interface definitions for the SPI Flash ROM driver.
+ */
+
+#ifndef _SYS_HV_INCLUDE_DRV_SROM_INTF_H
+#define _SYS_HV_INCLUDE_DRV_SROM_INTF_H
+
+/** Read this offset to get the total device size. */
+#define SROM_TOTAL_SIZE_OFF   0xF0000000
+
+/** Read this offset to get the device sector size. */
+#define SROM_SECTOR_SIZE_OFF  0xF0000004
+
+/** Read this offset to get the device page size. */
+#define SROM_PAGE_SIZE_OFF    0xF0000008
+
+/** Write this offset to flush any pending writes. */
+#define SROM_FLUSH_OFF        0xF1000000
+
+/** Write this offset, plus the byte offset of the start of a sector, to
+ *  erase a sector.  Any write data is ignored, but there must be at least
+ *  one byte of write data.  Only applies when the driver is in MTD mode.
+ */
+#define SROM_ERASE_OFF        0xF2000000
+
+#endif /* _SYS_HV_INCLUDE_DRV_SROM_INTF_H */
index c4be58c..f6f50f2 100644 (file)
@@ -78,7 +78,6 @@ static struct clocksource cycle_counter_cs = {
        .rating = 300,
        .read = clocksource_get_cycles,
        .mask = CLOCKSOURCE_MASK(64),
-       .shift = 22,   /* typical value, e.g. x86 tsc uses this */
        .flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -91,8 +90,6 @@ void __init setup_clock(void)
        cycles_per_sec = hv_sysconf(HV_SYSCONF_CPU_SPEED);
        sched_clock_mult =
                clocksource_hz2mult(cycles_per_sec, SCHED_CLOCK_SHIFT);
-       cycle_counter_cs.mult =
-               clocksource_hz2mult(cycles_per_sec, cycle_counter_cs.shift);
 }
 
 void __init calibrate_delay(void)
@@ -107,7 +104,7 @@ void __init calibrate_delay(void)
 void __init time_init(void)
 {
        /* Initialize and register the clock source. */
-       clocksource_register(&cycle_counter_cs);
+       clocksource_register_hz(&cycle_counter_cs, cycles_per_sec);
 
        /* Start up the tile-timer interrupt source on the boot cpu. */
        setup_tile_timer();
index 4e10c40..7309988 100644 (file)
@@ -836,8 +836,7 @@ void __init mem_init(void)
 #endif
 
 #ifdef CONFIG_FLATMEM
-       if (!mem_map)
-               BUG();
+       BUG_ON(!mem_map);
 #endif
 
 #ifdef CONFIG_HIGHMEM
index b850bed..b627558 100644 (file)
@@ -1368,8 +1368,10 @@ static bool should_fail_request(struct hd_struct *part, unsigned int bytes)
 
 static int __init fail_make_request_debugfs(void)
 {
-       return init_fault_attr_dentries(&fail_make_request,
-                                       "fail_make_request");
+       struct dentry *dir = fault_create_debugfs_attr("fail_make_request",
+                                               NULL, &fail_make_request);
+
+       return IS_ERR(dir) ? PTR_ERR(dir) : 0;
 }
 
 late_initcall(fail_make_request_debugfs);
index 4f0c06c..7803548 100644 (file)
@@ -28,7 +28,10 @@ int blk_should_fake_timeout(struct request_queue *q)
 
 static int __init fail_io_timeout_debugfs(void)
 {
-       return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout");
+       struct dentry *dir = fault_create_debugfs_attr("fail_io_timeout",
+                                               NULL, &fail_io_timeout);
+
+       return IS_ERR(dir) ? PTR_ERR(dir) : 0;
 }
 
 late_initcall(fail_io_timeout_debugfs);
index 73863d8..76dc02f 100644 (file)
@@ -126,6 +126,12 @@ u8 ACPI_INIT_GLOBAL(acpi_gbl_copy_dsdt_locally, FALSE);
  */
 u8 ACPI_INIT_GLOBAL(acpi_gbl_truncate_io_addresses, FALSE);
 
+/*
+ * Disable runtime checking and repair of values returned by control methods.
+ * Use only if the repair is causing a problem on a particular machine.
+ */
+u8 ACPI_INIT_GLOBAL(acpi_gbl_disable_auto_repair, FALSE);
+
 /* acpi_gbl_FADT is a local copy of the FADT, converted to a common format. */
 
 struct acpi_table_fadt acpi_gbl_FADT;
index c7f743c..5552125 100644 (file)
@@ -357,6 +357,7 @@ struct acpi_predefined_data {
        char *pathname;
        const union acpi_predefined_info *predefined;
        union acpi_operand_object *parent_package;
+       struct acpi_namespace_node *node;
        u32 flags;
        u8 node_flags;
 };
index 94e73c9..c445cca 100644 (file)
@@ -468,6 +468,7 @@ static const union acpi_predefined_info predefined_names[] =
        {{"_SWS", 0, ACPI_RTYPE_INTEGER}},
        {{"_TC1", 0, ACPI_RTYPE_INTEGER}},
        {{"_TC2", 0, ACPI_RTYPE_INTEGER}},
+       {{"_TDL", 0, ACPI_RTYPE_INTEGER}},
        {{"_TIP", 1, ACPI_RTYPE_INTEGER}},
        {{"_TIV", 1, ACPI_RTYPE_INTEGER}},
        {{"_TMP", 0, ACPI_RTYPE_INTEGER}},
index 9fb03fa..c845c80 100644 (file)
@@ -193,14 +193,20 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
        }
 
        /*
-        * 1) We have a return value, but if one wasn't expected, just exit, this is
-        * not a problem. For example, if the "Implicit Return" feature is
-        * enabled, methods will always return a value.
+        * Return value validation and possible repair.
         *
-        * 2) If the return value can be of any type, then we cannot perform any
-        * validation, exit.
+        * 1) Don't perform return value validation/repair if this feature
+        * has been disabled via a global option.
+        *
+        * 2) We have a return value, but if one wasn't expected, just exit,
+        * this is not a problem. For example, if the "Implicit Return"
+        * feature is enabled, methods will always return a value.
+        *
+        * 3) If the return value can be of any type, then we cannot perform
+        * any validation, just exit.
         */
-       if ((!predefined->info.expected_btypes) ||
+       if (acpi_gbl_disable_auto_repair ||
+           (!predefined->info.expected_btypes) ||
            (predefined->info.expected_btypes == ACPI_RTYPE_ALL)) {
                goto cleanup;
        }
@@ -212,6 +218,7 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
                goto cleanup;
        }
        data->predefined = predefined;
+       data->node = node;
        data->node_flags = node->flags;
        data->pathname = pathname;
 
index 973883b..024c4f2 100644 (file)
@@ -503,6 +503,21 @@ acpi_ns_repair_TSS(struct acpi_predefined_data *data,
 {
        union acpi_operand_object *return_object = *return_object_ptr;
        acpi_status status;
+       struct acpi_namespace_node *node;
+
+       /*
+        * We can only sort the _TSS return package if there is no _PSS in the
+        * same scope. This is because if _PSS is present, the ACPI specification
+        * dictates that the _TSS Power Dissipation field is to be ignored, and
+        * therefore some BIOSs leave garbage values in the _TSS Power field(s).
+        * In this case, it is best to just return the _TSS package as-is.
+        * (May, 2011)
+        */
+       status =
+           acpi_ns_get_node(data->node, "^_PSS", ACPI_NS_NO_UPSEARCH, &node);
+       if (ACPI_SUCCESS(status)) {
+               return (AE_OK);
+       }
 
        status = acpi_ns_check_sorted_list(data, return_object, 5, 1,
                                           ACPI_SORT_DESCENDING,
index 48db094..62365f6 100644 (file)
@@ -126,12 +126,29 @@ acpi_tb_add_table(struct acpi_table_desc *table_desc, u32 *table_index)
        }
 
        /*
-        * Originally, we checked the table signature for "SSDT" or "PSDT" here.
-        * Next, we added support for OEMx tables, signature "OEM".
-        * Valid tables were encountered with a null signature, so we've just
-        * given up on validating the signature, since it seems to be a waste
-        * of code. The original code was removed (05/2008).
+        * Validate the incoming table signature.
+        *
+        * 1) Originally, we checked the table signature for "SSDT" or "PSDT".
+        * 2) We added support for OEMx tables, signature "OEM".
+        * 3) Valid tables were encountered with a null signature, so we just
+        *    gave up on validating the signature, (05/2008).
+        * 4) We encountered non-AML tables such as the MADT, which caused
+        *    interpreter errors and kernel faults. So now, we once again allow
+        *    only "SSDT", "OEMx", and now, also a null signature. (05/2011).
         */
+       if ((table_desc->pointer->signature[0] != 0x00) &&
+           (!ACPI_COMPARE_NAME(table_desc->pointer->signature, ACPI_SIG_SSDT))
+           && (ACPI_STRNCMP(table_desc->pointer->signature, "OEM", 3))) {
+               ACPI_ERROR((AE_INFO,
+                           "Table has invalid signature [%4.4s] (0x%8.8X), must be SSDT or OEMx",
+                           acpi_ut_valid_acpi_name(*(u32 *)table_desc->
+                                                   pointer->
+                                                   signature) ? table_desc->
+                           pointer->signature : "????",
+                           *(u32 *)table_desc->pointer->signature));
+
+               return_ACPI_STATUS(AE_BAD_SIGNATURE);
+       }
 
        (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
 
index 2c66135..87c0a8d 100644 (file)
@@ -55,6 +55,9 @@
 #define ACPI_BATTERY_NOTIFY_INFO       0x81
 #define ACPI_BATTERY_NOTIFY_THRESHOLD   0x82
 
+/* Battery power unit: 0 means mW, 1 means mA */
+#define ACPI_BATTERY_POWER_UNIT_MA     1
+
 #define _COMPONENT             ACPI_BATTERY_COMPONENT
 
 ACPI_MODULE_NAME("battery");
@@ -91,11 +94,6 @@ MODULE_DEVICE_TABLE(acpi, battery_device_ids);
 enum {
        ACPI_BATTERY_ALARM_PRESENT,
        ACPI_BATTERY_XINFO_PRESENT,
-       /* For buggy DSDTs that report negative 16-bit values for either
-        * charging or discharging current and/or report 0 as 65536
-        * due to bad math.
-        */
-       ACPI_BATTERY_QUIRK_SIGNED16_CURRENT,
        ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY,
 };
 
@@ -301,7 +299,8 @@ static enum power_supply_property energy_battery_props[] = {
 #ifdef CONFIG_ACPI_PROCFS_POWER
 inline char *acpi_battery_units(struct acpi_battery *battery)
 {
-       return (battery->power_unit)?"mA":"mW";
+       return (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) ?
+               "mA" : "mW";
 }
 #endif
 
@@ -461,9 +460,17 @@ static int acpi_battery_get_state(struct acpi_battery *battery)
        battery->update_time = jiffies;
        kfree(buffer.pointer);
 
-       if (test_bit(ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, &battery->flags) &&
-           battery->rate_now != -1)
+       /* For buggy DSDTs that report negative 16-bit values for either
+        * charging or discharging current and/or report 0 as 65536
+        * due to bad math.
+        */
+       if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA &&
+               battery->rate_now != ACPI_BATTERY_VALUE_UNKNOWN &&
+               (s16)(battery->rate_now) < 0) {
                battery->rate_now = abs((s16)battery->rate_now);
+               printk_once(KERN_WARNING FW_BUG "battery: (dis)charge rate"
+                       " invalid.\n");
+       }
 
        if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags)
            && battery->capacity_now >= 0 && battery->capacity_now <= 100)
@@ -544,7 +551,7 @@ static int sysfs_add_battery(struct acpi_battery *battery)
 {
        int result;
 
-       if (battery->power_unit) {
+       if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) {
                battery->bat.properties = charge_battery_props;
                battery->bat.num_properties =
                        ARRAY_SIZE(charge_battery_props);
@@ -566,18 +573,16 @@ static int sysfs_add_battery(struct acpi_battery *battery)
 
 static void sysfs_remove_battery(struct acpi_battery *battery)
 {
-       if (!battery->bat.dev)
+       mutex_lock(&battery->lock);
+       if (!battery->bat.dev) {
+               mutex_unlock(&battery->lock);
                return;
+       }
+
        device_remove_file(battery->bat.dev, &alarm_attr);
        power_supply_unregister(&battery->bat);
        battery->bat.dev = NULL;
-}
-
-static void acpi_battery_quirks(struct acpi_battery *battery)
-{
-       if (dmi_name_in_vendors("Acer") && battery->power_unit) {
-               set_bit(ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, &battery->flags);
-       }
+       mutex_unlock(&battery->lock);
 }
 
 /*
@@ -592,7 +597,7 @@ static void acpi_battery_quirks(struct acpi_battery *battery)
  *
  * Handle this correctly so that they won't break userspace.
  */
-static void acpi_battery_quirks2(struct acpi_battery *battery)
+static void acpi_battery_quirks(struct acpi_battery *battery)
 {
        if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags))
                return ;
@@ -623,13 +628,15 @@ static int acpi_battery_update(struct acpi_battery *battery)
                result = acpi_battery_get_info(battery);
                if (result)
                        return result;
-               acpi_battery_quirks(battery);
                acpi_battery_init_alarm(battery);
        }
-       if (!battery->bat.dev)
-               sysfs_add_battery(battery);
+       if (!battery->bat.dev) {
+               result = sysfs_add_battery(battery);
+               if (result)
+                       return result;
+       }
        result = acpi_battery_get_state(battery);
-       acpi_battery_quirks2(battery);
+       acpi_battery_quirks(battery);
        return result;
 }
 
@@ -863,7 +870,7 @@ DECLARE_FILE_FUNCTIONS(alarm);
                }, \
        }
 
-static struct battery_file {
+static const struct battery_file {
        struct file_operations ops;
        mode_t mode;
        const char *name;
@@ -948,9 +955,12 @@ static int battery_notify(struct notifier_block *nb,
        struct acpi_battery *battery = container_of(nb, struct acpi_battery,
                                                    pm_nb);
        switch (mode) {
+       case PM_POST_HIBERNATION:
        case PM_POST_SUSPEND:
-               sysfs_remove_battery(battery);
-               sysfs_add_battery(battery);
+               if (battery->bat.dev) {
+                       sysfs_remove_battery(battery);
+                       sysfs_add_battery(battery);
+               }
                break;
        }
 
@@ -975,25 +985,33 @@ static int acpi_battery_add(struct acpi_device *device)
        if (ACPI_SUCCESS(acpi_get_handle(battery->device->handle,
                        "_BIX", &handle)))
                set_bit(ACPI_BATTERY_XINFO_PRESENT, &battery->flags);
-       acpi_battery_update(battery);
+       result = acpi_battery_update(battery);
+       if (result)
+               goto fail;
 #ifdef CONFIG_ACPI_PROCFS_POWER
        result = acpi_battery_add_fs(device);
 #endif
-       if (!result) {
-               printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n",
-                       ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device),
-                       device->status.battery_present ? "present" : "absent");
-       } else {
+       if (result) {
 #ifdef CONFIG_ACPI_PROCFS_POWER
                acpi_battery_remove_fs(device);
 #endif
-               kfree(battery);
+               goto fail;
        }
 
+       printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n",
+               ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device),
+               device->status.battery_present ? "present" : "absent");
+
        battery->pm_nb.notifier_call = battery_notify;
        register_pm_notifier(&battery->pm_nb);
 
        return result;
+
+fail:
+       sysfs_remove_battery(battery);
+       mutex_destroy(&battery->lock);
+       kfree(battery);
+       return result;
 }
 
 static int acpi_battery_remove(struct acpi_device *device, int type)
index 1864ad3..19a6113 100644 (file)
@@ -77,7 +77,7 @@ struct dock_dependent_device {
        struct list_head list;
        struct list_head hotplug_list;
        acpi_handle handle;
-       struct acpi_dock_ops *ops;
+       const struct acpi_dock_ops *ops;
        void *context;
 };
 
@@ -589,7 +589,7 @@ EXPORT_SYMBOL_GPL(unregister_dock_notifier);
  * the dock driver after _DCK is executed.
  */
 int
-register_hotplug_dock_device(acpi_handle handle, struct acpi_dock_ops *ops,
+register_hotplug_dock_device(acpi_handle handle, const struct acpi_dock_ops *ops,
                             void *context)
 {
        struct dock_dependent_device *dd;
index 05b4420..22f918b 100644 (file)
@@ -92,7 +92,7 @@ static ssize_t acpi_ec_write_io(struct file *f, const char __user *buf,
        return count;
 }
 
-static struct file_operations acpi_ec_io_ops = {
+static const struct file_operations acpi_ec_io_ops = {
        .owner = THIS_MODULE,
        .open  = acpi_ec_open_io,
        .read  = acpi_ec_read_io,
index 467479f..0f0356c 100644 (file)
@@ -110,7 +110,7 @@ fan_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state)
        return result;
 }
 
-static struct thermal_cooling_device_ops fan_cooling_ops = {
+static const struct thermal_cooling_device_ops fan_cooling_ops = {
        .get_max_state = fan_get_max_state,
        .get_cur_state = fan_get_cur_state,
        .set_cur_state = fan_set_cur_state,
index 372f9b7..fa32f58 100644 (file)
@@ -155,7 +155,7 @@ static u32 acpi_osi_handler(acpi_string interface, u32 supported)
 {
        if (!strcmp("Linux", interface)) {
 
-               printk(KERN_NOTICE FW_BUG PREFIX
+               printk_once(KERN_NOTICE FW_BUG PREFIX
                        "BIOS _OSI(Linux) query %s%s\n",
                        osi_linux.enable ? "honored" : "ignored",
                        osi_linux.cmdline ? " via cmdline" :
@@ -237,8 +237,23 @@ void acpi_os_vprintf(const char *fmt, va_list args)
 #endif
 }
 
+#ifdef CONFIG_KEXEC
+static unsigned long acpi_rsdp;
+static int __init setup_acpi_rsdp(char *arg)
+{
+       acpi_rsdp = simple_strtoul(arg, NULL, 16);
+       return 0;
+}
+early_param("acpi_rsdp", setup_acpi_rsdp);
+#endif
+
 acpi_physical_address __init acpi_os_get_root_pointer(void)
 {
+#ifdef CONFIG_KEXEC
+       if (acpi_rsdp)
+               return acpi_rsdp;
+#endif
+
        if (efi_enabled) {
                if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
                        return efi.acpi20;
@@ -1083,7 +1098,13 @@ struct osi_setup_entry {
        bool enable;
 };
 
-static struct osi_setup_entry __initdata osi_setup_entries[OSI_STRING_ENTRIES_MAX];
+static struct osi_setup_entry __initdata
+               osi_setup_entries[OSI_STRING_ENTRIES_MAX] = {
+       {"Module Device", true},
+       {"Processor Device", true},
+       {"3.0 _SCP Extensions", true},
+       {"Processor Aggregator Device", true},
+};
 
 void __init acpi_osi_setup(char *str)
 {
index f907cfb..7f9eba9 100644 (file)
@@ -303,6 +303,61 @@ void acpi_pci_irq_del_prt(struct pci_bus *bus)
 /* --------------------------------------------------------------------------
                           PCI Interrupt Routing Support
    -------------------------------------------------------------------------- */
+#ifdef CONFIG_X86_IO_APIC
+extern int noioapicquirk;
+extern int noioapicreroute;
+
+static int bridge_has_boot_interrupt_variant(struct pci_bus *bus)
+{
+       struct pci_bus *bus_it;
+
+       for (bus_it = bus ; bus_it ; bus_it = bus_it->parent) {
+               if (!bus_it->self)
+                       return 0;
+               if (bus_it->self->irq_reroute_variant)
+                       return bus_it->self->irq_reroute_variant;
+       }
+       return 0;
+}
+
+/*
+ * Some chipsets (e.g. Intel 6700PXH) generate a legacy INTx when the IRQ
+ * entry in the chipset's IO-APIC is masked (as, e.g. the RT kernel does
+ * during interrupt handling). When this INTx generation cannot be disabled,
+ * we reroute these interrupts to their legacy equivalent to get rid of
+ * spurious interrupts.
+ */
+static int acpi_reroute_boot_interrupt(struct pci_dev *dev,
+                                      struct acpi_prt_entry *entry)
+{
+       if (noioapicquirk || noioapicreroute) {
+               return 0;
+       } else {
+               switch (bridge_has_boot_interrupt_variant(dev->bus)) {
+               case 0:
+                       /* no rerouting necessary */
+                       return 0;
+               case INTEL_IRQ_REROUTE_VARIANT:
+                       /*
+                        * Remap according to INTx routing table in 6700PXH
+                        * specs, intel order number 302628-002, section
+                        * 2.15.2. Other chipsets (80332, ...) have the same
+                        * mapping and are handled here as well.
+                        */
+                       dev_info(&dev->dev, "PCI IRQ %d -> rerouted to legacy "
+                                "IRQ %d\n", entry->index,
+                                (entry->index % 4) + 16);
+                       entry->index = (entry->index % 4) + 16;
+                       return 1;
+               default:
+                       dev_warn(&dev->dev, "Cannot reroute IRQ %d to legacy "
+                                "IRQ: unknown mapping\n", entry->index);
+                       return -1;
+               }
+       }
+}
+#endif /* CONFIG_X86_IO_APIC */
+
 static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin)
 {
        struct acpi_prt_entry *entry;
@@ -311,6 +366,9 @@ static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin)
 
        entry = acpi_pci_irq_find_prt_entry(dev, pin);
        if (entry) {
+#ifdef CONFIG_X86_IO_APIC
+               acpi_reroute_boot_interrupt(dev, entry);
+#endif /* CONFIG_X86_IO_APIC */
                ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %s[%c] _PRT entry\n",
                                  pci_name(dev), pin_name(pin)));
                return entry;
index d06078d..2672c79 100644 (file)
@@ -485,7 +485,8 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
                root->secondary.end = 0xFF;
                printk(KERN_WARNING FW_BUG PREFIX
                       "no secondary bus range in _CRS\n");
-               status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN,                                               NULL, &bus);
+               status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN,
+                                              NULL, &bus);
                if (ACPI_SUCCESS(status))
                        root->secondary.start = bus;
                else if (status == AE_NOT_FOUND)
index 79cb653..870550d 100644 (file)
@@ -244,7 +244,7 @@ processor_set_cur_state(struct thermal_cooling_device *cdev,
        return result;
 }
 
-struct thermal_cooling_device_ops processor_cooling_ops = {
+const struct thermal_cooling_device_ops processor_cooling_ops = {
        .get_max_state = processor_get_max_state,
        .get_cur_state = processor_get_cur_state,
        .set_cur_state = processor_set_cur_state,
index 50658ff..6e36d0c 100644 (file)
@@ -130,6 +130,9 @@ struct acpi_sbs {
 
 #define to_acpi_sbs(x) container_of(x, struct acpi_sbs, charger)
 
+static int acpi_sbs_remove(struct acpi_device *device, int type);
+static int acpi_battery_get_state(struct acpi_battery *battery);
+
 static inline int battery_scale(int log)
 {
        int scale = 1;
@@ -195,6 +198,8 @@ static int acpi_sbs_battery_get_property(struct power_supply *psy,
 
        if ((!battery->present) && psp != POWER_SUPPLY_PROP_PRESENT)
                return -ENODEV;
+
+       acpi_battery_get_state(battery);
        switch (psp) {
        case POWER_SUPPLY_PROP_STATUS:
                if (battery->rate_now < 0)
@@ -225,11 +230,17 @@ static int acpi_sbs_battery_get_property(struct power_supply *psy,
        case POWER_SUPPLY_PROP_POWER_NOW:
                val->intval = abs(battery->rate_now) *
                                acpi_battery_ipscale(battery) * 1000;
+               val->intval *= (acpi_battery_mode(battery)) ?
+                               (battery->voltage_now *
+                               acpi_battery_vscale(battery) / 1000) : 1;
                break;
        case POWER_SUPPLY_PROP_CURRENT_AVG:
        case POWER_SUPPLY_PROP_POWER_AVG:
                val->intval = abs(battery->rate_avg) *
                                acpi_battery_ipscale(battery) * 1000;
+               val->intval *= (acpi_battery_mode(battery)) ?
+                               (battery->voltage_now *
+                               acpi_battery_vscale(battery) / 1000) : 1;
                break;
        case POWER_SUPPLY_PROP_CAPACITY:
                val->intval = battery->state_of_charge;
@@ -903,8 +914,6 @@ static void acpi_sbs_callback(void *context)
        }
 }
 
-static int acpi_sbs_remove(struct acpi_device *device, int type);
-
 static int acpi_sbs_add(struct acpi_device *device)
 {
        struct acpi_sbs *sbs;
index 6c94960..3ed80b2 100644 (file)
@@ -428,6 +428,22 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = {
                DMI_MATCH(DMI_PRODUCT_NAME, "1000 Series"),
                },
        },
+       {
+       .callback = init_old_suspend_ordering,
+       .ident = "Asus A8N-SLI DELUXE",
+       .matches = {
+               DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+               DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI DELUXE"),
+               },
+       },
+       {
+       .callback = init_old_suspend_ordering,
+       .ident = "Asus A8N-SLI Premium",
+       .matches = {
+               DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+               DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI Premium"),
+               },
+       },
        {},
 };
 #endif /* CONFIG_SUSPEND */
index 77255f2..c538d0e 100644 (file)
@@ -149,12 +149,12 @@ static int param_get_debug_level(char *buffer, const struct kernel_param *kp)
        return result;
 }
 
-static struct kernel_param_ops param_ops_debug_layer = {
+static const struct kernel_param_ops param_ops_debug_layer = {
        .set = param_set_uint,
        .get = param_get_debug_layer,
 };
 
-static struct kernel_param_ops param_ops_debug_level = {
+static const struct kernel_param_ops param_ops_debug_level = {
        .set = param_set_uint,
        .get = param_get_debug_level,
 };
index 2607e17..48fbc64 100644 (file)
@@ -812,7 +812,7 @@ acpi_thermal_unbind_cooling_device(struct thermal_zone_device *thermal,
                                thermal_zone_unbind_cooling_device);
 }
 
-static struct thermal_zone_device_ops acpi_thermal_zone_ops = {
+static const struct thermal_zone_device_ops acpi_thermal_zone_ops = {
        .bind = acpi_thermal_bind_cooling_device,
        .unbind = acpi_thermal_unbind_cooling_device,
        .get_temp = thermal_get_temp,
index ada4b4d..08a44b5 100644 (file)
@@ -307,7 +307,7 @@ video_set_cur_state(struct thermal_cooling_device *cooling_dev, unsigned long st
        return acpi_video_device_lcd_set_level(video, level);
 }
 
-static struct thermal_cooling_device_ops video_cooling_ops = {
+static const struct thermal_cooling_device_ops video_cooling_ops = {
        .get_max_state = video_get_max_state,
        .get_cur_state = video_get_cur_state,
        .set_cur_state = video_set_cur_state,
index e0a5b55..bb7c5f1 100644 (file)
@@ -218,12 +218,12 @@ static void ata_acpi_dev_uevent(acpi_handle handle, u32 event, void *data)
        ata_acpi_uevent(dev->link->ap, dev, event);
 }
 
-static struct acpi_dock_ops ata_acpi_dev_dock_ops = {
+static const struct acpi_dock_ops ata_acpi_dev_dock_ops = {
        .handler = ata_acpi_dev_notify_dock,
        .uevent = ata_acpi_dev_uevent,
 };
 
-static struct acpi_dock_ops ata_acpi_ap_dock_ops = {
+static const struct acpi_dock_ops ata_acpi_ap_dock_ops = {
        .handler = ata_acpi_ap_notify_dock,
        .uevent = ata_acpi_ap_uevent,
 };
index 49502bc..423fd56 100644 (file)
@@ -616,5 +616,16 @@ config MSM_SMD_PKT
          Enables userspace clients to read and write to some packet SMD
          ports via device interface for MSM chipset.
 
+config TILE_SROM
+       bool "Character-device access via hypervisor to the Tilera SPI ROM"
+       depends on TILE
+       default y
+       ---help---
+         This device provides character-level read-write access
+         to the SROM, typically via the "0", "1", and "2" devices
+         in /dev/srom/.  The Tilera hypervisor makes the flash
+         device appear much like a simple EEPROM, and knows
+         how to partition a single ROM for multiple purposes.
+
 endmenu
 
index 7a00672..32762ba 100644 (file)
@@ -63,3 +63,5 @@ obj-$(CONFIG_RAMOOPS)         += ramoops.o
 
 obj-$(CONFIG_JS_RTC)           += js-rtc.o
 js-rtc-y = rtc.o
+
+obj-$(CONFIG_TILE_SROM)                += tile-srom.o
index fca0c51..810aff9 100644 (file)
@@ -147,6 +147,14 @@ static int __init ramoops_probe(struct platform_device *pdev)
        cxt->phys_addr = pdata->mem_address;
        cxt->record_size = pdata->record_size;
        cxt->dump_oops = pdata->dump_oops;
+       /*
+        * Update the module parameter variables as well so they are visible
+        * through /sys/module/ramoops/parameters/
+        */
+       mem_size = pdata->mem_size;
+       mem_address = pdata->mem_address;
+       record_size = pdata->record_size;
+       dump_oops = pdata->dump_oops;
 
        if (!request_mem_region(cxt->phys_addr, cxt->size, "ramoops")) {
                pr_err("request mem region failed\n");
diff --git a/drivers/char/tile-srom.c b/drivers/char/tile-srom.c
new file mode 100644 (file)
index 0000000..cf3ee00
--- /dev/null
@@ -0,0 +1,481 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * SPI Flash ROM driver
+ *
+ * This source code is derived from code provided in "Linux Device
+ * Drivers, Third Edition", by Jonathan Corbet, Alessandro Rubini, and
+ * Greg Kroah-Hartman, published by O'Reilly Media, Inc.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/kernel.h>      /* printk() */
+#include <linux/slab.h>                /* kmalloc() */
+#include <linux/fs.h>          /* everything... */
+#include <linux/errno.h>       /* error codes */
+#include <linux/types.h>       /* size_t */
+#include <linux/proc_fs.h>
+#include <linux/fcntl.h>       /* O_ACCMODE */
+#include <linux/aio.h>
+#include <linux/pagemap.h>
+#include <linux/hugetlb.h>
+#include <linux/uaccess.h>
+#include <linux/platform_device.h>
+#include <hv/hypervisor.h>
+#include <linux/ioctl.h>
+#include <linux/cdev.h>
+#include <linux/delay.h>
+#include <hv/drv_srom_intf.h>
+
+/*
+ * Size of our hypervisor I/O requests.  We break up large transfers
+ * so that we don't spend large uninterrupted spans of time in the
+ * hypervisor.  Erasing an SROM sector takes a significant fraction of
+ * a second, so if we allowed the user to, say, do one I/O to write the
+ * entire ROM, we'd get soft lockup timeouts, or worse.
+ */
+#define SROM_CHUNK_SIZE ((size_t)4096)
+
+/*
+ * When hypervisor is busy (e.g. erasing), poll the status periodically.
+ */
+
+/*
+ * Interval to poll the state in msec
+ */
+#define SROM_WAIT_TRY_INTERVAL 20
+
+/*
+ * Maximum times to poll the state
+ */
+#define SROM_MAX_WAIT_TRY_TIMES 1000
+
+struct srom_dev {
+       int hv_devhdl;                  /* Handle for hypervisor device */
+       u32 total_size;                 /* Size of this device */
+       u32 sector_size;                /* Size of a sector */
+       u32 page_size;                  /* Size of a page */
+       struct mutex lock;              /* Allow only one accessor at a time */
+};
+
+static int srom_major;                 /* Dynamic major by default */
+module_param(srom_major, int, 0);
+MODULE_AUTHOR("Tilera Corporation");
+MODULE_LICENSE("GPL");
+
+static int srom_devs;                  /* Number of SROM partitions */
+static struct cdev srom_cdev;
+static struct class *srom_class;
+static struct srom_dev *srom_devices;
+
+/*
+ * Handle calling the hypervisor and managing EAGAIN/EBUSY.
+ */
+
+static ssize_t _srom_read(int hv_devhdl, void *buf,
+                         loff_t off, size_t count)
+{
+       int retval, retries = SROM_MAX_WAIT_TRY_TIMES;
+       for (;;) {
+               retval = hv_dev_pread(hv_devhdl, 0, (HV_VirtAddr)buf,
+                                     count, off);
+               if (retval >= 0)
+                       return retval;
+               if (retval == HV_EAGAIN)
+                       continue;
+               if (retval == HV_EBUSY && --retries > 0) {
+                       msleep(SROM_WAIT_TRY_INTERVAL);
+                       continue;
+               }
+               pr_err("_srom_read: error %d\n", retval);
+               return -EIO;
+       }
+}
+
+static ssize_t _srom_write(int hv_devhdl, const void *buf,
+                          loff_t off, size_t count)
+{
+       int retval, retries = SROM_MAX_WAIT_TRY_TIMES;
+       for (;;) {
+               retval = hv_dev_pwrite(hv_devhdl, 0, (HV_VirtAddr)buf,
+                                      count, off);
+               if (retval >= 0)
+                       return retval;
+               if (retval == HV_EAGAIN)
+                       continue;
+               if (retval == HV_EBUSY && --retries > 0) {
+                       msleep(SROM_WAIT_TRY_INTERVAL);
+                       continue;
+               }
+               pr_err("_srom_write: error %d\n", retval);
+               return -EIO;
+       }
+}
+
+/**
+ * srom_open() - Device open routine.
+ * @inode: Inode for this device.
+ * @filp: File for this specific open of the device.
+ *
+ * Returns zero, or an error code.
+ */
+static int srom_open(struct inode *inode, struct file *filp)
+{
+       filp->private_data = &srom_devices[iminor(inode)];
+       return 0;
+}
+
+
+/**
+ * srom_release() - Device release routine.
+ * @inode: Inode for this device.
+ * @filp: File for this specific open of the device.
+ *
+ * Returns zero, or an error code.
+ */
+static int srom_release(struct inode *inode, struct file *filp)
+{
+       struct srom_dev *srom = filp->private_data;
+       char dummy;
+
+       /* Make sure we've flushed anything written to the ROM. */
+       mutex_lock(&srom->lock);
+       if (srom->hv_devhdl >= 0)
+               _srom_write(srom->hv_devhdl, &dummy, SROM_FLUSH_OFF, 1);
+       mutex_unlock(&srom->lock);
+
+       filp->private_data = NULL;
+
+       return 0;
+}
+
+
+/**
+ * srom_read() - Read data from the device.
+ * @filp: File for this specific open of the device.
+ * @buf: User's data buffer.
+ * @count: Number of bytes requested.
+ * @f_pos: File position.
+ *
+ * Returns number of bytes read, or an error code.
+ */
+static ssize_t srom_read(struct file *filp, char __user *buf,
+                        size_t count, loff_t *f_pos)
+{
+       int retval = 0;
+       void *kernbuf;
+       struct srom_dev *srom = filp->private_data;
+
+       kernbuf = kmalloc(SROM_CHUNK_SIZE, GFP_KERNEL);
+       if (!kernbuf)
+               return -ENOMEM;
+
+       if (mutex_lock_interruptible(&srom->lock)) {
+               retval = -ERESTARTSYS;
+               kfree(kernbuf);
+               return retval;
+       }
+
+       while (count) {
+               int hv_retval;
+               int bytes_this_pass = min(count, SROM_CHUNK_SIZE);
+
+               hv_retval = _srom_read(srom->hv_devhdl, kernbuf,
+                                      *f_pos, bytes_this_pass);
+               if (hv_retval > 0) {
+                       if (copy_to_user(buf, kernbuf, hv_retval) != 0) {
+                               retval = -EFAULT;
+                               break;
+                       }
+               } else if (hv_retval <= 0) {
+                       if (retval == 0)
+                               retval = hv_retval;
+                       break;
+               }
+
+               retval += hv_retval;
+               *f_pos += hv_retval;
+               buf += hv_retval;
+               count -= hv_retval;
+       }
+
+       mutex_unlock(&srom->lock);
+       kfree(kernbuf);
+
+       return retval;
+}
+
+/**
+ * srom_write() - Write data to the device.
+ * @filp: File for this specific open of the device.
+ * @buf: User's data buffer.
+ * @count: Number of bytes requested.
+ * @f_pos: File position.
+ *
+ * Returns number of bytes written, or an error code.
+ */
+static ssize_t srom_write(struct file *filp, const char __user *buf,
+                         size_t count, loff_t *f_pos)
+{
+       int retval = 0;
+       void *kernbuf;
+       struct srom_dev *srom = filp->private_data;
+
+       kernbuf = kmalloc(SROM_CHUNK_SIZE, GFP_KERNEL);
+       if (!kernbuf)
+               return -ENOMEM;
+
+       if (mutex_lock_interruptible(&srom->lock)) {
+               retval = -ERESTARTSYS;
+               kfree(kernbuf);
+               return retval;
+       }
+
+       while (count) {
+               int hv_retval;
+               int bytes_this_pass = min(count, SROM_CHUNK_SIZE);
+
+               if (copy_from_user(kernbuf, buf, bytes_this_pass) != 0) {
+                       retval = -EFAULT;
+                       break;
+               }
+
+               hv_retval = _srom_write(srom->hv_devhdl, kernbuf,
+                                       *f_pos, bytes_this_pass);
+               if (hv_retval <= 0) {
+                       if (retval == 0)
+                               retval = hv_retval;
+                       break;
+               }
+
+               retval += hv_retval;
+               *f_pos += hv_retval;
+               buf += hv_retval;
+               count -= hv_retval;
+       }
+
+       mutex_unlock(&srom->lock);
+       kfree(kernbuf);
+
+       return retval;
+}
+
+/* Provide our own implementation so we can use srom->total_size. */
+loff_t srom_llseek(struct file *filp, loff_t offset, int origin)
+{
+       struct srom_dev *srom = filp->private_data;
+
+       if (mutex_lock_interruptible(&srom->lock))
+               return -ERESTARTSYS;
+
+       switch (origin) {
+       case SEEK_END:
+               offset += srom->total_size;
+               break;
+       case SEEK_CUR:
+               offset += filp->f_pos;
+               break;
+       }
+
+       if (offset < 0 || offset > srom->total_size) {
+               offset = -EINVAL;
+       } else {
+               filp->f_pos = offset;
+               filp->f_version = 0;
+       }
+
+       mutex_unlock(&srom->lock);
+
+       return offset;
+}
+
+static ssize_t total_show(struct device *dev,
+                         struct device_attribute *attr, char *buf)
+{
+       struct srom_dev *srom = dev_get_drvdata(dev);
+       return sprintf(buf, "%u\n", srom->total_size);
+}
+
+static ssize_t sector_show(struct device *dev,
+                          struct device_attribute *attr, char *buf)
+{
+       struct srom_dev *srom = dev_get_drvdata(dev);
+       return sprintf(buf, "%u\n", srom->sector_size);
+}
+
+static ssize_t page_show(struct device *dev,
+                        struct device_attribute *attr, char *buf)
+{
+       struct srom_dev *srom = dev_get_drvdata(dev);
+       return sprintf(buf, "%u\n", srom->page_size);
+}
+
+static struct device_attribute srom_dev_attrs[] = {
+       __ATTR(total_size, S_IRUGO, total_show, NULL),
+       __ATTR(sector_size, S_IRUGO, sector_show, NULL),
+       __ATTR(page_size, S_IRUGO, page_show, NULL),
+       __ATTR_NULL
+};
+
+static char *srom_devnode(struct device *dev, mode_t *mode)
+{
+       *mode = S_IRUGO | S_IWUSR;
+       return kasprintf(GFP_KERNEL, "srom/%s", dev_name(dev));
+}
+
+/*
+ * The fops
+ */
+static const struct file_operations srom_fops = {
+       .owner =     THIS_MODULE,
+       .llseek =    srom_llseek,
+       .read =      srom_read,
+       .write =     srom_write,
+       .open =      srom_open,
+       .release =   srom_release,
+};
+
+/**
+ * srom_setup_minor() - Initialize per-minor information.
+ * @srom: Per-device SROM state.
+ * @index: Device to set up.
+ */
+static int srom_setup_minor(struct srom_dev *srom, int index)
+{
+       struct device *dev;
+       int devhdl = srom->hv_devhdl;
+
+       mutex_init(&srom->lock);
+
+       if (_srom_read(devhdl, &srom->total_size,
+                      SROM_TOTAL_SIZE_OFF, sizeof(srom->total_size)) < 0)
+               return -EIO;
+       if (_srom_read(devhdl, &srom->sector_size,
+                      SROM_SECTOR_SIZE_OFF, sizeof(srom->sector_size)) < 0)
+               return -EIO;
+       if (_srom_read(devhdl, &srom->page_size,
+                      SROM_PAGE_SIZE_OFF, sizeof(srom->page_size)) < 0)
+               return -EIO;
+
+       dev = device_create(srom_class, &platform_bus,
+                           MKDEV(srom_major, index), srom, "%d", index);
+       return IS_ERR(dev) ? PTR_ERR(dev) : 0;
+}
+
+/** srom_init() - Initialize the driver's module. */
+static int srom_init(void)
+{
+       int result, i;
+       dev_t dev = MKDEV(srom_major, 0);
+
+       /*
+        * Start with a plausible number of partitions; the krealloc() call
+        * below will yield about log(srom_devs) additional allocations.
+        */
+       srom_devices = kzalloc(4 * sizeof(struct srom_dev), GFP_KERNEL);
+
+       /* Discover the number of srom partitions. */
+       for (i = 0; ; i++) {
+               int devhdl;
+               char buf[20];
+               struct srom_dev *new_srom_devices =
+                       krealloc(srom_devices, (i+1) * sizeof(struct srom_dev),
+                                GFP_KERNEL | __GFP_ZERO);
+               if (!new_srom_devices) {
+                       result = -ENOMEM;
+                       goto fail_mem;
+               }
+               srom_devices = new_srom_devices;
+               sprintf(buf, "srom/0/%d", i);
+               devhdl = hv_dev_open((HV_VirtAddr)buf, 0);
+               if (devhdl < 0) {
+                       if (devhdl != HV_ENODEV)
+                               pr_notice("srom/%d: hv_dev_open failed: %d.\n",
+                                         i, devhdl);
+                       break;
+               }
+               srom_devices[i].hv_devhdl = devhdl;
+       }
+       srom_devs = i;
+
+       /* Bail out early if we have no partitions at all. */
+       if (srom_devs == 0) {
+               result = -ENODEV;
+               goto fail_mem;
+       }
+
+       /* Register our major, and accept a dynamic number. */
+       if (srom_major)
+               result = register_chrdev_region(dev, srom_devs, "srom");
+       else {
+               result = alloc_chrdev_region(&dev, 0, srom_devs, "srom");
+               srom_major = MAJOR(dev);
+       }
+       if (result < 0)
+               goto fail_mem;
+
+       /* Register a character device. */
+       cdev_init(&srom_cdev, &srom_fops);
+       srom_cdev.owner = THIS_MODULE;
+       srom_cdev.ops = &srom_fops;
+       result = cdev_add(&srom_cdev, dev, srom_devs);
+       if (result < 0)
+               goto fail_chrdev;
+
+       /* Create a sysfs class. */
+       srom_class = class_create(THIS_MODULE, "srom");
+       if (IS_ERR(srom_class)) {
+               result = PTR_ERR(srom_class);
+               goto fail_cdev;
+       }
+       srom_class->dev_attrs = srom_dev_attrs;
+       srom_class->devnode = srom_devnode;
+
+       /* Do per-partition initialization */
+       for (i = 0; i < srom_devs; i++) {
+               result = srom_setup_minor(srom_devices + i, i);
+               if (result < 0)
+                       goto fail_class;
+       }
+
+       return 0;
+
+fail_class:
+       for (i = 0; i < srom_devs; i++)
+               device_destroy(srom_class, MKDEV(srom_major, i));
+       class_destroy(srom_class);
+fail_cdev:
+       cdev_del(&srom_cdev);
+fail_chrdev:
+       unregister_chrdev_region(dev, srom_devs);
+fail_mem:
+       kfree(srom_devices);
+       return result;
+}
+
+/** srom_cleanup() - Clean up the driver's module. */
+static void srom_cleanup(void)
+{
+       int i;
+       for (i = 0; i < srom_devs; i++)
+               device_destroy(srom_class, MKDEV(srom_major, i));
+       class_destroy(srom_class);
+       cdev_del(&srom_cdev);
+       unregister_chrdev_region(MKDEV(srom_major, 0), srom_devs);
+       kfree(srom_devices);
+}
+
+module_init(srom_init);
+module_exit(srom_cleanup);
index 7fc2f10..3f4051a 100644 (file)
@@ -80,7 +80,7 @@ enum tis_defaults {
 static LIST_HEAD(tis_chips);
 static DEFINE_SPINLOCK(tis_lock);
 
-#ifdef CONFIG_PNP
+#if defined(CONFIG_PNP) && defined(CONFIG_ACPI)
 static int is_itpm(struct pnp_dev *dev)
 {
        struct acpi_device *acpi = pnp_acpi_device(dev);
@@ -93,6 +93,11 @@ static int is_itpm(struct pnp_dev *dev)
 
        return 0;
 }
+#else
+static inline int is_itpm(struct pnp_dev *dev)
+{
+       return 0;
+}
 #endif
 
 static int check_locality(struct tpm_chip *chip, int l)
index eacb05e..eb80b54 100644 (file)
@@ -157,7 +157,7 @@ utf16_strnlen(efi_char16_t *s, size_t maxlength)
        return length;
 }
 
-static unsigned long
+static inline unsigned long
 utf16_strlen(efi_char16_t *s)
 {
        return utf16_strnlen(s, ~0UL);
@@ -580,8 +580,8 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
        return -1;
 }
 
-static u64 efi_pstore_write(enum pstore_type_id type, int part, size_t size,
-                           struct pstore_info *psi)
+static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part,
+                           size_t size, struct pstore_info *psi)
 {
        return 0;
 }
index 80baa53..d64c5a4 100644 (file)
@@ -23,7 +23,7 @@
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/io.h>
-
+#include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
 
index 8420129..f75a66e 100644 (file)
@@ -241,12 +241,13 @@ config DM_MIRROR
          needed for live data migration tools such as 'pvmove'.
 
 config DM_RAID
-       tristate "RAID 4/5/6 target (EXPERIMENTAL)"
+       tristate "RAID 1/4/5/6 target (EXPERIMENTAL)"
        depends on BLK_DEV_DM && EXPERIMENTAL
+       select MD_RAID1
        select MD_RAID456
        select BLK_DEV_MD
        ---help---
-        A dm target that supports RAID4, RAID5 and RAID6 mappings
+        A dm target that supports RAID1, RAID4, RAID5 and RAID6 mappings
 
         A RAID-5 set of N drives with a capacity of C MB per drive provides
         the capacity of C * (N - 1) MB, and protects against a failure
index bae6c4e..49da55c 100644 (file)
@@ -30,7 +30,6 @@
 #include <linux/device-mapper.h>
 
 #define DM_MSG_PREFIX "crypt"
-#define MESG_STR(x) x, sizeof(x)
 
 /*
  * context holding the current state of a multi-part conversion
@@ -239,7 +238,7 @@ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv,
                              struct dm_crypt_request *dmreq)
 {
        memset(iv, 0, cc->iv_size);
-       *(u32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff);
+       *(__le32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff);
 
        return 0;
 }
@@ -248,7 +247,7 @@ static int crypt_iv_plain64_gen(struct crypt_config *cc, u8 *iv,
                                struct dm_crypt_request *dmreq)
 {
        memset(iv, 0, cc->iv_size);
-       *(u64 *)iv = cpu_to_le64(dmreq->iv_sector);
+       *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector);
 
        return 0;
 }
@@ -415,7 +414,7 @@ static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv,
        struct crypto_cipher *essiv_tfm = this_crypt_config(cc)->iv_private;
 
        memset(iv, 0, cc->iv_size);
-       *(u64 *)iv = cpu_to_le64(dmreq->iv_sector);
+       *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector);
        crypto_cipher_encrypt_one(essiv_tfm, iv, iv);
 
        return 0;
@@ -1575,11 +1574,17 @@ bad_mem:
 static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
        struct crypt_config *cc;
-       unsigned int key_size;
+       unsigned int key_size, opt_params;
        unsigned long long tmpll;
        int ret;
+       struct dm_arg_set as;
+       const char *opt_string;
+
+       static struct dm_arg _args[] = {
+               {0, 1, "Invalid number of feature args"},
+       };
 
-       if (argc != 5) {
+       if (argc < 5) {
                ti->error = "Not enough arguments";
                return -EINVAL;
        }
@@ -1648,6 +1653,30 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        }
        cc->start = tmpll;
 
+       argv += 5;
+       argc -= 5;
+
+       /* Optional parameters */
+       if (argc) {
+               as.argc = argc;
+               as.argv = argv;
+
+               ret = dm_read_arg_group(_args, &as, &opt_params, &ti->error);
+               if (ret)
+                       goto bad;
+
+               opt_string = dm_shift_arg(&as);
+
+               if (opt_params == 1 && opt_string &&
+                   !strcasecmp(opt_string, "allow_discards"))
+                       ti->num_discard_requests = 1;
+               else if (opt_params) {
+                       ret = -EINVAL;
+                       ti->error = "Invalid feature arguments";
+                       goto bad;
+               }
+       }
+
        ret = -ENOMEM;
        cc->io_queue = alloc_workqueue("kcryptd_io",
                                       WQ_NON_REENTRANT|
@@ -1682,9 +1711,16 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
        struct dm_crypt_io *io;
        struct crypt_config *cc;
 
-       if (bio->bi_rw & REQ_FLUSH) {
+       /*
+        * If bio is REQ_FLUSH or REQ_DISCARD, just bypass crypt queues.
+        * - for REQ_FLUSH device-mapper core ensures that no IO is in-flight
+        * - for REQ_DISCARD caller must use flush if IO ordering matters
+        */
+       if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) {
                cc = ti->private;
                bio->bi_bdev = cc->dev->bdev;
+               if (bio_sectors(bio))
+                       bio->bi_sector = cc->start + dm_target_offset(ti, bio->bi_sector);
                return DM_MAPIO_REMAPPED;
        }
 
@@ -1727,6 +1763,10 @@ static int crypt_status(struct dm_target *ti, status_type_t type,
 
                DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset,
                                cc->dev->name, (unsigned long long)cc->start);
+
+               if (ti->num_discard_requests)
+                       DMEMIT(" 1 allow_discards");
+
                break;
        }
        return 0;
@@ -1770,12 +1810,12 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
        if (argc < 2)
                goto error;
 
-       if (!strnicmp(argv[0], MESG_STR("key"))) {
+       if (!strcasecmp(argv[0], "key")) {
                if (!test_bit(DM_CRYPT_SUSPENDED, &cc->flags)) {
                        DMWARN("not suspended during key manipulation.");
                        return -EINVAL;
                }
-               if (argc == 3 && !strnicmp(argv[1], MESG_STR("set"))) {
+               if (argc == 3 && !strcasecmp(argv[1], "set")) {
                        ret = crypt_set_key(cc, argv[2]);
                        if (ret)
                                return ret;
@@ -1783,7 +1823,7 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
                                ret = cc->iv_gen_ops->init(cc);
                        return ret;
                }
-               if (argc == 2 && !strnicmp(argv[1], MESG_STR("wipe"))) {
+               if (argc == 2 && !strcasecmp(argv[1], "wipe")) {
                        if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) {
                                ret = cc->iv_gen_ops->wipe(cc);
                                if (ret)
@@ -1823,7 +1863,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
 
 static struct target_type crypt_target = {
        .name   = "crypt",
-       .version = {1, 10, 0},
+       .version = {1, 11, 0},
        .module = THIS_MODULE,
        .ctr    = crypt_ctr,
        .dtr    = crypt_dtr,
index ea79062..89f73ca 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2003 Sistina Software (UK) Limited.
- * Copyright (C) 2004, 2010 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004, 2010-2011 Red Hat, Inc. All rights reserved.
  *
  * This file is released under the GPL.
  */
@@ -15,6 +15,9 @@
 
 #define DM_MSG_PREFIX "flakey"
 
+#define all_corrupt_bio_flags_match(bio, fc)   \
+       (((bio)->bi_rw & (fc)->corrupt_bio_flags) == (fc)->corrupt_bio_flags)
+
 /*
  * Flakey: Used for testing only, simulates intermittent,
  * catastrophic device failure.
@@ -25,60 +28,189 @@ struct flakey_c {
        sector_t start;
        unsigned up_interval;
        unsigned down_interval;
+       unsigned long flags;
+       unsigned corrupt_bio_byte;
+       unsigned corrupt_bio_rw;
+       unsigned corrupt_bio_value;
+       unsigned corrupt_bio_flags;
+};
+
+enum feature_flag_bits {
+       DROP_WRITES
 };
 
+static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
+                         struct dm_target *ti)
+{
+       int r;
+       unsigned argc;
+       const char *arg_name;
+
+       static struct dm_arg _args[] = {
+               {0, 6, "Invalid number of feature args"},
+               {1, UINT_MAX, "Invalid corrupt bio byte"},
+               {0, 255, "Invalid corrupt value to write into bio byte (0-255)"},
+               {0, UINT_MAX, "Invalid corrupt bio flags mask"},
+       };
+
+       /* No feature arguments supplied. */
+       if (!as->argc)
+               return 0;
+
+       r = dm_read_arg_group(_args, as, &argc, &ti->error);
+       if (r)
+               return r;
+
+       while (argc) {
+               arg_name = dm_shift_arg(as);
+               argc--;
+
+               /*
+                * drop_writes
+                */
+               if (!strcasecmp(arg_name, "drop_writes")) {
+                       if (test_and_set_bit(DROP_WRITES, &fc->flags)) {
+                               ti->error = "Feature drop_writes duplicated";
+                               return -EINVAL;
+                       }
+
+                       continue;
+               }
+
+               /*
+                * corrupt_bio_byte <Nth_byte> <direction> <value> <bio_flags>
+                */
+               if (!strcasecmp(arg_name, "corrupt_bio_byte")) {
+                       if (!argc)
+                               ti->error = "Feature corrupt_bio_byte requires parameters";
+
+                       r = dm_read_arg(_args + 1, as, &fc->corrupt_bio_byte, &ti->error);
+                       if (r)
+                               return r;
+                       argc--;
+
+                       /*
+                        * Direction r or w?
+                        */
+                       arg_name = dm_shift_arg(as);
+                       if (!strcasecmp(arg_name, "w"))
+                               fc->corrupt_bio_rw = WRITE;
+                       else if (!strcasecmp(arg_name, "r"))
+                               fc->corrupt_bio_rw = READ;
+                       else {
+                               ti->error = "Invalid corrupt bio direction (r or w)";
+                               return -EINVAL;
+                       }
+                       argc--;
+
+                       /*
+                        * Value of byte (0-255) to write in place of correct one.
+                        */
+                       r = dm_read_arg(_args + 2, as, &fc->corrupt_bio_value, &ti->error);
+                       if (r)
+                               return r;
+                       argc--;
+
+                       /*
+                        * Only corrupt bios with these flags set.
+                        */
+                       r = dm_read_arg(_args + 3, as, &fc->corrupt_bio_flags, &ti->error);
+                       if (r)
+                               return r;
+                       argc--;
+
+                       continue;
+               }
+
+               ti->error = "Unrecognised flakey feature requested";
+               return -EINVAL;
+       }
+
+       if (test_bit(DROP_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) {
+               ti->error = "drop_writes is incompatible with corrupt_bio_byte with the WRITE flag set";
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 /*
- * Construct a flakey mapping: <dev_path> <offset> <up interval> <down interval>
+ * Construct a flakey mapping:
+ * <dev_path> <offset> <up interval> <down interval> [<#feature args> [<arg>]*]
+ *
+ *   Feature args:
+ *     [drop_writes]
+ *     [corrupt_bio_byte <Nth_byte> <direction> <value> <bio_flags>]
+ *
+ *   Nth_byte starts from 1 for the first byte.
+ *   Direction is r for READ or w for WRITE.
+ *   bio_flags is ignored if 0.
  */
 static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
+       static struct dm_arg _args[] = {
+               {0, UINT_MAX, "Invalid up interval"},
+               {0, UINT_MAX, "Invalid down interval"},
+       };
+
+       int r;
        struct flakey_c *fc;
-       unsigned long long tmp;
+       unsigned long long tmpll;
+       struct dm_arg_set as;
+       const char *devname;
 
-       if (argc != 4) {
-               ti->error = "dm-flakey: Invalid argument count";
+       as.argc = argc;
+       as.argv = argv;
+
+       if (argc < 4) {
+               ti->error = "Invalid argument count";
                return -EINVAL;
        }
 
-       fc = kmalloc(sizeof(*fc), GFP_KERNEL);
+       fc = kzalloc(sizeof(*fc), GFP_KERNEL);
        if (!fc) {
-               ti->error = "dm-flakey: Cannot allocate linear context";
+               ti->error = "Cannot allocate linear context";
                return -ENOMEM;
        }
        fc->start_time = jiffies;
 
-       if (sscanf(argv[1], "%llu", &tmp) != 1) {
-               ti->error = "dm-flakey: Invalid device sector";
+       devname = dm_shift_arg(&as);
+
+       if (sscanf(dm_shift_arg(&as), "%llu", &tmpll) != 1) {
+               ti->error = "Invalid device sector";
                goto bad;
        }
-       fc->start = tmp;
+       fc->start = tmpll;
 
-       if (sscanf(argv[2], "%u", &fc->up_interval) != 1) {
-               ti->error = "dm-flakey: Invalid up interval";
+       r = dm_read_arg(_args, &as, &fc->up_interval, &ti->error);
+       if (r)
                goto bad;
-       }
 
-       if (sscanf(argv[3], "%u", &fc->down_interval) != 1) {
-               ti->error = "dm-flakey: Invalid down interval";
+       r = dm_read_arg(_args, &as, &fc->down_interval, &ti->error);
+       if (r)
                goto bad;
-       }
 
        if (!(fc->up_interval + fc->down_interval)) {
-               ti->error = "dm-flakey: Total (up + down) interval is zero";
+               ti->error = "Total (up + down) interval is zero";
                goto bad;
        }
 
        if (fc->up_interval + fc->down_interval < fc->up_interval) {
-               ti->error = "dm-flakey: Interval overflow";
+               ti->error = "Interval overflow";
                goto bad;
        }
 
-       if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &fc->dev)) {
-               ti->error = "dm-flakey: Device lookup failed";
+       r = parse_features(&as, fc, ti);
+       if (r)
+               goto bad;
+
+       if (dm_get_device(ti, devname, dm_table_get_mode(ti->table), &fc->dev)) {
+               ti->error = "Device lookup failed";
                goto bad;
        }
 
        ti->num_flush_requests = 1;
+       ti->num_discard_requests = 1;
        ti->private = fc;
        return 0;
 
@@ -99,7 +231,7 @@ static sector_t flakey_map_sector(struct dm_target *ti, sector_t bi_sector)
 {
        struct flakey_c *fc = ti->private;
 
-       return fc->start + (bi_sector - ti->begin);
+       return fc->start + dm_target_offset(ti, bi_sector);
 }
 
 static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
@@ -111,6 +243,25 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
                bio->bi_sector = flakey_map_sector(ti, bio->bi_sector);
 }
 
+static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
+{
+       unsigned bio_bytes = bio_cur_bytes(bio);
+       char *data = bio_data(bio);
+
+       /*
+        * Overwrite the Nth byte of the data returned.
+        */
+       if (data && bio_bytes >= fc->corrupt_bio_byte) {
+               data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value;
+
+               DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
+                       "(rw=%c bi_rw=%lu bi_sector=%llu cur_bytes=%u)\n",
+                       bio, fc->corrupt_bio_value, fc->corrupt_bio_byte,
+                       (bio_data_dir(bio) == WRITE) ? 'w' : 'r',
+                       bio->bi_rw, (unsigned long long)bio->bi_sector, bio_bytes);
+       }
+}
+
 static int flakey_map(struct dm_target *ti, struct bio *bio,
                      union map_info *map_context)
 {
@@ -119,18 +270,71 @@ static int flakey_map(struct dm_target *ti, struct bio *bio,
 
        /* Are we alive ? */
        elapsed = (jiffies - fc->start_time) / HZ;
-       if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval)
+       if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) {
+               /*
+                * Flag this bio as submitted while down.
+                */
+               map_context->ll = 1;
+
+               /*
+                * Map reads as normal.
+                */
+               if (bio_data_dir(bio) == READ)
+                       goto map_bio;
+
+               /*
+                * Drop writes?
+                */
+               if (test_bit(DROP_WRITES, &fc->flags)) {
+                       bio_endio(bio, 0);
+                       return DM_MAPIO_SUBMITTED;
+               }
+
+               /*
+                * Corrupt matching writes.
+                */
+               if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == WRITE)) {
+                       if (all_corrupt_bio_flags_match(bio, fc))
+                               corrupt_bio_data(bio, fc);
+                       goto map_bio;
+               }
+
+               /*
+                * By default, error all I/O.
+                */
                return -EIO;
+       }
 
+map_bio:
        flakey_map_bio(ti, bio);
 
        return DM_MAPIO_REMAPPED;
 }
 
+static int flakey_end_io(struct dm_target *ti, struct bio *bio,
+                        int error, union map_info *map_context)
+{
+       struct flakey_c *fc = ti->private;
+       unsigned bio_submitted_while_down = map_context->ll;
+
+       /*
+        * Corrupt successful READs while in down state.
+        * If flags were specified, only corrupt those that match.
+        */
+       if (!error && bio_submitted_while_down &&
+           (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) &&
+           all_corrupt_bio_flags_match(bio, fc))
+               corrupt_bio_data(bio, fc);
+
+       return error;
+}
+
 static int flakey_status(struct dm_target *ti, status_type_t type,
                         char *result, unsigned int maxlen)
 {
+       unsigned sz = 0;
        struct flakey_c *fc = ti->private;
+       unsigned drop_writes;
 
        switch (type) {
        case STATUSTYPE_INFO:
@@ -138,9 +342,22 @@ static int flakey_status(struct dm_target *ti, status_type_t type,
                break;
 
        case STATUSTYPE_TABLE:
-               snprintf(result, maxlen, "%s %llu %u %u", fc->dev->name,
-                        (unsigned long long)fc->start, fc->up_interval,
-                        fc->down_interval);
+               DMEMIT("%s %llu %u %u ", fc->dev->name,
+                      (unsigned long long)fc->start, fc->up_interval,
+                      fc->down_interval);
+
+               drop_writes = test_bit(DROP_WRITES, &fc->flags);
+               DMEMIT("%u ", drop_writes + (fc->corrupt_bio_byte > 0) * 5);
+
+               if (drop_writes)
+                       DMEMIT("drop_writes ");
+
+               if (fc->corrupt_bio_byte)
+                       DMEMIT("corrupt_bio_byte %u %c %u %u ",
+                              fc->corrupt_bio_byte,
+                              (fc->corrupt_bio_rw == WRITE) ? 'w' : 'r',
+                              fc->corrupt_bio_value, fc->corrupt_bio_flags);
+
                break;
        }
        return 0;
@@ -177,11 +394,12 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_
 
 static struct target_type flakey_target = {
        .name   = "flakey",
-       .version = {1, 1, 0},
+       .version = {1, 2, 0},
        .module = THIS_MODULE,
        .ctr    = flakey_ctr,
        .dtr    = flakey_dtr,
        .map    = flakey_map,
+       .end_io = flakey_end_io,
        .status = flakey_status,
        .ioctl  = flakey_ioctl,
        .merge  = flakey_merge,
index 2067288..ad2eba4 100644 (file)
@@ -38,6 +38,8 @@ struct io {
        struct dm_io_client *client;
        io_notify_fn callback;
        void *context;
+       void *vma_invalidate_address;
+       unsigned long vma_invalidate_size;
 } __attribute__((aligned(DM_IO_MAX_REGIONS)));
 
 static struct kmem_cache *_dm_io_cache;
@@ -116,6 +118,10 @@ static void dec_count(struct io *io, unsigned int region, int error)
                set_bit(region, &io->error_bits);
 
        if (atomic_dec_and_test(&io->count)) {
+               if (io->vma_invalidate_size)
+                       invalidate_kernel_vmap_range(io->vma_invalidate_address,
+                                                    io->vma_invalidate_size);
+
                if (io->sleeper)
                        wake_up_process(io->sleeper);
 
@@ -159,6 +165,9 @@ struct dpages {
 
        unsigned context_u;
        void *context_ptr;
+
+       void *vma_invalidate_address;
+       unsigned long vma_invalidate_size;
 };
 
 /*
@@ -377,6 +386,9 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
        io->sleeper = current;
        io->client = client;
 
+       io->vma_invalidate_address = dp->vma_invalidate_address;
+       io->vma_invalidate_size = dp->vma_invalidate_size;
+
        dispatch_io(rw, num_regions, where, dp, io, 1);
 
        while (1) {
@@ -415,13 +427,21 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
        io->callback = fn;
        io->context = context;
 
+       io->vma_invalidate_address = dp->vma_invalidate_address;
+       io->vma_invalidate_size = dp->vma_invalidate_size;
+
        dispatch_io(rw, num_regions, where, dp, io, 0);
        return 0;
 }
 
-static int dp_init(struct dm_io_request *io_req, struct dpages *dp)
+static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
+                  unsigned long size)
 {
        /* Set up dpages based on memory type */
+
+       dp->vma_invalidate_address = NULL;
+       dp->vma_invalidate_size = 0;
+
        switch (io_req->mem.type) {
        case DM_IO_PAGE_LIST:
                list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset);
@@ -432,6 +452,11 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp)
                break;
 
        case DM_IO_VMA:
+               flush_kernel_vmap_range(io_req->mem.ptr.vma, size);
+               if ((io_req->bi_rw & RW_MASK) == READ) {
+                       dp->vma_invalidate_address = io_req->mem.ptr.vma;
+                       dp->vma_invalidate_size = size;
+               }
                vm_dp_init(dp, io_req->mem.ptr.vma);
                break;
 
@@ -460,7 +485,7 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions,
        int r;
        struct dpages dp;
 
-       r = dp_init(io_req, &dp);
+       r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT);
        if (r)
                return r;
 
index 4cacdad..2e9a3ca 100644 (file)
@@ -128,6 +128,24 @@ static struct hash_cell *__get_uuid_cell(const char *str)
        return NULL;
 }
 
+static struct hash_cell *__get_dev_cell(uint64_t dev)
+{
+       struct mapped_device *md;
+       struct hash_cell *hc;
+
+       md = dm_get_md(huge_decode_dev(dev));
+       if (!md)
+               return NULL;
+
+       hc = dm_get_mdptr(md);
+       if (!hc) {
+               dm_put(md);
+               return NULL;
+       }
+
+       return hc;
+}
+
 /*-----------------------------------------------------------------
  * Inserting, removing and renaming a device.
  *---------------------------------------------------------------*/
@@ -718,25 +736,45 @@ static int dev_create(struct dm_ioctl *param, size_t param_size)
  */
 static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param)
 {
-       struct mapped_device *md;
-       void *mdptr = NULL;
+       struct hash_cell *hc = NULL;
 
-       if (*param->uuid)
-               return __get_uuid_cell(param->uuid);
+       if (*param->uuid) {
+               if (*param->name || param->dev)
+                       return NULL;
 
-       if (*param->name)
-               return __get_name_cell(param->name);
+               hc = __get_uuid_cell(param->uuid);
+               if (!hc)
+                       return NULL;
+       } else if (*param->name) {
+               if (param->dev)
+                       return NULL;
 
-       md = dm_get_md(huge_decode_dev(param->dev));
-       if (!md)
-               goto out;
+               hc = __get_name_cell(param->name);
+               if (!hc)
+                       return NULL;
+       } else if (param->dev) {
+               hc = __get_dev_cell(param->dev);
+               if (!hc)
+                       return NULL;
+       } else
+               return NULL;
 
-       mdptr = dm_get_mdptr(md);
-       if (!mdptr)
-               dm_put(md);
+       /*
+        * Sneakily write in both the name and the uuid
+        * while we have the cell.
+        */
+       strlcpy(param->name, hc->name, sizeof(param->name));
+       if (hc->uuid)
+               strlcpy(param->uuid, hc->uuid, sizeof(param->uuid));
+       else
+               param->uuid[0] = '\0';
 
-out:
-       return mdptr;
+       if (hc->new_map)
+               param->flags |= DM_INACTIVE_PRESENT_FLAG;
+       else
+               param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
+
+       return hc;
 }
 
 static struct mapped_device *find_device(struct dm_ioctl *param)
@@ -746,24 +784,8 @@ static struct mapped_device *find_device(struct dm_ioctl *param)
 
        down_read(&_hash_lock);
        hc = __find_device_hash_cell(param);
-       if (hc) {
+       if (hc)
                md = hc->md;
-
-               /*
-                * Sneakily write in both the name and the uuid
-                * while we have the cell.
-                */
-               strlcpy(param->name, hc->name, sizeof(param->name));
-               if (hc->uuid)
-                       strlcpy(param->uuid, hc->uuid, sizeof(param->uuid));
-               else
-                       param->uuid[0] = '\0';
-
-               if (hc->new_map)
-                       param->flags |= DM_INACTIVE_PRESENT_FLAG;
-               else
-                       param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
-       }
        up_read(&_hash_lock);
 
        return md;
@@ -1402,6 +1424,11 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
                goto out;
        }
 
+       if (!argc) {
+               DMWARN("Empty message received.");
+               goto out;
+       }
+
        table = dm_get_live_table(md);
        if (!table)
                goto out_argv;
index 320401d..f821470 100644 (file)
@@ -224,8 +224,6 @@ struct kcopyd_job {
        unsigned int num_dests;
        struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS];
 
-       sector_t offset;
-       unsigned int nr_pages;
        struct page_list *pages;
 
        /*
@@ -380,7 +378,7 @@ static int run_io_job(struct kcopyd_job *job)
                .bi_rw = job->rw,
                .mem.type = DM_IO_PAGE_LIST,
                .mem.ptr.pl = job->pages,
-               .mem.offset = job->offset,
+               .mem.offset = 0,
                .notify.fn = complete_io,
                .notify.context = job,
                .client = job->kc->io_client,
@@ -397,10 +395,9 @@ static int run_io_job(struct kcopyd_job *job)
 static int run_pages_job(struct kcopyd_job *job)
 {
        int r;
+       unsigned nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9);
 
-       job->nr_pages = dm_div_up(job->dests[0].count + job->offset,
-                                 PAGE_SIZE >> 9);
-       r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages);
+       r = kcopyd_get_pages(job->kc, nr_pages, &job->pages);
        if (!r) {
                /* this job is ready for io */
                push(&job->kc->io_jobs, job);
@@ -602,8 +599,6 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
        job->num_dests = num_dests;
        memcpy(&job->dests, dests, sizeof(*dests) * num_dests);
 
-       job->offset = 0;
-       job->nr_pages = 0;
        job->pages = NULL;
 
        job->fn = fn;
@@ -622,6 +617,37 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
 }
 EXPORT_SYMBOL(dm_kcopyd_copy);
 
+void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc,
+                                dm_kcopyd_notify_fn fn, void *context)
+{
+       struct kcopyd_job *job;
+
+       job = mempool_alloc(kc->job_pool, GFP_NOIO);
+
+       memset(job, 0, sizeof(struct kcopyd_job));
+       job->kc = kc;
+       job->fn = fn;
+       job->context = context;
+
+       atomic_inc(&kc->nr_jobs);
+
+       return job;
+}
+EXPORT_SYMBOL(dm_kcopyd_prepare_callback);
+
+void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err)
+{
+       struct kcopyd_job *job = j;
+       struct dm_kcopyd_client *kc = job->kc;
+
+       job->read_err = read_err;
+       job->write_err = write_err;
+
+       push(&kc->complete_jobs, job);
+       wake(kc);
+}
+EXPORT_SYMBOL(dm_kcopyd_do_callback);
+
 /*
  * Cancels a kcopyd job, eg. someone might be deactivating a
  * mirror.
index aa2e0c3..1021c89 100644 (file)
@@ -394,8 +394,7 @@ static int flush_by_group(struct log_c *lc, struct list_head *flush_list)
                        group[count] = fe->region;
                        count++;
 
-                       list_del(&fe->list);
-                       list_add(&fe->list, &tmp_list);
+                       list_move(&fe->list, &tmp_list);
 
                        type = fe->type;
                        if (count >= MAX_FLUSH_GROUP_COUNT)
index 948e3f4..3b52bb7 100644 (file)
@@ -197,15 +197,21 @@ EXPORT_SYMBOL(dm_dirty_log_destroy);
 #define MIRROR_DISK_VERSION 2
 #define LOG_OFFSET 2
 
-struct log_header {
-       uint32_t magic;
+struct log_header_disk {
+       __le32 magic;
 
        /*
         * Simple, incrementing version. no backward
         * compatibility.
         */
+       __le32 version;
+       __le64 nr_regions;
+} __packed;
+
+struct log_header_core {
+       uint32_t magic;
        uint32_t version;
-       sector_t nr_regions;
+       uint64_t nr_regions;
 };
 
 struct log_c {
@@ -239,10 +245,10 @@ struct log_c {
        int log_dev_failed;
        int log_dev_flush_failed;
        struct dm_dev *log_dev;
-       struct log_header header;
+       struct log_header_core header;
 
        struct dm_io_region header_location;
-       struct log_header *disk_header;
+       struct log_header_disk *disk_header;
 };
 
 /*
@@ -251,34 +257,34 @@ struct log_c {
  */
 static inline int log_test_bit(uint32_t *bs, unsigned bit)
 {
-       return test_bit_le(bit, (unsigned long *) bs) ? 1 : 0;
+       return test_bit_le(bit, bs) ? 1 : 0;
 }
 
 static inline void log_set_bit(struct log_c *l,
                               uint32_t *bs, unsigned bit)
 {
-       __test_and_set_bit_le(bit, (unsigned long *) bs);
+       __set_bit_le(bit, bs);
        l->touched_cleaned = 1;
 }
 
 static inline void log_clear_bit(struct log_c *l,
                                 uint32_t *bs, unsigned bit)
 {
-       __test_and_clear_bit_le(bit, (unsigned long *) bs);
+       __clear_bit_le(bit, bs);
        l->touched_dirtied = 1;
 }
 
 /*----------------------------------------------------------------
  * Header IO
  *--------------------------------------------------------------*/
-static void header_to_disk(struct log_header *core, struct log_header *disk)
+static void header_to_disk(struct log_header_core *core, struct log_header_disk *disk)
 {
        disk->magic = cpu_to_le32(core->magic);
        disk->version = cpu_to_le32(core->version);
        disk->nr_regions = cpu_to_le64(core->nr_regions);
 }
 
-static void header_from_disk(struct log_header *core, struct log_header *disk)
+static void header_from_disk(struct log_header_core *core, struct log_header_disk *disk)
 {
        core->magic = le32_to_cpu(disk->magic);
        core->version = le32_to_cpu(disk->version);
@@ -486,7 +492,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
        memset(lc->sync_bits, (sync == NOSYNC) ? -1 : 0, bitset_size);
        lc->sync_count = (sync == NOSYNC) ? region_count : 0;
 
-       lc->recovering_bits = vmalloc(bitset_size);
+       lc->recovering_bits = vzalloc(bitset_size);
        if (!lc->recovering_bits) {
                DMWARN("couldn't allocate sync bitset");
                vfree(lc->sync_bits);
@@ -498,7 +504,6 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
                kfree(lc);
                return -ENOMEM;
        }
-       memset(lc->recovering_bits, 0, bitset_size);
        lc->sync_search = 0;
        log->context = lc;
 
@@ -739,8 +744,7 @@ static int core_get_resync_work(struct dm_dirty_log *log, region_t *region)
                return 0;
 
        do {
-               *region = find_next_zero_bit_le(
-                                            (unsigned long *) lc->sync_bits,
+               *region = find_next_zero_bit_le(lc->sync_bits,
                                             lc->region_count,
                                             lc->sync_search);
                lc->sync_search = *region + 1;
index c354701..5e0090e 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/atomic.h>
 
 #define DM_MSG_PREFIX "multipath"
-#define MESG_STR(x) x, sizeof(x)
 #define DM_PG_INIT_DELAY_MSECS 2000
 #define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)
 
@@ -505,80 +504,29 @@ static void trigger_event(struct work_struct *work)
  *      <#paths> <#per-path selector args>
  *         [<path> [<arg>]* ]+ ]+
  *---------------------------------------------------------------*/
-struct param {
-       unsigned min;
-       unsigned max;
-       char *error;
-};
-
-static int read_param(struct param *param, char *str, unsigned *v, char **error)
-{
-       if (!str ||
-           (sscanf(str, "%u", v) != 1) ||
-           (*v < param->min) ||
-           (*v > param->max)) {
-               *error = param->error;
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-struct arg_set {
-       unsigned argc;
-       char **argv;
-};
-
-static char *shift(struct arg_set *as)
-{
-       char *r;
-
-       if (as->argc) {
-               as->argc--;
-               r = *as->argv;
-               as->argv++;
-               return r;
-       }
-
-       return NULL;
-}
-
-static void consume(struct arg_set *as, unsigned n)
-{
-       BUG_ON (as->argc < n);
-       as->argc -= n;
-       as->argv += n;
-}
-
-static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
+static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
                               struct dm_target *ti)
 {
        int r;
        struct path_selector_type *pst;
        unsigned ps_argc;
 
-       static struct param _params[] = {
+       static struct dm_arg _args[] = {
                {0, 1024, "invalid number of path selector args"},
        };
 
-       pst = dm_get_path_selector(shift(as));
+       pst = dm_get_path_selector(dm_shift_arg(as));
        if (!pst) {
                ti->error = "unknown path selector type";
                return -EINVAL;
        }
 
-       r = read_param(_params, shift(as), &ps_argc, &ti->error);
+       r = dm_read_arg_group(_args, as, &ps_argc, &ti->error);
        if (r) {
                dm_put_path_selector(pst);
                return -EINVAL;
        }
 
-       if (ps_argc > as->argc) {
-               dm_put_path_selector(pst);
-               ti->error = "not enough arguments for path selector";
-               return -EINVAL;
-       }
-
        r = pst->create(&pg->ps, ps_argc, as->argv);
        if (r) {
                dm_put_path_selector(pst);
@@ -587,12 +535,12 @@ static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
        }
 
        pg->ps.type = pst;
-       consume(as, ps_argc);
+       dm_consume_args(as, ps_argc);
 
        return 0;
 }
 
-static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
+static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps,
                               struct dm_target *ti)
 {
        int r;
@@ -609,7 +557,7 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
        if (!p)
                return ERR_PTR(-ENOMEM);
 
-       r = dm_get_device(ti, shift(as), dm_table_get_mode(ti->table),
+       r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
                          &p->path.dev);
        if (r) {
                ti->error = "error getting device";
@@ -660,16 +608,16 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
        return ERR_PTR(r);
 }
 
-static struct priority_group *parse_priority_group(struct arg_set *as,
+static struct priority_group *parse_priority_group(struct dm_arg_set *as,
                                                   struct multipath *m)
 {
-       static struct param _params[] = {
+       static struct dm_arg _args[] = {
                {1, 1024, "invalid number of paths"},
                {0, 1024, "invalid number of selector args"}
        };
 
        int r;
-       unsigned i, nr_selector_args, nr_params;
+       unsigned i, nr_selector_args, nr_args;
        struct priority_group *pg;
        struct dm_target *ti = m->ti;
 
@@ -693,26 +641,26 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
        /*
         * read the paths
         */
-       r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error);
+       r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error);
        if (r)
                goto bad;
 
-       r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error);
+       r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error);
        if (r)
                goto bad;
 
-       nr_params = 1 + nr_selector_args;
+       nr_args = 1 + nr_selector_args;
        for (i = 0; i < pg->nr_pgpaths; i++) {
                struct pgpath *pgpath;
-               struct arg_set path_args;
+               struct dm_arg_set path_args;
 
-               if (as->argc < nr_params) {
+               if (as->argc < nr_args) {
                        ti->error = "not enough path parameters";
                        r = -EINVAL;
                        goto bad;
                }
 
-               path_args.argc = nr_params;
+               path_args.argc = nr_args;
                path_args.argv = as->argv;
 
                pgpath = parse_path(&path_args, &pg->ps, ti);
@@ -723,7 +671,7 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
 
                pgpath->pg = pg;
                list_add_tail(&pgpath->list, &pg->pgpaths);
-               consume(as, nr_params);
+               dm_consume_args(as, nr_args);
        }
 
        return pg;
@@ -733,28 +681,23 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
        return ERR_PTR(r);
 }
 
-static int parse_hw_handler(struct arg_set *as, struct multipath *m)
+static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
 {
        unsigned hw_argc;
        int ret;
        struct dm_target *ti = m->ti;
 
-       static struct param _params[] = {
+       static struct dm_arg _args[] = {
                {0, 1024, "invalid number of hardware handler args"},
        };
 
-       if (read_param(_params, shift(as), &hw_argc, &ti->error))
+       if (dm_read_arg_group(_args, as, &hw_argc, &ti->error))
                return -EINVAL;
 
        if (!hw_argc)
                return 0;
 
-       if (hw_argc > as->argc) {
-               ti->error = "not enough arguments for hardware handler";
-               return -EINVAL;
-       }
-
-       m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
+       m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
        request_module("scsi_dh_%s", m->hw_handler_name);
        if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
                ti->error = "unknown hardware handler type";
@@ -778,7 +721,7 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m)
                for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1)
                        j = sprintf(p, "%s", as->argv[i]);
        }
-       consume(as, hw_argc - 1);
+       dm_consume_args(as, hw_argc - 1);
 
        return 0;
 fail:
@@ -787,20 +730,20 @@ fail:
        return ret;
 }
 
-static int parse_features(struct arg_set *as, struct multipath *m)
+static int parse_features(struct dm_arg_set *as, struct multipath *m)
 {
        int r;
        unsigned argc;
        struct dm_target *ti = m->ti;
-       const char *param_name;
+       const char *arg_name;
 
-       static struct param _params[] = {
+       static struct dm_arg _args[] = {
                {0, 5, "invalid number of feature args"},
                {1, 50, "pg_init_retries must be between 1 and 50"},
                {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
        };
 
-       r = read_param(_params, shift(as), &argc, &ti->error);
+       r = dm_read_arg_group(_args, as, &argc, &ti->error);
        if (r)
                return -EINVAL;
 
@@ -808,26 +751,24 @@ static int parse_features(struct arg_set *as, struct multipath *m)
                return 0;
 
        do {
-               param_name = shift(as);
+               arg_name = dm_shift_arg(as);
                argc--;
 
-               if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
+               if (!strcasecmp(arg_name, "queue_if_no_path")) {
                        r = queue_if_no_path(m, 1, 0);
                        continue;
                }
 
-               if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
+               if (!strcasecmp(arg_name, "pg_init_retries") &&
                    (argc >= 1)) {
-                       r = read_param(_params + 1, shift(as),
-                                      &m->pg_init_retries, &ti->error);
+                       r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error);
                        argc--;
                        continue;
                }
 
-               if (!strnicmp(param_name, MESG_STR("pg_init_delay_msecs")) &&
+               if (!strcasecmp(arg_name, "pg_init_delay_msecs") &&
                    (argc >= 1)) {
-                       r = read_param(_params + 2, shift(as),
-                                      &m->pg_init_delay_msecs, &ti->error);
+                       r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error);
                        argc--;
                        continue;
                }
@@ -842,15 +783,15 @@ static int parse_features(struct arg_set *as, struct multipath *m)
 static int multipath_ctr(struct dm_target *ti, unsigned int argc,
                         char **argv)
 {
-       /* target parameters */
-       static struct param _params[] = {
+       /* target arguments */
+       static struct dm_arg _args[] = {
                {0, 1024, "invalid number of priority groups"},
                {0, 1024, "invalid initial priority group number"},
        };
 
        int r;
        struct multipath *m;
-       struct arg_set as;
+       struct dm_arg_set as;
        unsigned pg_count = 0;
        unsigned next_pg_num;
 
@@ -871,11 +812,11 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
        if (r)
                goto bad;
 
-       r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
+       r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error);
        if (r)
                goto bad;
 
-       r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error);
+       r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error);
        if (r)
                goto bad;
 
@@ -1505,10 +1446,10 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
        }
 
        if (argc == 1) {
-               if (!strnicmp(argv[0], MESG_STR("queue_if_no_path"))) {
+               if (!strcasecmp(argv[0], "queue_if_no_path")) {
                        r = queue_if_no_path(m, 1, 0);
                        goto out;
-               } else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path"))) {
+               } else if (!strcasecmp(argv[0], "fail_if_no_path")) {
                        r = queue_if_no_path(m, 0, 0);
                        goto out;
                }
@@ -1519,18 +1460,18 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
                goto out;
        }
 
-       if (!strnicmp(argv[0], MESG_STR("disable_group"))) {
+       if (!strcasecmp(argv[0], "disable_group")) {
                r = bypass_pg_num(m, argv[1], 1);
                goto out;
-       } else if (!strnicmp(argv[0], MESG_STR("enable_group"))) {
+       } else if (!strcasecmp(argv[0], "enable_group")) {
                r = bypass_pg_num(m, argv[1], 0);
                goto out;
-       } else if (!strnicmp(argv[0], MESG_STR("switch_group"))) {
+       } else if (!strcasecmp(argv[0], "switch_group")) {
                r = switch_pg_num(m, argv[1]);
                goto out;
-       } else if (!strnicmp(argv[0], MESG_STR("reinstate_path")))
+       } else if (!strcasecmp(argv[0], "reinstate_path"))
                action = reinstate_path;
-       else if (!strnicmp(argv[0], MESG_STR("fail_path")))
+       else if (!strcasecmp(argv[0], "fail_path"))
                action = fail_path;
        else {
                DMWARN("Unrecognised multipath message received.");
index e5d8904..a002dd8 100644 (file)
@@ -8,19 +8,19 @@
 #include <linux/slab.h>
 
 #include "md.h"
+#include "raid1.h"
 #include "raid5.h"
-#include "dm.h"
 #include "bitmap.h"
 
+#include <linux/device-mapper.h>
+
 #define DM_MSG_PREFIX "raid"
 
 /*
- * If the MD doesn't support MD_SYNC_STATE_FORCED yet, then
- * make it so the flag doesn't set anything.
+ * The following flags are used by dm-raid.c to set up the array state.
+ * They must be cleared before md_run is called.
  */
-#ifndef MD_SYNC_STATE_FORCED
-#define MD_SYNC_STATE_FORCED 0
-#endif
+#define FirstUse 10             /* rdev flag */
 
 struct raid_dev {
        /*
@@ -43,14 +43,15 @@ struct raid_dev {
 /*
  * Flags for rs->print_flags field.
  */
-#define DMPF_DAEMON_SLEEP      0x1
-#define DMPF_MAX_WRITE_BEHIND  0x2
-#define DMPF_SYNC              0x4
-#define DMPF_NOSYNC            0x8
-#define DMPF_STRIPE_CACHE      0x10
-#define DMPF_MIN_RECOVERY_RATE 0x20
-#define DMPF_MAX_RECOVERY_RATE 0x40
-
+#define DMPF_SYNC              0x1
+#define DMPF_NOSYNC            0x2
+#define DMPF_REBUILD           0x4
+#define DMPF_DAEMON_SLEEP      0x8
+#define DMPF_MIN_RECOVERY_RATE 0x10
+#define DMPF_MAX_RECOVERY_RATE 0x20
+#define DMPF_MAX_WRITE_BEHIND  0x40
+#define DMPF_STRIPE_CACHE      0x80
+#define DMPF_REGION_SIZE       0X100
 struct raid_set {
        struct dm_target *ti;
 
@@ -72,6 +73,7 @@ static struct raid_type {
        const unsigned level;           /* RAID level. */
        const unsigned algorithm;       /* RAID algorithm. */
 } raid_types[] = {
+       {"raid1",    "RAID1 (mirroring)",               0, 2, 1, 0 /* NONE */},
        {"raid4",    "RAID4 (dedicated parity disk)",   1, 2, 5, ALGORITHM_PARITY_0},
        {"raid5_la", "RAID5 (left asymmetric)",         1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC},
        {"raid5_ra", "RAID5 (right asymmetric)",        1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC},
@@ -105,7 +107,8 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra
        }
 
        sectors_per_dev = ti->len;
-       if (sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) {
+       if ((raid_type->level > 1) &&
+           sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) {
                ti->error = "Target length not divisible by number of data devices";
                return ERR_PTR(-EINVAL);
        }
@@ -147,9 +150,16 @@ static void context_free(struct raid_set *rs)
 {
        int i;
 
-       for (i = 0; i < rs->md.raid_disks; i++)
+       for (i = 0; i < rs->md.raid_disks; i++) {
+               if (rs->dev[i].meta_dev)
+                       dm_put_device(rs->ti, rs->dev[i].meta_dev);
+               if (rs->dev[i].rdev.sb_page)
+                       put_page(rs->dev[i].rdev.sb_page);
+               rs->dev[i].rdev.sb_page = NULL;
+               rs->dev[i].rdev.sb_loaded = 0;
                if (rs->dev[i].data_dev)
                        dm_put_device(rs->ti, rs->dev[i].data_dev);
+       }
 
        kfree(rs);
 }
@@ -159,7 +169,16 @@ static void context_free(struct raid_set *rs)
  *  <meta_dev>: meta device name or '-' if missing
  *  <data_dev>: data device name or '-' if missing
  *
- * This code parses those words.
+ * The following are permitted:
+ *    - -
+ *    - <data_dev>
+ *    <meta_dev> <data_dev>
+ *
+ * The following is not allowed:
+ *    <meta_dev> -
+ *
+ * This code parses those words.  If there is a failure,
+ * the caller must use context_free to unwind the operations.
  */
 static int dev_parms(struct raid_set *rs, char **argv)
 {
@@ -182,8 +201,16 @@ static int dev_parms(struct raid_set *rs, char **argv)
                rs->dev[i].rdev.mddev = &rs->md;
 
                if (strcmp(argv[0], "-")) {
-                       rs->ti->error = "Metadata devices not supported";
-                       return -EINVAL;
+                       ret = dm_get_device(rs->ti, argv[0],
+                                           dm_table_get_mode(rs->ti->table),
+                                           &rs->dev[i].meta_dev);
+                       rs->ti->error = "RAID metadata device lookup failure";
+                       if (ret)
+                               return ret;
+
+                       rs->dev[i].rdev.sb_page = alloc_page(GFP_KERNEL);
+                       if (!rs->dev[i].rdev.sb_page)
+                               return -ENOMEM;
                }
 
                if (!strcmp(argv[1], "-")) {
@@ -193,6 +220,10 @@ static int dev_parms(struct raid_set *rs, char **argv)
                                return -EINVAL;
                        }
 
+                       rs->ti->error = "No data device supplied with metadata device";
+                       if (rs->dev[i].meta_dev)
+                               return -EINVAL;
+
                        continue;
                }
 
@@ -204,6 +235,10 @@ static int dev_parms(struct raid_set *rs, char **argv)
                        return ret;
                }
 
+               if (rs->dev[i].meta_dev) {
+                       metadata_available = 1;
+                       rs->dev[i].rdev.meta_bdev = rs->dev[i].meta_dev->bdev;
+               }
                rs->dev[i].rdev.bdev = rs->dev[i].data_dev->bdev;
                list_add(&rs->dev[i].rdev.same_set, &rs->md.disks);
                if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
@@ -235,33 +270,109 @@ static int dev_parms(struct raid_set *rs, char **argv)
 }
 
 /*
+ * validate_region_size
+ * @rs
+ * @region_size:  region size in sectors.  If 0, pick a size (4MiB default).
+ *
+ * Set rs->md.bitmap_info.chunksize (which really refers to 'region size').
+ * Ensure that (ti->len/region_size < 2^21) - required by MD bitmap.
+ *
+ * Returns: 0 on success, -EINVAL on failure.
+ */
+static int validate_region_size(struct raid_set *rs, unsigned long region_size)
+{
+       unsigned long min_region_size = rs->ti->len / (1 << 21);
+
+       if (!region_size) {
+               /*
+                * Choose a reasonable default.  All figures in sectors.
+                */
+               if (min_region_size > (1 << 13)) {
+                       DMINFO("Choosing default region size of %lu sectors",
+                              region_size);
+                       region_size = min_region_size;
+               } else {
+                       DMINFO("Choosing default region size of 4MiB");
+                       region_size = 1 << 13; /* sectors */
+               }
+       } else {
+               /*
+                * Validate user-supplied value.
+                */
+               if (region_size > rs->ti->len) {
+                       rs->ti->error = "Supplied region size is too large";
+                       return -EINVAL;
+               }
+
+               if (region_size < min_region_size) {
+                       DMERR("Supplied region_size (%lu sectors) below minimum (%lu)",
+                             region_size, min_region_size);
+                       rs->ti->error = "Supplied region size is too small";
+                       return -EINVAL;
+               }
+
+               if (!is_power_of_2(region_size)) {
+                       rs->ti->error = "Region size is not a power of 2";
+                       return -EINVAL;
+               }
+
+               if (region_size < rs->md.chunk_sectors) {
+                       rs->ti->error = "Region size is smaller than the chunk size";
+                       return -EINVAL;
+               }
+       }
+
+       /*
+        * Convert sectors to bytes.
+        */
+       rs->md.bitmap_info.chunksize = (region_size << 9);
+
+       return 0;
+}
+
+/*
  * Possible arguments are...
- * RAID456:
  *     <chunk_size> [optional_args]
  *
- * Optional args:
- *    [[no]sync]                       Force or prevent recovery of the entire array
+ * Argument definitions
+ *    <chunk_size>                     The number of sectors per disk that
+ *                                      will form the "stripe"
+ *    [[no]sync]                       Force or prevent recovery of the
+ *                                      entire array
  *    [rebuild <idx>]                  Rebuild the drive indicated by the index
- *    [daemon_sleep <ms>]              Time between bitmap daemon work to clear bits
+ *    [daemon_sleep <ms>]              Time between bitmap daemon work to
+ *                                      clear bits
  *    [min_recovery_rate <kB/sec/disk>]        Throttle RAID initialization
  *    [max_recovery_rate <kB/sec/disk>]        Throttle RAID initialization
+ *    [write_mostly <idx>]             Indicate a write mostly drive via index
  *    [max_write_behind <sectors>]     See '-write-behind=' (man mdadm)
  *    [stripe_cache <sectors>]         Stripe cache size for higher RAIDs
+ *    [region_size <sectors>]           Defines granularity of bitmap
  */
 static int parse_raid_params(struct raid_set *rs, char **argv,
                             unsigned num_raid_params)
 {
        unsigned i, rebuild_cnt = 0;
-       unsigned long value;
+       unsigned long value, region_size = 0;
        char *key;
 
        /*
         * First, parse the in-order required arguments
+        * "chunk_size" is the only argument of this type.
         */
-       if ((strict_strtoul(argv[0], 10, &value) < 0) ||
-           !is_power_of_2(value) || (value < 8)) {
+       if ((strict_strtoul(argv[0], 10, &value) < 0)) {
                rs->ti->error = "Bad chunk size";
                return -EINVAL;
+       } else if (rs->raid_type->level == 1) {
+               if (value)
+                       DMERR("Ignoring chunk size parameter for RAID 1");
+               value = 0;
+       } else if (!is_power_of_2(value)) {
+               rs->ti->error = "Chunk size must be a power of 2";
+               return -EINVAL;
+       } else if (value < 8) {
+               rs->ti->error = "Chunk size value is too small";
+               return -EINVAL;
        }
 
        rs->md.new_chunk_sectors = rs->md.chunk_sectors = value;
@@ -269,22 +380,39 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
        num_raid_params--;
 
        /*
-        * Second, parse the unordered optional arguments
+        * We set each individual device as In_sync with a completed
+        * 'recovery_offset'.  If there has been a device failure or
+        * replacement then one of the following cases applies:
+        *
+        *   1) User specifies 'rebuild'.
+        *      - Device is reset when param is read.
+        *   2) A new device is supplied.
+        *      - No matching superblock found, resets device.
+        *   3) Device failure was transient and returns on reload.
+        *      - Failure noticed, resets device for bitmap replay.
+        *   4) Device hadn't completed recovery after previous failure.
+        *      - Superblock is read and overrides recovery_offset.
+        *
+        * What is found in the superblocks of the devices is always
+        * authoritative, unless 'rebuild' or '[no]sync' was specified.
         */
-       for (i = 0; i < rs->md.raid_disks; i++)
+       for (i = 0; i < rs->md.raid_disks; i++) {
                set_bit(In_sync, &rs->dev[i].rdev.flags);
+               rs->dev[i].rdev.recovery_offset = MaxSector;
+       }
 
+       /*
+        * Second, parse the unordered optional arguments
+        */
        for (i = 0; i < num_raid_params; i++) {
-               if (!strcmp(argv[i], "nosync")) {
+               if (!strcasecmp(argv[i], "nosync")) {
                        rs->md.recovery_cp = MaxSector;
                        rs->print_flags |= DMPF_NOSYNC;
-                       rs->md.flags |= MD_SYNC_STATE_FORCED;
                        continue;
                }
-               if (!strcmp(argv[i], "sync")) {
+               if (!strcasecmp(argv[i], "sync")) {
                        rs->md.recovery_cp = 0;
                        rs->print_flags |= DMPF_SYNC;
-                       rs->md.flags |= MD_SYNC_STATE_FORCED;
                        continue;
                }
 
@@ -300,9 +428,13 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
                        return -EINVAL;
                }
 
-               if (!strcmp(key, "rebuild")) {
-                       if (++rebuild_cnt > rs->raid_type->parity_devs) {
-                               rs->ti->error = "Too many rebuild drives given";
+               if (!strcasecmp(key, "rebuild")) {
+                       rebuild_cnt++;
+                       if (((rs->raid_type->level != 1) &&
+                            (rebuild_cnt > rs->raid_type->parity_devs)) ||
+                           ((rs->raid_type->level == 1) &&
+                            (rebuild_cnt > (rs->md.raid_disks - 1)))) {
+                               rs->ti->error = "Too many rebuild devices specified for given RAID type";
                                return -EINVAL;
                        }
                        if (value > rs->md.raid_disks) {
@@ -311,7 +443,22 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
                        }
                        clear_bit(In_sync, &rs->dev[value].rdev.flags);
                        rs->dev[value].rdev.recovery_offset = 0;
-               } else if (!strcmp(key, "max_write_behind")) {
+                       rs->print_flags |= DMPF_REBUILD;
+               } else if (!strcasecmp(key, "write_mostly")) {
+                       if (rs->raid_type->level != 1) {
+                               rs->ti->error = "write_mostly option is only valid for RAID1";
+                               return -EINVAL;
+                       }
+                       if (value > rs->md.raid_disks) {
+                               rs->ti->error = "Invalid write_mostly drive index given";
+                               return -EINVAL;
+                       }
+                       set_bit(WriteMostly, &rs->dev[value].rdev.flags);
+               } else if (!strcasecmp(key, "max_write_behind")) {
+                       if (rs->raid_type->level != 1) {
+                               rs->ti->error = "max_write_behind option is only valid for RAID1";
+                               return -EINVAL;
+                       }
                        rs->print_flags |= DMPF_MAX_WRITE_BEHIND;
 
                        /*
@@ -324,14 +471,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
                                return -EINVAL;
                        }
                        rs->md.bitmap_info.max_write_behind = value;
-               } else if (!strcmp(key, "daemon_sleep")) {
+               } else if (!strcasecmp(key, "daemon_sleep")) {
                        rs->print_flags |= DMPF_DAEMON_SLEEP;
                        if (!value || (value > MAX_SCHEDULE_TIMEOUT)) {
                                rs->ti->error = "daemon sleep period out of range";
                                return -EINVAL;
                        }
                        rs->md.bitmap_info.daemon_sleep = value;
-               } else if (!strcmp(key, "stripe_cache")) {
+               } else if (!strcasecmp(key, "stripe_cache")) {
                        rs->print_flags |= DMPF_STRIPE_CACHE;
 
                        /*
@@ -348,20 +495,23 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
                                rs->ti->error = "Bad stripe_cache size";
                                return -EINVAL;
                        }
-               } else if (!strcmp(key, "min_recovery_rate")) {
+               } else if (!strcasecmp(key, "min_recovery_rate")) {
                        rs->print_flags |= DMPF_MIN_RECOVERY_RATE;
                        if (value > INT_MAX) {
                                rs->ti->error = "min_recovery_rate out of range";
                                return -EINVAL;
                        }
                        rs->md.sync_speed_min = (int)value;
-               } else if (!strcmp(key, "max_recovery_rate")) {
+               } else if (!strcasecmp(key, "max_recovery_rate")) {
                        rs->print_flags |= DMPF_MAX_RECOVERY_RATE;
                        if (value > INT_MAX) {
                                rs->ti->error = "max_recovery_rate out of range";
                                return -EINVAL;
                        }
                        rs->md.sync_speed_max = (int)value;
+               } else if (!strcasecmp(key, "region_size")) {
+                       rs->print_flags |= DMPF_REGION_SIZE;
+                       region_size = value;
                } else {
                        DMERR("Unable to parse RAID parameter: %s", key);
                        rs->ti->error = "Unable to parse RAID parameters";
@@ -369,6 +519,19 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
                }
        }
 
+       if (validate_region_size(rs, region_size))
+               return -EINVAL;
+
+       if (rs->md.chunk_sectors)
+               rs->ti->split_io = rs->md.chunk_sectors;
+       else
+               rs->ti->split_io = region_size;
+
+       if (rs->md.chunk_sectors)
+               rs->ti->split_io = rs->md.chunk_sectors;
+       else
+               rs->ti->split_io = region_size;
+
        /* Assume there are no metadata devices until the drives are parsed */
        rs->md.persistent = 0;
        rs->md.external = 1;
@@ -387,17 +550,351 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
 {
        struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
 
+       if (rs->raid_type->level == 1)
+               return md_raid1_congested(&rs->md, bits);
+
        return md_raid5_congested(&rs->md, bits);
 }
 
 /*
+ * This structure is never routinely used by userspace, unlike md superblocks.
+ * Devices with this superblock should only ever be accessed via device-mapper.
+ */
+#define DM_RAID_MAGIC 0x64526D44
+struct dm_raid_superblock {
+       __le32 magic;           /* "DmRd" */
+       __le32 features;        /* Used to indicate possible future changes */
+
+       __le32 num_devices;     /* Number of devices in this array. (Max 64) */
+       __le32 array_position;  /* The position of this drive in the array */
+
+       __le64 events;          /* Incremented by md when superblock updated */
+       __le64 failed_devices;  /* Bit field of devices to indicate failures */
+
+       /*
+        * This offset tracks the progress of the repair or replacement of
+        * an individual drive.
+        */
+       __le64 disk_recovery_offset;
+
+       /*
+        * This offset tracks the progress of the initial array
+        * synchronisation/parity calculation.
+        */
+       __le64 array_resync_offset;
+
+       /*
+        * RAID characteristics
+        */
+       __le32 level;
+       __le32 layout;
+       __le32 stripe_sectors;
+
+       __u8 pad[452];          /* Round struct to 512 bytes. */
+                               /* Always set to 0 when writing. */
+} __packed;
+
+static int read_disk_sb(mdk_rdev_t *rdev, int size)
+{
+       BUG_ON(!rdev->sb_page);
+
+       if (rdev->sb_loaded)
+               return 0;
+
+       if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) {
+               DMERR("Failed to read device superblock");
+               return -EINVAL;
+       }
+
+       rdev->sb_loaded = 1;
+
+       return 0;
+}
+
+static void super_sync(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+       mdk_rdev_t *r, *t;
+       uint64_t failed_devices;
+       struct dm_raid_superblock *sb;
+
+       sb = page_address(rdev->sb_page);
+       failed_devices = le64_to_cpu(sb->failed_devices);
+
+       rdev_for_each(r, t, mddev)
+               if ((r->raid_disk >= 0) && test_bit(Faulty, &r->flags))
+                       failed_devices |= (1ULL << r->raid_disk);
+
+       memset(sb, 0, sizeof(*sb));
+
+       sb->magic = cpu_to_le32(DM_RAID_MAGIC);
+       sb->features = cpu_to_le32(0);  /* No features yet */
+
+       sb->num_devices = cpu_to_le32(mddev->raid_disks);
+       sb->array_position = cpu_to_le32(rdev->raid_disk);
+
+       sb->events = cpu_to_le64(mddev->events);
+       sb->failed_devices = cpu_to_le64(failed_devices);
+
+       sb->disk_recovery_offset = cpu_to_le64(rdev->recovery_offset);
+       sb->array_resync_offset = cpu_to_le64(mddev->recovery_cp);
+
+       sb->level = cpu_to_le32(mddev->level);
+       sb->layout = cpu_to_le32(mddev->layout);
+       sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors);
+}
+
+/*
+ * super_load
+ *
+ * This function creates a superblock if one is not found on the device
+ * and will decide which superblock to use if there's a choice.
+ *
+ * Return: 1 if use rdev, 0 if use refdev, -Exxx otherwise
+ */
+static int super_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev)
+{
+       int ret;
+       struct dm_raid_superblock *sb;
+       struct dm_raid_superblock *refsb;
+       uint64_t events_sb, events_refsb;
+
+       rdev->sb_start = 0;
+       rdev->sb_size = sizeof(*sb);
+
+       ret = read_disk_sb(rdev, rdev->sb_size);
+       if (ret)
+               return ret;
+
+       sb = page_address(rdev->sb_page);
+       if (sb->magic != cpu_to_le32(DM_RAID_MAGIC)) {
+               super_sync(rdev->mddev, rdev);
+
+               set_bit(FirstUse, &rdev->flags);
+
+               /* Force writing of superblocks to disk */
+               set_bit(MD_CHANGE_DEVS, &rdev->mddev->flags);
+
+               /* Any superblock is better than none, choose that if given */
+               return refdev ? 0 : 1;
+       }
+
+       if (!refdev)
+               return 1;
+
+       events_sb = le64_to_cpu(sb->events);
+
+       refsb = page_address(refdev->sb_page);
+       events_refsb = le64_to_cpu(refsb->events);
+
+       return (events_sb > events_refsb) ? 1 : 0;
+}
+
+static int super_init_validation(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+       int role;
+       struct raid_set *rs = container_of(mddev, struct raid_set, md);
+       uint64_t events_sb;
+       uint64_t failed_devices;
+       struct dm_raid_superblock *sb;
+       uint32_t new_devs = 0;
+       uint32_t rebuilds = 0;
+       mdk_rdev_t *r, *t;
+       struct dm_raid_superblock *sb2;
+
+       sb = page_address(rdev->sb_page);
+       events_sb = le64_to_cpu(sb->events);
+       failed_devices = le64_to_cpu(sb->failed_devices);
+
+       /*
+        * Initialise to 1 if this is a new superblock.
+        */
+       mddev->events = events_sb ? : 1;
+
+       /*
+        * Reshaping is not currently allowed
+        */
+       if ((le32_to_cpu(sb->level) != mddev->level) ||
+           (le32_to_cpu(sb->layout) != mddev->layout) ||
+           (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors)) {
+               DMERR("Reshaping arrays not yet supported.");
+               return -EINVAL;
+       }
+
+       /* We can only change the number of devices in RAID1 right now */
+       if ((rs->raid_type->level != 1) &&
+           (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) {
+               DMERR("Reshaping arrays not yet supported.");
+               return -EINVAL;
+       }
+
+       if (!(rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC)))
+               mddev->recovery_cp = le64_to_cpu(sb->array_resync_offset);
+
+       /*
+        * During load, we set FirstUse if a new superblock was written.
+        * There are two reasons we might not have a superblock:
+        * 1) The array is brand new - in which case, all of the
+        *    devices must have their In_sync bit set.  Also,
+        *    recovery_cp must be 0, unless forced.
+        * 2) This is a new device being added to an old array
+        *    and the new device needs to be rebuilt - in which
+        *    case the In_sync bit will /not/ be set and
+        *    recovery_cp must be MaxSector.
+        */
+       rdev_for_each(r, t, mddev) {
+               if (!test_bit(In_sync, &r->flags)) {
+                       if (!test_bit(FirstUse, &r->flags))
+                               DMERR("Superblock area of "
+                                     "rebuild device %d should have been "
+                                     "cleared.", r->raid_disk);
+                       set_bit(FirstUse, &r->flags);
+                       rebuilds++;
+               } else if (test_bit(FirstUse, &r->flags))
+                       new_devs++;
+       }
+
+       if (!rebuilds) {
+               if (new_devs == mddev->raid_disks) {
+                       DMINFO("Superblocks created for new array");
+                       set_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
+               } else if (new_devs) {
+                       DMERR("New device injected "
+                             "into existing array without 'rebuild' "
+                             "parameter specified");
+                       return -EINVAL;
+               }
+       } else if (new_devs) {
+               DMERR("'rebuild' devices cannot be "
+                     "injected into an array with other first-time devices");
+               return -EINVAL;
+       } else if (mddev->recovery_cp != MaxSector) {
+               DMERR("'rebuild' specified while array is not in-sync");
+               return -EINVAL;
+       }
+
+       /*
+        * Now we set the Faulty bit for those devices that are
+        * recorded in the superblock as failed.
+        */
+       rdev_for_each(r, t, mddev) {
+               if (!r->sb_page)
+                       continue;
+               sb2 = page_address(r->sb_page);
+               sb2->failed_devices = 0;
+
+               /*
+                * Check for any device re-ordering.
+                */
+               if (!test_bit(FirstUse, &r->flags) && (r->raid_disk >= 0)) {
+                       role = le32_to_cpu(sb2->array_position);
+                       if (role != r->raid_disk) {
+                               if (rs->raid_type->level != 1) {
+                                       rs->ti->error = "Cannot change device "
+                                               "positions in RAID array";
+                                       return -EINVAL;
+                               }
+                               DMINFO("RAID1 device #%d now at position #%d",
+                                      role, r->raid_disk);
+                       }
+
+                       /*
+                        * Partial recovery is performed on
+                        * returning failed devices.
+                        */
+                       if (failed_devices & (1 << role))
+                               set_bit(Faulty, &r->flags);
+               }
+       }
+
+       return 0;
+}
+
+static int super_validate(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+       struct dm_raid_superblock *sb = page_address(rdev->sb_page);
+
+       /*
+        * If mddev->events is not set, we know we have not yet initialized
+        * the array.
+        */
+       if (!mddev->events && super_init_validation(mddev, rdev))
+               return -EINVAL;
+
+       mddev->bitmap_info.offset = 4096 >> 9; /* Enable bitmap creation */
+       rdev->mddev->bitmap_info.default_offset = 4096 >> 9;
+       if (!test_bit(FirstUse, &rdev->flags)) {
+               rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset);
+               if (rdev->recovery_offset != MaxSector)
+                       clear_bit(In_sync, &rdev->flags);
+       }
+
+       /*
+        * If a device comes back, set it as not In_sync and no longer faulty.
+        */
+       if (test_bit(Faulty, &rdev->flags)) {
+               clear_bit(Faulty, &rdev->flags);
+               clear_bit(In_sync, &rdev->flags);
+               rdev->saved_raid_disk = rdev->raid_disk;
+               rdev->recovery_offset = 0;
+       }
+
+       clear_bit(FirstUse, &rdev->flags);
+
+       return 0;
+}
+
+/*
+ * Analyse superblocks and select the freshest.
+ */
+static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
+{
+       int ret;
+       mdk_rdev_t *rdev, *freshest, *tmp;
+       mddev_t *mddev = &rs->md;
+
+       freshest = NULL;
+       rdev_for_each(rdev, tmp, mddev) {
+               if (!rdev->meta_bdev)
+                       continue;
+
+               ret = super_load(rdev, freshest);
+
+               switch (ret) {
+               case 1:
+                       freshest = rdev;
+                       break;
+               case 0:
+                       break;
+               default:
+                       ti->error = "Failed to load superblock";
+                       return ret;
+               }
+       }
+
+       if (!freshest)
+               return 0;
+
+       /*
+        * Validation of the freshest device provides the source of
+        * validation for the remaining devices.
+        */
+       ti->error = "Unable to assemble array: Invalid superblocks";
+       if (super_validate(mddev, freshest))
+               return -EINVAL;
+
+       rdev_for_each(rdev, tmp, mddev)
+               if ((rdev != freshest) && super_validate(mddev, rdev))
+                       return -EINVAL;
+
+       return 0;
+}
+
+/*
  * Construct a RAID4/5/6 mapping:
  * Args:
  *     <raid_type> <#raid_params> <raid_params>                \
  *     <#raid_devs> { <meta_dev1> <dev1> .. <meta_devN> <devN> }
  *
- * ** metadata devices are not supported yet, use '-' instead **
- *
  * <raid_params> varies by <raid_type>.  See 'parse_raid_params' for
  * details on possible <raid_params>.
  */
@@ -465,8 +962,12 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
        if (ret)
                goto bad;
 
+       rs->md.sync_super = super_sync;
+       ret = analyse_superblocks(ti, rs);
+       if (ret)
+               goto bad;
+
        INIT_WORK(&rs->md.event_work, do_table_event);
-       ti->split_io = rs->md.chunk_sectors;
        ti->private = rs;
 
        mutex_lock(&rs->md.reconfig_mutex);
@@ -482,6 +983,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
        rs->callbacks.congested_fn = raid_is_congested;
        dm_table_add_target_callbacks(ti->table, &rs->callbacks);
 
+       mddev_suspend(&rs->md);
        return 0;
 
 bad:
@@ -546,12 +1048,17 @@ static int raid_status(struct dm_target *ti, status_type_t type,
                break;
        case STATUSTYPE_TABLE:
                /* The string you would use to construct this array */
-               for (i = 0; i < rs->md.raid_disks; i++)
-                       if (rs->dev[i].data_dev &&
+               for (i = 0; i < rs->md.raid_disks; i++) {
+                       if ((rs->print_flags & DMPF_REBUILD) &&
+                           rs->dev[i].data_dev &&
                            !test_bit(In_sync, &rs->dev[i].rdev.flags))
-                               raid_param_cnt++; /* for rebuilds */
+                               raid_param_cnt += 2; /* for rebuilds */
+                       if (rs->dev[i].data_dev &&
+                           test_bit(WriteMostly, &rs->dev[i].rdev.flags))
+                               raid_param_cnt += 2;
+               }
 
-               raid_param_cnt += (hweight64(rs->print_flags) * 2);
+               raid_param_cnt += (hweight64(rs->print_flags & ~DMPF_REBUILD) * 2);
                if (rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC))
                        raid_param_cnt--;
 
@@ -565,7 +1072,8 @@ static int raid_status(struct dm_target *ti, status_type_t type,
                        DMEMIT(" nosync");
 
                for (i = 0; i < rs->md.raid_disks; i++)
-                       if (rs->dev[i].data_dev &&
+                       if ((rs->print_flags & DMPF_REBUILD) &&
+                           rs->dev[i].data_dev &&
                            !test_bit(In_sync, &rs->dev[i].rdev.flags))
                                DMEMIT(" rebuild %u", i);
 
@@ -579,6 +1087,11 @@ static int raid_status(struct dm_target *ti, status_type_t type,
                if (rs->print_flags & DMPF_MAX_RECOVERY_RATE)
                        DMEMIT(" max_recovery_rate %d", rs->md.sync_speed_max);
 
+               for (i = 0; i < rs->md.raid_disks; i++)
+                       if (rs->dev[i].data_dev &&
+                           test_bit(WriteMostly, &rs->dev[i].rdev.flags))
+                               DMEMIT(" write_mostly %u", i);
+
                if (rs->print_flags & DMPF_MAX_WRITE_BEHIND)
                        DMEMIT(" max_write_behind %lu",
                               rs->md.bitmap_info.max_write_behind);
@@ -591,9 +1104,16 @@ static int raid_status(struct dm_target *ti, status_type_t type,
                               conf ? conf->max_nr_stripes * 2 : 0);
                }
 
+               if (rs->print_flags & DMPF_REGION_SIZE)
+                       DMEMIT(" region_size %lu",
+                              rs->md.bitmap_info.chunksize >> 9);
+
                DMEMIT(" %d", rs->md.raid_disks);
                for (i = 0; i < rs->md.raid_disks; i++) {
-                       DMEMIT(" -"); /* metadata device */
+                       if (rs->dev[i].meta_dev)
+                               DMEMIT(" %s", rs->dev[i].meta_dev->name);
+                       else
+                               DMEMIT(" -");
 
                        if (rs->dev[i].data_dev)
                                DMEMIT(" %s", rs->dev[i].data_dev->name);
@@ -650,12 +1170,13 @@ static void raid_resume(struct dm_target *ti)
 {
        struct raid_set *rs = ti->private;
 
+       bitmap_load(&rs->md);
        mddev_resume(&rs->md);
 }
 
 static struct target_type raid_target = {
        .name = "raid",
-       .version = {1, 0, 0},
+       .version = {1, 1, 0},
        .module = THIS_MODULE,
        .ctr = raid_ctr,
        .dtr = raid_dtr,
index 135c2f1..d1f1d70 100644 (file)
 #define NUM_SNAPSHOT_HDR_CHUNKS 1
 
 struct disk_header {
-       uint32_t magic;
+       __le32 magic;
 
        /*
         * Is this snapshot valid.  There is no way of recovering
         * an invalid snapshot.
         */
-       uint32_t valid;
+       __le32 valid;
 
        /*
         * Simple, incrementing version. no backward
         * compatibility.
         */
-       uint32_t version;
+       __le32 version;
 
        /* In sectors */
-       uint32_t chunk_size;
-};
+       __le32 chunk_size;
+} __packed;
 
 struct disk_exception {
+       __le64 old_chunk;
+       __le64 new_chunk;
+} __packed;
+
+struct core_exception {
        uint64_t old_chunk;
        uint64_t new_chunk;
 };
@@ -169,10 +174,9 @@ static int alloc_area(struct pstore *ps)
        if (!ps->area)
                goto err_area;
 
-       ps->zero_area = vmalloc(len);
+       ps->zero_area = vzalloc(len);
        if (!ps->zero_area)
                goto err_zero_area;
-       memset(ps->zero_area, 0, len);
 
        ps->header_area = vmalloc(len);
        if (!ps->header_area)
@@ -396,32 +400,32 @@ static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
 }
 
 static void read_exception(struct pstore *ps,
-                          uint32_t index, struct disk_exception *result)
+                          uint32_t index, struct core_exception *result)
 {
-       struct disk_exception *e = get_exception(ps, index);
+       struct disk_exception *de = get_exception(ps, index);
 
        /* copy it */
-       result->old_chunk = le64_to_cpu(e->old_chunk);
-       result->new_chunk = le64_to_cpu(e->new_chunk);
+       result->old_chunk = le64_to_cpu(de->old_chunk);
+       result->new_chunk = le64_to_cpu(de->new_chunk);
 }
 
 static void write_exception(struct pstore *ps,
-                           uint32_t index, struct disk_exception *de)
+                           uint32_t index, struct core_exception *e)
 {
-       struct disk_exception *e = get_exception(ps, index);
+       struct disk_exception *de = get_exception(ps, index);
 
        /* copy it */
-       e->old_chunk = cpu_to_le64(de->old_chunk);
-       e->new_chunk = cpu_to_le64(de->new_chunk);
+       de->old_chunk = cpu_to_le64(e->old_chunk);
+       de->new_chunk = cpu_to_le64(e->new_chunk);
 }
 
 static void clear_exception(struct pstore *ps, uint32_t index)
 {
-       struct disk_exception *e = get_exception(ps, index);
+       struct disk_exception *de = get_exception(ps, index);
 
        /* clear it */
-       e->old_chunk = 0;
-       e->new_chunk = 0;
+       de->old_chunk = 0;
+       de->new_chunk = 0;
 }
 
 /*
@@ -437,13 +441,13 @@ static int insert_exceptions(struct pstore *ps,
 {
        int r;
        unsigned int i;
-       struct disk_exception de;
+       struct core_exception e;
 
        /* presume the area is full */
        *full = 1;
 
        for (i = 0; i < ps->exceptions_per_area; i++) {
-               read_exception(ps, i, &de);
+               read_exception(ps, i, &e);
 
                /*
                 * If the new_chunk is pointing at the start of
@@ -451,7 +455,7 @@ static int insert_exceptions(struct pstore *ps,
                 * is we know that we've hit the end of the
                 * exceptions.  Therefore the area is not full.
                 */
-               if (de.new_chunk == 0LL) {
+               if (e.new_chunk == 0LL) {
                        ps->current_committed = i;
                        *full = 0;
                        break;
@@ -460,13 +464,13 @@ static int insert_exceptions(struct pstore *ps,
                /*
                 * Keep track of the start of the free chunks.
                 */
-               if (ps->next_free <= de.new_chunk)
-                       ps->next_free = de.new_chunk + 1;
+               if (ps->next_free <= e.new_chunk)
+                       ps->next_free = e.new_chunk + 1;
 
                /*
                 * Otherwise we add the exception to the snapshot.
                 */
-               r = callback(callback_context, de.old_chunk, de.new_chunk);
+               r = callback(callback_context, e.old_chunk, e.new_chunk);
                if (r)
                        return r;
        }
@@ -563,7 +567,7 @@ static int persistent_read_metadata(struct dm_exception_store *store,
        ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
                                  sizeof(struct disk_exception);
        ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
-                       sizeof(*ps->callbacks));
+                                  sizeof(*ps->callbacks));
        if (!ps->callbacks)
                return -ENOMEM;
 
@@ -641,12 +645,12 @@ static void persistent_commit_exception(struct dm_exception_store *store,
 {
        unsigned int i;
        struct pstore *ps = get_info(store);
-       struct disk_exception de;
+       struct core_exception ce;
        struct commit_callback *cb;
 
-       de.old_chunk = e->old_chunk;
-       de.new_chunk = e->new_chunk;
-       write_exception(ps, ps->current_committed++, &de);
+       ce.old_chunk = e->old_chunk;
+       ce.new_chunk = e->new_chunk;
+       write_exception(ps, ps->current_committed++, &ce);
 
        /*
         * Add the callback to the back of the array.  This code
@@ -670,7 +674,7 @@ static void persistent_commit_exception(struct dm_exception_store *store,
         * If we completely filled the current area, then wipe the next one.
         */
        if ((ps->current_committed == ps->exceptions_per_area) &&
-            zero_disk_area(ps, ps->current_area + 1))
+           zero_disk_area(ps, ps->current_area + 1))
                ps->valid = 0;
 
        /*
@@ -701,7 +705,7 @@ static int persistent_prepare_merge(struct dm_exception_store *store,
                                    chunk_t *last_new_chunk)
 {
        struct pstore *ps = get_info(store);
-       struct disk_exception de;
+       struct core_exception ce;
        int nr_consecutive;
        int r;
 
@@ -722,9 +726,9 @@ static int persistent_prepare_merge(struct dm_exception_store *store,
                ps->current_committed = ps->exceptions_per_area;
        }
 
-       read_exception(ps, ps->current_committed - 1, &de);
-       *last_old_chunk = de.old_chunk;
-       *last_new_chunk = de.new_chunk;
+       read_exception(ps, ps->current_committed - 1, &ce);
+       *last_old_chunk = ce.old_chunk;
+       *last_new_chunk = ce.new_chunk;
 
        /*
         * Find number of consecutive chunks within the current area,
@@ -733,9 +737,9 @@ static int persistent_prepare_merge(struct dm_exception_store *store,
        for (nr_consecutive = 1; nr_consecutive < ps->current_committed;
             nr_consecutive++) {
                read_exception(ps, ps->current_committed - 1 - nr_consecutive,
-                              &de);
-               if (de.old_chunk != *last_old_chunk - nr_consecutive ||
-                   de.new_chunk != *last_new_chunk - nr_consecutive)
+                              &ce);
+               if (ce.old_chunk != *last_old_chunk - nr_consecutive ||
+                   ce.new_chunk != *last_new_chunk - nr_consecutive)
                        break;
        }
 
@@ -753,7 +757,7 @@ static int persistent_commit_merge(struct dm_exception_store *store,
        for (i = 0; i < nr_merged; i++)
                clear_exception(ps, ps->current_committed - 1 - i);
 
-       r = area_io(ps, WRITE);
+       r = area_io(ps, WRITE_FLUSH_FUA);
        if (r < 0)
                return r;
 
index 9ecff5f..6f75887 100644 (file)
@@ -30,16 +30,6 @@ static const char dm_snapshot_merge_target_name[] = "snapshot-merge";
        ((ti)->type->name == dm_snapshot_merge_target_name)
 
 /*
- * The percentage increment we will wake up users at
- */
-#define WAKE_UP_PERCENT 5
-
-/*
- * kcopyd priority of snapshot operations
- */
-#define SNAPSHOT_COPY_PRIORITY 2
-
-/*
  * The size of the mempool used to track chunks in use.
  */
 #define MIN_IOS 256
@@ -180,6 +170,13 @@ struct dm_snap_pending_exception {
         * kcopyd.
         */
        int started;
+
+       /*
+        * For writing a complete chunk, bypassing the copy.
+        */
+       struct bio *full_bio;
+       bio_end_io_t *full_bio_end_io;
+       void *full_bio_private;
 };
 
 /*
@@ -1055,8 +1052,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
        s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (!s) {
-               ti->error = "Cannot allocate snapshot context private "
-                   "structure";
+               ti->error = "Cannot allocate private snapshot structure";
                r = -ENOMEM;
                goto bad;
        }
@@ -1380,6 +1376,7 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
        struct dm_snapshot *s = pe->snap;
        struct bio *origin_bios = NULL;
        struct bio *snapshot_bios = NULL;
+       struct bio *full_bio = NULL;
        int error = 0;
 
        if (!success) {
@@ -1415,10 +1412,15 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
         */
        dm_insert_exception(&s->complete, e);
 
- out:
+out:
        dm_remove_exception(&pe->e);
        snapshot_bios = bio_list_get(&pe->snapshot_bios);
        origin_bios = bio_list_get(&pe->origin_bios);
+       full_bio = pe->full_bio;
+       if (full_bio) {
+               full_bio->bi_end_io = pe->full_bio_end_io;
+               full_bio->bi_private = pe->full_bio_private;
+       }
        free_pending_exception(pe);
 
        increment_pending_exceptions_done_count();
@@ -1426,10 +1428,15 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
        up_write(&s->lock);
 
        /* Submit any pending write bios */
-       if (error)
+       if (error) {
+               if (full_bio)
+                       bio_io_error(full_bio);
                error_bios(snapshot_bios);
-       else
+       } else {
+               if (full_bio)
+                       bio_endio(full_bio, 0);
                flush_bios(snapshot_bios);
+       }
 
        retry_origin_bios(s, origin_bios);
 }
@@ -1480,8 +1487,33 @@ static void start_copy(struct dm_snap_pending_exception *pe)
        dest.count = src.count;
 
        /* Hand over to kcopyd */
-       dm_kcopyd_copy(s->kcopyd_client,
-                   &src, 1, &dest, 0, copy_callback, pe);
+       dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
+}
+
+static void full_bio_end_io(struct bio *bio, int error)
+{
+       void *callback_data = bio->bi_private;
+
+       dm_kcopyd_do_callback(callback_data, 0, error ? 1 : 0);
+}
+
+static void start_full_bio(struct dm_snap_pending_exception *pe,
+                          struct bio *bio)
+{
+       struct dm_snapshot *s = pe->snap;
+       void *callback_data;
+
+       pe->full_bio = bio;
+       pe->full_bio_end_io = bio->bi_end_io;
+       pe->full_bio_private = bio->bi_private;
+
+       callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
+                                                  copy_callback, pe);
+
+       bio->bi_end_io = full_bio_end_io;
+       bio->bi_private = callback_data;
+
+       generic_make_request(bio);
 }
 
 static struct dm_snap_pending_exception *
@@ -1519,6 +1551,7 @@ __find_pending_exception(struct dm_snapshot *s,
        bio_list_init(&pe->origin_bios);
        bio_list_init(&pe->snapshot_bios);
        pe->started = 0;
+       pe->full_bio = NULL;
 
        if (s->store->type->prepare_exception(s->store, &pe->e)) {
                free_pending_exception(pe);
@@ -1612,10 +1645,19 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
                }
 
                remap_exception(s, &pe->e, bio, chunk);
-               bio_list_add(&pe->snapshot_bios, bio);
 
                r = DM_MAPIO_SUBMITTED;
 
+               if (!pe->started &&
+                   bio->bi_size == (s->store->chunk_size << SECTOR_SHIFT)) {
+                       pe->started = 1;
+                       up_write(&s->lock);
+                       start_full_bio(pe, bio);
+                       goto out;
+               }
+
+               bio_list_add(&pe->snapshot_bios, bio);
+
                if (!pe->started) {
                        /* this is protected by snap->lock */
                        pe->started = 1;
@@ -1628,9 +1670,9 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
                map_context->ptr = track_chunk(s, chunk);
        }
 
- out_unlock:
+out_unlock:
        up_write(&s->lock);
- out:
+out:
        return r;
 }
 
@@ -1974,7 +2016,7 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
                        pe_to_start_now = pe;
                }
 
- next_snapshot:
+next_snapshot:
                up_write(&snap->lock);
 
                if (pe_to_start_now) {
index bfe9c23..986b875 100644 (file)
@@ -54,7 +54,6 @@ struct dm_table {
        sector_t *highs;
        struct dm_target *targets;
 
-       unsigned discards_supported:1;
        unsigned integrity_supported:1;
 
        /*
@@ -154,12 +153,11 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size)
                return NULL;
 
        size = nmemb * elem_size;
-       addr = vmalloc(size);
-       if (addr)
-               memset(addr, 0, size);
+       addr = vzalloc(size);
 
        return addr;
 }
+EXPORT_SYMBOL(dm_vcalloc);
 
 /*
  * highs, and targets are managed as dynamic arrays during a
@@ -209,7 +207,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
        INIT_LIST_HEAD(&t->devices);
        INIT_LIST_HEAD(&t->target_callbacks);
        atomic_set(&t->holders, 0);
-       t->discards_supported = 1;
 
        if (!num_targets)
                num_targets = KEYS_PER_NODE;
@@ -281,6 +278,7 @@ void dm_table_get(struct dm_table *t)
 {
        atomic_inc(&t->holders);
 }
+EXPORT_SYMBOL(dm_table_get);
 
 void dm_table_put(struct dm_table *t)
 {
@@ -290,6 +288,7 @@ void dm_table_put(struct dm_table *t)
        smp_mb__before_atomic_dec();
        atomic_dec(&t->holders);
 }
+EXPORT_SYMBOL(dm_table_put);
 
 /*
  * Checks to see if we need to extend highs or targets.
@@ -455,13 +454,14 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
  * Add a device to the list, or just increment the usage count if
  * it's already present.
  */
-static int __table_get_device(struct dm_table *t, struct dm_target *ti,
-                     const char *path, fmode_t mode, struct dm_dev **result)
+int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
+                 struct dm_dev **result)
 {
        int r;
        dev_t uninitialized_var(dev);
        struct dm_dev_internal *dd;
        unsigned int major, minor;
+       struct dm_table *t = ti->table;
 
        BUG_ON(!t);
 
@@ -509,6 +509,7 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
        *result = &dd->dm_dev;
        return 0;
 }
+EXPORT_SYMBOL(dm_get_device);
 
 int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
                         sector_t start, sector_t len, void *data)
@@ -539,23 +540,15 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
         * If not we'll force DM to use PAGE_SIZE or
         * smaller I/O, just to be safe.
         */
-
-       if (q->merge_bvec_fn && !ti->type->merge)
+       if (dm_queue_merge_is_compulsory(q) && !ti->type->merge)
                blk_limits_max_hw_sectors(limits,
                                          (unsigned int) (PAGE_SIZE >> 9));
        return 0;
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);
 
-int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
-                 struct dm_dev **result)
-{
-       return __table_get_device(ti->table, ti, path, mode, result);
-}
-
-
 /*
- * Decrement a devices use count and remove it if necessary.
+ * Decrement a device's use count and remove it if necessary.
  */
 void dm_put_device(struct dm_target *ti, struct dm_dev *d)
 {
@@ -568,6 +561,7 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d)
                kfree(dd);
        }
 }
+EXPORT_SYMBOL(dm_put_device);
 
 /*
  * Checks to see if the target joins onto the end of the table.
@@ -791,8 +785,9 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 
        t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
 
-       if (!tgt->num_discard_requests)
-               t->discards_supported = 0;
+       if (!tgt->num_discard_requests && tgt->discards_supported)
+               DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.",
+                      dm_device_name(t->md), type);
 
        return 0;
 
@@ -802,6 +797,63 @@ int dm_table_add_target(struct dm_table *t, const char *type,
        return r;
 }
 
+/*
+ * Target argument parsing helpers.
+ */
+static int validate_next_arg(struct dm_arg *arg, struct dm_arg_set *arg_set,
+                            unsigned *value, char **error, unsigned grouped)
+{
+       const char *arg_str = dm_shift_arg(arg_set);
+
+       if (!arg_str ||
+           (sscanf(arg_str, "%u", value) != 1) ||
+           (*value < arg->min) ||
+           (*value > arg->max) ||
+           (grouped && arg_set->argc < *value)) {
+               *error = arg->error;
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set,
+               unsigned *value, char **error)
+{
+       return validate_next_arg(arg, arg_set, value, error, 0);
+}
+EXPORT_SYMBOL(dm_read_arg);
+
+int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set,
+                     unsigned *value, char **error)
+{
+       return validate_next_arg(arg, arg_set, value, error, 1);
+}
+EXPORT_SYMBOL(dm_read_arg_group);
+
+const char *dm_shift_arg(struct dm_arg_set *as)
+{
+       char *r;
+
+       if (as->argc) {
+               as->argc--;
+               r = *as->argv;
+               as->argv++;
+               return r;
+       }
+
+       return NULL;
+}
+EXPORT_SYMBOL(dm_shift_arg);
+
+void dm_consume_args(struct dm_arg_set *as, unsigned num_args)
+{
+       BUG_ON(as->argc < num_args);
+       as->argc -= num_args;
+       as->argv += num_args;
+}
+EXPORT_SYMBOL(dm_consume_args);
+
 static int dm_table_set_type(struct dm_table *t)
 {
        unsigned i;
@@ -1077,11 +1129,13 @@ void dm_table_event(struct dm_table *t)
                t->event_fn(t->event_context);
        mutex_unlock(&_event_lock);
 }
+EXPORT_SYMBOL(dm_table_event);
 
 sector_t dm_table_get_size(struct dm_table *t)
 {
        return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
 }
+EXPORT_SYMBOL(dm_table_get_size);
 
 struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index)
 {
@@ -1194,9 +1248,45 @@ static void dm_table_set_integrity(struct dm_table *t)
                               blk_get_integrity(template_disk));
 }
 
+static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
+                               sector_t start, sector_t len, void *data)
+{
+       unsigned flush = (*(unsigned *)data);
+       struct request_queue *q = bdev_get_queue(dev->bdev);
+
+       return q && (q->flush_flags & flush);
+}
+
+static bool dm_table_supports_flush(struct dm_table *t, unsigned flush)
+{
+       struct dm_target *ti;
+       unsigned i = 0;
+
+       /*
+        * Require at least one underlying device to support flushes.
+        * t->devices includes internal dm devices such as mirror logs
+        * so we need to use iterate_devices here, which targets
+        * supporting flushes must provide.
+        */
+       while (i < dm_table_get_num_targets(t)) {
+               ti = dm_table_get_target(t, i++);
+
+               if (!ti->num_flush_requests)
+                       continue;
+
+               if (ti->type->iterate_devices &&
+                   ti->type->iterate_devices(ti, device_flush_capable, &flush))
+                       return 1;
+       }
+
+       return 0;
+}
+
 void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
                               struct queue_limits *limits)
 {
+       unsigned flush = 0;
+
        /*
         * Copy table's limits to the DM device's request_queue
         */
@@ -1207,6 +1297,13 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
        else
                queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
 
+       if (dm_table_supports_flush(t, REQ_FLUSH)) {
+               flush |= REQ_FLUSH;
+               if (dm_table_supports_flush(t, REQ_FUA))
+                       flush |= REQ_FUA;
+       }
+       blk_queue_flush(q, flush);
+
        dm_table_set_integrity(t);
 
        /*
@@ -1237,6 +1334,7 @@ fmode_t dm_table_get_mode(struct dm_table *t)
 {
        return t->mode;
 }
+EXPORT_SYMBOL(dm_table_get_mode);
 
 static void suspend_targets(struct dm_table *t, unsigned postsuspend)
 {
@@ -1345,6 +1443,7 @@ struct mapped_device *dm_table_get_md(struct dm_table *t)
 {
        return t->md;
 }
+EXPORT_SYMBOL(dm_table_get_md);
 
 static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
                                  sector_t start, sector_t len, void *data)
@@ -1359,19 +1458,19 @@ bool dm_table_supports_discards(struct dm_table *t)
        struct dm_target *ti;
        unsigned i = 0;
 
-       if (!t->discards_supported)
-               return 0;
-
        /*
         * Unless any target used by the table set discards_supported,
         * require at least one underlying device to support discards.
         * t->devices includes internal dm devices such as mirror logs
         * so we need to use iterate_devices here, which targets
-        * supporting discard must provide.
+        * supporting discard selectively must provide.
         */
        while (i < dm_table_get_num_targets(t)) {
                ti = dm_table_get_target(t, i++);
 
+               if (!ti->num_discard_requests)
+                       continue;
+
                if (ti->discards_supported)
                        return 1;
 
@@ -1382,13 +1481,3 @@ bool dm_table_supports_discards(struct dm_table *t)
 
        return 0;
 }
-
-EXPORT_SYMBOL(dm_vcalloc);
-EXPORT_SYMBOL(dm_get_device);
-EXPORT_SYMBOL(dm_put_device);
-EXPORT_SYMBOL(dm_table_event);
-EXPORT_SYMBOL(dm_table_get_size);
-EXPORT_SYMBOL(dm_table_get_mode);
-EXPORT_SYMBOL(dm_table_get_md);
-EXPORT_SYMBOL(dm_table_put);
-EXPORT_SYMBOL(dm_table_get);
index 0cf68b4..52b39f3 100644 (file)
@@ -37,6 +37,8 @@ static const char *_name = DM_NAME;
 static unsigned int major = 0;
 static unsigned int _major = 0;
 
+static DEFINE_IDR(_minor_idr);
+
 static DEFINE_SPINLOCK(_minor_lock);
 /*
  * For bio-based dm.
@@ -109,6 +111,7 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
 #define DMF_FREEING 3
 #define DMF_DELETING 4
 #define DMF_NOFLUSH_SUSPENDING 5
+#define DMF_MERGE_IS_OPTIONAL 6
 
 /*
  * Work processed by per-device workqueue.
@@ -313,6 +316,12 @@ static void __exit dm_exit(void)
 
        while (i--)
                _exits[i]();
+
+       /*
+        * Should be empty by this point.
+        */
+       idr_remove_all(&_minor_idr);
+       idr_destroy(&_minor_idr);
 }
 
 /*
@@ -1171,7 +1180,8 @@ static int __clone_and_map_discard(struct clone_info *ci)
 
                /*
                 * Even though the device advertised discard support,
-                * reconfiguration might have changed that since the
+                * that does not mean every target supports it, and
+                * reconfiguration might also have changed that since the
                 * check was performed.
                 */
                if (!ti->num_discard_requests)
@@ -1705,8 +1715,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
 /*-----------------------------------------------------------------
  * An IDR is used to keep track of allocated minor numbers.
  *---------------------------------------------------------------*/
-static DEFINE_IDR(_minor_idr);
-
 static void free_minor(int minor)
 {
        spin_lock(&_minor_lock);
@@ -1800,7 +1808,6 @@ static void dm_init_md_queue(struct mapped_device *md)
        blk_queue_make_request(md->queue, dm_request);
        blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
        blk_queue_merge_bvec(md->queue, dm_merge_bvec);
-       blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA);
 }
 
 /*
@@ -1986,6 +1993,59 @@ static void __set_size(struct mapped_device *md, sector_t size)
 }
 
 /*
+ * Return 1 if the queue has a compulsory merge_bvec_fn function.
+ *
+ * If this function returns 0, then the device is either a non-dm
+ * device without a merge_bvec_fn, or it is a dm device that is
+ * able to split any bios it receives that are too big.
+ */
+int dm_queue_merge_is_compulsory(struct request_queue *q)
+{
+       struct mapped_device *dev_md;
+
+       if (!q->merge_bvec_fn)
+               return 0;
+
+       if (q->make_request_fn == dm_request) {
+               dev_md = q->queuedata;
+               if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags))
+                       return 0;
+       }
+
+       return 1;
+}
+
+static int dm_device_merge_is_compulsory(struct dm_target *ti,
+                                        struct dm_dev *dev, sector_t start,
+                                        sector_t len, void *data)
+{
+       struct block_device *bdev = dev->bdev;
+       struct request_queue *q = bdev_get_queue(bdev);
+
+       return dm_queue_merge_is_compulsory(q);
+}
+
+/*
+ * Return 1 if it is acceptable to ignore merge_bvec_fn based
+ * on the properties of the underlying devices.
+ */
+static int dm_table_merge_is_optional(struct dm_table *table)
+{
+       unsigned i = 0;
+       struct dm_target *ti;
+
+       while (i < dm_table_get_num_targets(table)) {
+               ti = dm_table_get_target(table, i++);
+
+               if (ti->type->iterate_devices &&
+                   ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL))
+                       return 0;
+       }
+
+       return 1;
+}
+
+/*
  * Returns old map, which caller must destroy.
  */
 static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
@@ -1995,6 +2055,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
        struct request_queue *q = md->queue;
        sector_t size;
        unsigned long flags;
+       int merge_is_optional;
 
        size = dm_table_get_size(t);
 
@@ -2020,10 +2081,16 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
 
        __bind_mempools(md, t);
 
+       merge_is_optional = dm_table_merge_is_optional(t);
+
        write_lock_irqsave(&md->map_lock, flags);
        old_map = md->map;
        md->map = t;
        dm_table_set_restrictions(t, q, limits);
+       if (merge_is_optional)
+               set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
+       else
+               clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
        write_unlock_irqrestore(&md->map_lock, flags);
 
        return old_map;
index 1aaf167..6745dbd 100644 (file)
@@ -66,6 +66,8 @@ int dm_table_alloc_md_mempools(struct dm_table *t);
 void dm_table_free_md_mempools(struct dm_table *t);
 struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
 
+int dm_queue_merge_is_compulsory(struct request_queue *q);
+
 void dm_lock_md_type(struct mapped_device *md);
 void dm_unlock_md_type(struct mapped_device *md);
 void dm_set_md_type(struct mapped_device *md, unsigned type);
index da1f4b9..72c33fb 100644 (file)
@@ -610,6 +610,6 @@ void __iomem *of_iomap(struct device_node *np, int index)
        if (of_address_to_resource(np, index, &res))
                return NULL;
 
-       return ioremap(res.start, 1 + res.end - res.start);
+       return ioremap(res.start, resource_size(&res));
 }
 EXPORT_SYMBOL(of_iomap);
index 02ed367..fb28b5a 100644 (file)
  *      as published by the Free Software Foundation; either version
  *      2 of the License, or (at your option) any later version.
  */
+#include <linux/ctype.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
 
+/**
+ * struct alias_prop - Alias property in 'aliases' node
+ * @link:      List node to link the structure in aliases_lookup list
+ * @alias:     Alias property name
+ * @np:                Pointer to device_node that the alias stands for
+ * @id:                Index value from end of alias name
+ * @stem:      Alias string without the index
+ *
+ * The structure represents one alias property of 'aliases' node as
+ * an entry in aliases_lookup list.
+ */
+struct alias_prop {
+       struct list_head link;
+       const char *alias;
+       struct device_node *np;
+       int id;
+       char stem[0];
+};
+
+static LIST_HEAD(aliases_lookup);
+
 struct device_node *allnodes;
 struct device_node *of_chosen;
+struct device_node *of_aliases;
+
+static DEFINE_MUTEX(of_aliases_mutex);
 
 /* use when traversing tree through the allnext, child, sibling,
  * or parent members of struct device_node.
@@ -610,8 +635,9 @@ EXPORT_SYMBOL(of_find_node_by_phandle);
  *
  * The out_value is modified only if a valid u32 value can be decoded.
  */
-int of_property_read_u32_array(const struct device_node *np, char *propname,
-                              u32 *out_values, size_t sz)
+int of_property_read_u32_array(const struct device_node *np,
+                              const char *propname, u32 *out_values,
+                              size_t sz)
 {
        struct property *prop = of_find_property(np, propname, NULL);
        const __be32 *val;
@@ -645,7 +671,7 @@ EXPORT_SYMBOL_GPL(of_property_read_u32_array);
  *
  * The out_string pointer is modified only if a valid string can be decoded.
  */
-int of_property_read_string(struct device_node *np, char *propname,
+int of_property_read_string(struct device_node *np, const char *propname,
                                const char **out_string)
 {
        struct property *prop = of_find_property(np, propname, NULL);
@@ -987,3 +1013,108 @@ out_unlock:
 }
 #endif /* defined(CONFIG_OF_DYNAMIC) */
 
+static void of_alias_add(struct alias_prop *ap, struct device_node *np,
+                        int id, const char *stem, int stem_len)
+{
+       ap->id = id;
+       ap->np = np;
+       strncpy(ap->stem, stem, stem_len);
+       ap->stem[stem_len] = 0;
+       list_add_tail(&ap->link, &aliases_lookup);
+       pr_debug("adding DT alias:%s: stem=%s id=%i node=%s\n",
+                ap->alias, ap->stem, ap->id, np ? np->full_name : NULL);
+}
+
+/**
+ * of_alias_scan() - Scan all properties of 'aliases' node
+ *
+ * The function scans all the properties of 'aliases' node and populate
+ * the global lookup table with the properties.  It returns the
+ * number of alias_prop found, or error code in error case.
+ */
+__init void of_alias_scan(void)
+{
+       struct property *pp;
+
+       if (!of_aliases)
+               return;
+
+       for_each_property(pp, of_aliases->properties) {
+               const char *start = pp->name;
+               const char *end = start + strlen(start);
+               struct device_node *np;
+               struct alias_prop *ap;
+               int id, len;
+
+               /* Skip those we do not want to proceed */
+               if (!strcmp(pp->name, "name") ||
+                   !strcmp(pp->name, "phandle") ||
+                   !strcmp(pp->name, "linux,phandle"))
+                       continue;
+
+               np = of_find_node_by_path(pp->value);
+               if (!np)
+                       continue;
+
+               /* walk alias backwards to extract the id and 'stem' string */
+               while (isdigit(*(end-1)) && end > start)
+                       end--;
+               len = end - start;
+               id = strlen(end) ? simple_strtoul(end, NULL, 10) : -1;
+
+               /* Allocate an alias_prop with enough space for the stem */
+               ap = early_init_dt_alloc_memory_arch(sizeof(*ap) + len + 1, 4);
+               if (!ap)
+                       continue;
+               ap->alias = start;
+               of_alias_add(ap, np, id, start, len);
+       }
+}
+
+/**
+ * of_alias_get_id() - Get alias id for the given device_node
+ * @np:                Pointer to the given device_node
+ * @stem:      Alias stem of the given device_node
+ *
+ * The function travels the lookup table to get alias id for the given
+ * device_node and alias stem.  It returns the alias id if find it.
+ * If not, dynamically creates one in the lookup table and returns it,
+ * or returns error code if fail to create.
+ */
+int of_alias_get_id(struct device_node *np, const char *stem)
+{
+       struct alias_prop *app;
+       int id = 0;
+       bool found = false;
+
+       mutex_lock(&of_aliases_mutex);
+       list_for_each_entry(app, &aliases_lookup, link) {
+               if (strcmp(app->stem, stem) != 0)
+                       continue;
+
+               if (np == app->np) {
+                       found = true;
+                       id = app->id;
+                       break;
+               }
+
+               if (id <= app->id)
+                       id = app->id + 1;
+       }
+
+       /* If an id is not found, then allocate a new one */
+       if (!found) {
+               app = kzalloc(sizeof(*app) + strlen(stem) + 1, 4);
+               if (!app) {
+                       id = -ENODEV;
+                       goto out;
+               }
+               of_alias_add(app, np, id, stem, strlen(stem));
+       }
+
+ out:
+       mutex_unlock(&of_aliases_mutex);
+
+       return id;
+}
+EXPORT_SYMBOL_GPL(of_alias_get_id);
index 65200af..13d6d3a 100644 (file)
@@ -707,10 +707,12 @@ void __init unflatten_device_tree(void)
        __unflatten_device_tree(initial_boot_params, &allnodes,
                                early_init_dt_alloc_memory_arch);
 
-       /* Get pointer to OF "/chosen" node for use everywhere */
+       /* Get pointer to "/chosen" and "/aliasas" nodes for use everywhere */
        of_chosen = of_find_node_by_path("/chosen");
        if (of_chosen == NULL)
                of_chosen = of_find_node_by_path("/chosen@0");
+       of_aliases = of_find_node_by_path("/aliases");
+       of_alias_scan();
 }
 
 #endif /* CONFIG_OF_EARLY_FLATTREE */
index a70fa89..2202857 100644 (file)
@@ -110,7 +110,7 @@ static int post_dock_fixups(struct notifier_block *nb, unsigned long val,
 }
 
 
-static struct acpi_dock_ops acpiphp_dock_ops = {
+static const struct acpi_dock_ops acpiphp_dock_ops = {
        .handler = handle_hotplug_event_func,
 };
 
index bcae8dd..7789002 100644 (file)
@@ -368,7 +368,7 @@ static int __init omap_rtc_probe(struct platform_device *pdev)
                pr_info("%s: already running\n", pdev->name);
 
        /* force to 24 hour mode */
-       new_ctrl = reg & ~(OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP);
+       new_ctrl = reg & (OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP);
        new_ctrl |= OMAP_RTC_CTRL_STOP;
 
        /* BOARD-SPECIFIC CUSTOMIZATION CAN GO HERE:
index eba88c7..730b4a3 100644 (file)
@@ -2267,17 +2267,13 @@ static int __devexit
 pl022_remove(struct amba_device *adev)
 {
        struct pl022 *pl022 = amba_get_drvdata(adev);
-       int status = 0;
+
        if (!pl022)
                return 0;
 
        /* Remove the queue */
-       status = destroy_queue(pl022);
-       if (status != 0) {
-               dev_err(&adev->dev,
-                       "queue remove failed (%d)\n", status);
-               return status;
-       }
+       if (destroy_queue(pl022) != 0)
+               dev_err(&adev->dev, "queue remove failed\n");
        load_ssp_default_config(pl022);
        pl022_dma_remove(pl022);
        free_irq(adev->irq[0], pl022);
@@ -2289,7 +2285,6 @@ pl022_remove(struct amba_device *adev)
        spi_unregister_master(pl022->master);
        spi_master_put(pl022->master);
        amba_set_drvdata(adev, NULL);
-       dev_dbg(&adev->dev, "remove succeeded\n");
        return 0;
 }
 
index 564ff4e..8345fb4 100644 (file)
@@ -1,5 +1,6 @@
 config ISCSI_TARGET
        tristate "Linux-iSCSI.org iSCSI Target Mode Stack"
+       depends on NET
        select CRYPTO
        select CRYPTO_CRC32C
        select CRYPTO_CRC32C_INTEL if X86
index 14c81c4..c24fb10 100644 (file)
@@ -120,7 +120,7 @@ struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf)
        struct iscsi_tiqn *tiqn = NULL;
        int ret;
 
-       if (strlen(buf) > ISCSI_IQN_LEN) {
+       if (strlen(buf) >= ISCSI_IQN_LEN) {
                pr_err("Target IQN exceeds %d bytes\n",
                                ISCSI_IQN_LEN);
                return ERR_PTR(-EINVAL);
@@ -1857,7 +1857,7 @@ static int iscsit_handle_text_cmd(
        char *text_ptr, *text_in;
        int cmdsn_ret, niov = 0, rx_got, rx_size;
        u32 checksum = 0, data_crc = 0, payload_length;
-       u32 padding = 0, text_length = 0;
+       u32 padding = 0, pad_bytes = 0, text_length = 0;
        struct iscsi_cmd *cmd;
        struct kvec iov[3];
        struct iscsi_text *hdr;
@@ -1896,7 +1896,7 @@ static int iscsit_handle_text_cmd(
 
                padding = ((-payload_length) & 3);
                if (padding != 0) {
-                       iov[niov].iov_base = cmd->pad_bytes;
+                       iov[niov].iov_base = &pad_bytes;
                        iov[niov++].iov_len  = padding;
                        rx_size += padding;
                        pr_debug("Receiving %u additional bytes"
@@ -1917,7 +1917,7 @@ static int iscsit_handle_text_cmd(
                if (conn->conn_ops->DataDigest) {
                        iscsit_do_crypto_hash_buf(&conn->conn_rx_hash,
                                        text_in, text_length,
-                                       padding, cmd->pad_bytes,
+                                       padding, (u8 *)&pad_bytes,
                                        (u8 *)&data_crc);
 
                        if (checksum != data_crc) {
@@ -3468,7 +3468,12 @@ static inline void iscsit_thread_check_cpumask(
 }
 
 #else
-#define iscsit_thread_get_cpumask(X) ({})
+
+void iscsit_thread_get_cpumask(struct iscsi_conn *conn)
+{
+       return;
+}
+
 #define iscsit_thread_check_cpumask(X, Y, Z) ({})
 #endif /* CONFIG_SMP */
 
index 32bb92c..f095e65 100644 (file)
@@ -181,7 +181,7 @@ struct se_tpg_np *lio_target_call_addnptotpg(
                return ERR_PTR(-EOVERFLOW);
        }
        memset(buf, 0, MAX_PORTAL_LEN + 1);
-       snprintf(buf, MAX_PORTAL_LEN, "%s", name);
+       snprintf(buf, MAX_PORTAL_LEN + 1, "%s", name);
 
        memset(&sockaddr, 0, sizeof(struct __kernel_sockaddr_storage));
 
index 713a4d2..4d087ac 100644 (file)
@@ -978,7 +978,7 @@ struct iscsi_login *iscsi_target_init_negotiation(
                pr_err("Unable to allocate memory for struct iscsi_login.\n");
                iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
                                ISCSI_LOGIN_STATUS_NO_RESOURCES);
-               goto out;
+               return NULL;
        }
 
        login->req = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL);
index c75a01a..8976032 100644 (file)
@@ -1747,6 +1747,8 @@ int transport_generic_handle_cdb(
 }
 EXPORT_SYMBOL(transport_generic_handle_cdb);
 
+static void transport_generic_request_failure(struct se_cmd *,
+                       struct se_device *, int, int);
 /*
  * Used by fabric module frontends to queue tasks directly.
  * Many only be used from process context only
@@ -1754,6 +1756,8 @@ EXPORT_SYMBOL(transport_generic_handle_cdb);
 int transport_handle_cdb_direct(
        struct se_cmd *cmd)
 {
+       int ret;
+
        if (!cmd->se_lun) {
                dump_stack();
                pr_err("cmd->se_lun is NULL\n");
@@ -1765,8 +1769,31 @@ int transport_handle_cdb_direct(
                                " from interrupt context\n");
                return -EINVAL;
        }
-
-       return transport_generic_new_cmd(cmd);
+       /*
+        * Set TRANSPORT_NEW_CMD state and cmd->t_transport_active=1 following
+        * transport_generic_handle_cdb*() -> transport_add_cmd_to_queue()
+        * in existing usage to ensure that outstanding descriptors are handled
+        * correctly during shutdown via transport_generic_wait_for_tasks()
+        *
+        * Also, we don't take cmd->t_state_lock here as we only expect
+        * this to be called for initial descriptor submission.
+        */
+       cmd->t_state = TRANSPORT_NEW_CMD;
+       atomic_set(&cmd->t_transport_active, 1);
+       /*
+        * transport_generic_new_cmd() is already handling QUEUE_FULL,
+        * so follow TRANSPORT_NEW_CMD processing thread context usage
+        * and call transport_generic_request_failure() if necessary..
+        */
+       ret = transport_generic_new_cmd(cmd);
+       if (ret == -EAGAIN)
+               return 0;
+       else if (ret < 0) {
+               cmd->transport_error_status = ret;
+               transport_generic_request_failure(cmd, NULL, 0,
+                               (cmd->data_direction != DMA_TO_DEVICE));
+       }
+       return 0;
 }
 EXPORT_SYMBOL(transport_handle_cdb_direct);
 
@@ -3324,7 +3351,7 @@ static int transport_generic_cmd_sequencer(
                        goto out_invalid_cdb_field;
                }
 
-               cmd->t_task_lba = get_unaligned_be16(&cdb[2]);
+               cmd->t_task_lba = get_unaligned_be64(&cdb[2]);
                passthrough = (dev->transport->transport_type ==
                                TRANSPORT_PLUGIN_PHBA_PDEV);
                /*
index f7fff7e..bd4fe21 100644 (file)
@@ -187,4 +187,9 @@ void ft_dump_cmd(struct ft_cmd *, const char *caller);
 
 ssize_t ft_format_wwn(char *, size_t, u64);
 
+/*
+ * Underlying HW specific helper function
+ */
+void ft_invl_hw_context(struct ft_cmd *);
+
 #endif /* __TCM_FC_H__ */
index 09df38b..5654dc2 100644 (file)
@@ -320,6 +320,7 @@ static void ft_recv_seq(struct fc_seq *sp, struct fc_frame *fp, void *arg)
        default:
                pr_debug("%s: unhandled frame r_ctl %x\n",
                       __func__, fh->fh_r_ctl);
+               ft_invl_hw_context(cmd);
                fc_frame_free(fp);
                transport_generic_free_cmd(&cmd->se_cmd, 0, 0);
                break;
index 8e2a46d..c37f4cd 100644 (file)
@@ -213,62 +213,49 @@ void ft_recv_write_data(struct ft_cmd *cmd, struct fc_frame *fp)
        if (!(ntoh24(fh->fh_f_ctl) & FC_FC_REL_OFF))
                goto drop;
 
+       f_ctl = ntoh24(fh->fh_f_ctl);
+       ep = fc_seq_exch(seq);
+       lport = ep->lp;
+       if (cmd->was_ddp_setup) {
+               BUG_ON(!ep);
+               BUG_ON(!lport);
+       }
+
        /*
-        * Doesn't expect even single byte of payload. Payload
+        * Doesn't expect payload if DDP is setup. Payload
         * is expected to be copied directly to user buffers
-        * due to DDP (Large Rx offload) feature, hence
-        * BUG_ON if BUF is non-NULL
+        * due to DDP (Large Rx offload),
         */
        buf = fc_frame_payload_get(fp, 1);
-       if (cmd->was_ddp_setup && buf) {
-               pr_debug("%s: When DDP was setup, not expected to"
-                                "receive frame with payload, Payload shall be"
-                                "copied directly to buffer instead of coming "
-                                "via. legacy receive queues\n", __func__);
-               BUG_ON(buf);
-       }
+       if (buf)
+               pr_err("%s: xid 0x%x, f_ctl 0x%x, cmd->sg %p, "
+                               "cmd->sg_cnt 0x%x. DDP was setup"
+                               " hence not expected to receive frame with "
+                               "payload, Frame will be dropped if "
+                               "'Sequence Initiative' bit in f_ctl is "
+                               "not set\n", __func__, ep->xid, f_ctl,
+                               cmd->sg, cmd->sg_cnt);
+       /*
+        * Invalidate HW DDP context if it was setup for respective
+        * command. Invalidation of HW DDP context is requited in both
+        * situation (success and error). 
+        */
+       ft_invl_hw_context(cmd);
 
        /*
-        * If ft_cmd indicated 'ddp_setup', in that case only the last frame
-        * should come with 'TSI bit being set'. If 'TSI bit is not set and if
-        * data frame appears here, means error condition. In both the cases
-        * release the DDP context (ddp_put) and in error case, as well
-        * initiate error recovery mechanism.
+        * If "Sequence Initiative (TSI)" bit set in f_ctl, means last
+        * write data frame is received successfully where payload is
+        * posted directly to user buffer and only the last frame's
+        * header is posted in receive queue.
+        *
+        * If "Sequence Initiative (TSI)" bit is not set, means error
+        * condition w.r.t. DDP, hence drop the packet and let explict
+        * ABORTS from other end of exchange timer trigger the recovery.
         */
-       ep = fc_seq_exch(seq);
-       if (cmd->was_ddp_setup) {
-               BUG_ON(!ep);
-               lport = ep->lp;
-               BUG_ON(!lport);
-       }
-       if (cmd->was_ddp_setup && ep->xid != FC_XID_UNKNOWN) {
-               f_ctl = ntoh24(fh->fh_f_ctl);
-               /*
-                * If TSI bit set in f_ctl, means last write data frame is
-                * received successfully where payload is posted directly
-                * to user buffer and only the last frame's header is posted
-                * in legacy receive queue
-                */
-               if (f_ctl & FC_FC_SEQ_INIT) { /* TSI bit set in FC frame */
-                       cmd->write_data_len = lport->tt.ddp_done(lport,
-                                                               ep->xid);
-                       goto last_frame;
-               } else {
-                       /*
-                        * Updating the write_data_len may be meaningless at
-                        * this point, but just in case if required in future
-                        * for debugging or any other purpose
-                        */
-                       pr_err("%s: Received frame with TSI bit not"
-                                       " being SET, dropping the frame, "
-                                       "cmd->sg <%p>, cmd->sg_cnt <0x%x>\n",
-                                       __func__, cmd->sg, cmd->sg_cnt);
-                       cmd->write_data_len = lport->tt.ddp_done(lport,
-                                                             ep->xid);
-                       lport->tt.seq_exch_abort(cmd->seq, 0);
-                       goto drop;
-               }
-       }
+       if (f_ctl & FC_FC_SEQ_INIT)
+               goto last_frame;
+       else
+               goto drop;
 
        rel_off = ntohl(fh->fh_parm_offset);
        frame_len = fr_len(fp);
@@ -331,3 +318,39 @@ last_frame:
 drop:
        fc_frame_free(fp);
 }
+
+/*
+ * Handle and cleanup any HW specific resources if
+ * received ABORTS, errors, timeouts.
+ */
+void ft_invl_hw_context(struct ft_cmd *cmd)
+{
+       struct fc_seq *seq = cmd->seq;
+       struct fc_exch *ep = NULL;
+       struct fc_lport *lport = NULL;
+
+       BUG_ON(!cmd);
+
+       /* Cleanup the DDP context in HW if DDP was setup */
+       if (cmd->was_ddp_setup && seq) {
+               ep = fc_seq_exch(seq);
+               if (ep) {
+                       lport = ep->lp;
+                       if (lport && (ep->xid <= lport->lro_xid))
+                               /*
+                                * "ddp_done" trigger invalidation of HW
+                                * specific DDP context
+                                */
+                               cmd->write_data_len = lport->tt.ddp_done(lport,
+                                                                     ep->xid);
+
+                               /*
+                                * Resetting same variable to indicate HW's
+                                * DDP context has been invalidated to avoid
+                                * re_invalidation of same context (context is
+                                * identified using ep->xid)
+                                */
+                               cmd->was_ddp_setup = 0;
+               }
+       }
+}
index bf7c687..f7f71b2 100644 (file)
@@ -14,11 +14,7 @@ menuconfig THERMAL
          If you want this support, you should say Y or M here.
 
 config THERMAL_HWMON
-       bool "Hardware monitoring support"
+       bool
        depends on THERMAL
        depends on HWMON=y || HWMON=THERMAL
-       help
-         The generic thermal sysfs driver's hardware monitoring support
-         requires a 2.10.7/3.0.2 or later lm-sensors userspace.
-
-         Say Y if your user-space is new enough.
+       default y
index 0b1c82a..708f8e9 100644 (file)
@@ -420,6 +420,29 @@ thermal_cooling_device_trip_point_show(struct device *dev,
 
 /* hwmon sys I/F */
 #include <linux/hwmon.h>
+
+/* thermal zone devices with the same type share one hwmon device */
+struct thermal_hwmon_device {
+       char type[THERMAL_NAME_LENGTH];
+       struct device *device;
+       int count;
+       struct list_head tz_list;
+       struct list_head node;
+};
+
+struct thermal_hwmon_attr {
+       struct device_attribute attr;
+       char name[16];
+};
+
+/* one temperature input for each thermal zone */
+struct thermal_hwmon_temp {
+       struct list_head hwmon_node;
+       struct thermal_zone_device *tz;
+       struct thermal_hwmon_attr temp_input;   /* hwmon sys attr */
+       struct thermal_hwmon_attr temp_crit;    /* hwmon sys attr */
+};
+
 static LIST_HEAD(thermal_hwmon_list);
 
 static ssize_t
@@ -437,9 +460,10 @@ temp_input_show(struct device *dev, struct device_attribute *attr, char *buf)
        int ret;
        struct thermal_hwmon_attr *hwmon_attr
                        = container_of(attr, struct thermal_hwmon_attr, attr);
-       struct thermal_zone_device *tz
-                       = container_of(hwmon_attr, struct thermal_zone_device,
+       struct thermal_hwmon_temp *temp
+                       = container_of(hwmon_attr, struct thermal_hwmon_temp,
                                       temp_input);
+       struct thermal_zone_device *tz = temp->tz;
 
        ret = tz->ops->get_temp(tz, &temperature);
 
@@ -455,9 +479,10 @@ temp_crit_show(struct device *dev, struct device_attribute *attr,
 {
        struct thermal_hwmon_attr *hwmon_attr
                        = container_of(attr, struct thermal_hwmon_attr, attr);
-       struct thermal_zone_device *tz
-                       = container_of(hwmon_attr, struct thermal_zone_device,
+       struct thermal_hwmon_temp *temp
+                       = container_of(hwmon_attr, struct thermal_hwmon_temp,
                                       temp_crit);
+       struct thermal_zone_device *tz = temp->tz;
        long temperature;
        int ret;
 
@@ -469,22 +494,54 @@ temp_crit_show(struct device *dev, struct device_attribute *attr,
 }
 
 
-static int
-thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
+static struct thermal_hwmon_device *
+thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz)
 {
        struct thermal_hwmon_device *hwmon;
-       int new_hwmon_device = 1;
-       int result;
 
        mutex_lock(&thermal_list_lock);
        list_for_each_entry(hwmon, &thermal_hwmon_list, node)
                if (!strcmp(hwmon->type, tz->type)) {
-                       new_hwmon_device = 0;
                        mutex_unlock(&thermal_list_lock);
-                       goto register_sys_interface;
+                       return hwmon;
+               }
+       mutex_unlock(&thermal_list_lock);
+
+       return NULL;
+}
+
+/* Find the temperature input matching a given thermal zone */
+static struct thermal_hwmon_temp *
+thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon,
+                         const struct thermal_zone_device *tz)
+{
+       struct thermal_hwmon_temp *temp;
+
+       mutex_lock(&thermal_list_lock);
+       list_for_each_entry(temp, &hwmon->tz_list, hwmon_node)
+               if (temp->tz == tz) {
+                       mutex_unlock(&thermal_list_lock);
+                       return temp;
                }
        mutex_unlock(&thermal_list_lock);
 
+       return NULL;
+}
+
+static int
+thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
+{
+       struct thermal_hwmon_device *hwmon;
+       struct thermal_hwmon_temp *temp;
+       int new_hwmon_device = 1;
+       int result;
+
+       hwmon = thermal_hwmon_lookup_by_type(tz);
+       if (hwmon) {
+               new_hwmon_device = 0;
+               goto register_sys_interface;
+       }
+
        hwmon = kzalloc(sizeof(struct thermal_hwmon_device), GFP_KERNEL);
        if (!hwmon)
                return -ENOMEM;
@@ -502,30 +559,36 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
                goto free_mem;
 
  register_sys_interface:
-       tz->hwmon = hwmon;
+       temp = kzalloc(sizeof(struct thermal_hwmon_temp), GFP_KERNEL);
+       if (!temp) {
+               result = -ENOMEM;
+               goto unregister_name;
+       }
+
+       temp->tz = tz;
        hwmon->count++;
 
-       snprintf(tz->temp_input.name, THERMAL_NAME_LENGTH,
+       snprintf(temp->temp_input.name, THERMAL_NAME_LENGTH,
                 "temp%d_input", hwmon->count);
-       tz->temp_input.attr.attr.name = tz->temp_input.name;
-       tz->temp_input.attr.attr.mode = 0444;
-       tz->temp_input.attr.show = temp_input_show;
-       sysfs_attr_init(&tz->temp_input.attr.attr);
-       result = device_create_file(hwmon->device, &tz->temp_input.attr);
+       temp->temp_input.attr.attr.name = temp->temp_input.name;
+       temp->temp_input.attr.attr.mode = 0444;
+       temp->temp_input.attr.show = temp_input_show;
+       sysfs_attr_init(&temp->temp_input.attr.attr);
+       result = device_create_file(hwmon->device, &temp->temp_input.attr);
        if (result)
-               goto unregister_name;
+               goto free_temp_mem;
 
        if (tz->ops->get_crit_temp) {
                unsigned long temperature;
                if (!tz->ops->get_crit_temp(tz, &temperature)) {
-                       snprintf(tz->temp_crit.name, THERMAL_NAME_LENGTH,
+                       snprintf(temp->temp_crit.name, THERMAL_NAME_LENGTH,
                                "temp%d_crit", hwmon->count);
-                       tz->temp_crit.attr.attr.name = tz->temp_crit.name;
-                       tz->temp_crit.attr.attr.mode = 0444;
-                       tz->temp_crit.attr.show = temp_crit_show;
-                       sysfs_attr_init(&tz->temp_crit.attr.attr);
+                       temp->temp_crit.attr.attr.name = temp->temp_crit.name;
+                       temp->temp_crit.attr.attr.mode = 0444;
+                       temp->temp_crit.attr.show = temp_crit_show;
+                       sysfs_attr_init(&temp->temp_crit.attr.attr);
                        result = device_create_file(hwmon->device,
-                                                   &tz->temp_crit.attr);
+                                                   &temp->temp_crit.attr);
                        if (result)
                                goto unregister_input;
                }
@@ -534,13 +597,15 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
        mutex_lock(&thermal_list_lock);
        if (new_hwmon_device)
                list_add_tail(&hwmon->node, &thermal_hwmon_list);
-       list_add_tail(&tz->hwmon_node, &hwmon->tz_list);
+       list_add_tail(&temp->hwmon_node, &hwmon->tz_list);
        mutex_unlock(&thermal_list_lock);
 
        return 0;
 
  unregister_input:
-       device_remove_file(hwmon->device, &tz->temp_input.attr);
+       device_remove_file(hwmon->device, &temp->temp_input.attr);
+ free_temp_mem:
+       kfree(temp);
  unregister_name:
        if (new_hwmon_device) {
                device_remove_file(hwmon->device, &dev_attr_name);
@@ -556,15 +621,30 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
 static void
 thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz)
 {
-       struct thermal_hwmon_device *hwmon = tz->hwmon;
+       struct thermal_hwmon_device *hwmon;
+       struct thermal_hwmon_temp *temp;
+
+       hwmon = thermal_hwmon_lookup_by_type(tz);
+       if (unlikely(!hwmon)) {
+               /* Should never happen... */
+               dev_dbg(&tz->device, "hwmon device lookup failed!\n");
+               return;
+       }
+
+       temp = thermal_hwmon_lookup_temp(hwmon, tz);
+       if (unlikely(!temp)) {
+               /* Should never happen... */
+               dev_dbg(&tz->device, "temperature input lookup failed!\n");
+               return;
+       }
 
-       tz->hwmon = NULL;
-       device_remove_file(hwmon->device, &tz->temp_input.attr);
+       device_remove_file(hwmon->device, &temp->temp_input.attr);
        if (tz->ops->get_crit_temp)
-               device_remove_file(hwmon->device, &tz->temp_crit.attr);
+               device_remove_file(hwmon->device, &temp->temp_crit.attr);
 
        mutex_lock(&thermal_list_lock);
-       list_del(&tz->hwmon_node);
+       list_del(&temp->hwmon_node);
+       kfree(temp);
        if (!list_empty(&hwmon->tz_list)) {
                mutex_unlock(&thermal_list_lock);
                return;
index 69407e7..278aeaa 100644 (file)
@@ -336,7 +336,7 @@ config BACKLIGHT_PCF50633
          enable its driver.
 
 config BACKLIGHT_AAT2870
-       bool "AnalogicTech AAT2870 Backlight"
+       tristate "AnalogicTech AAT2870 Backlight"
        depends on BACKLIGHT_CLASS_DEVICE && MFD_AAT2870_CORE
        help
          If you have a AnalogicTech AAT2870 say Y to enable the
index 4952a61..331f1ef 100644 (file)
@@ -44,7 +44,7 @@ static inline int aat2870_brightness(struct aat2870_bl_driver_data *aat2870_bl,
        struct backlight_device *bd = aat2870_bl->bd;
        int val;
 
-       val = brightness * aat2870_bl->max_current;
+       val = brightness * (aat2870_bl->max_current - 1);
        val /= bd->props.max_brightness;
 
        return val;
@@ -158,10 +158,10 @@ static int aat2870_bl_probe(struct platform_device *pdev)
        props.type = BACKLIGHT_RAW;
        bd = backlight_device_register("aat2870-backlight", &pdev->dev,
                                       aat2870_bl, &aat2870_bl_ops, &props);
-       if (!bd) {
+       if (IS_ERR(bd)) {
                dev_err(&pdev->dev,
                        "Failed allocate memory for backlight device\n");
-               ret = -ENOMEM;
+               ret = PTR_ERR(bd);
                goto out_kfree;
        }
 
@@ -175,7 +175,7 @@ static int aat2870_bl_probe(struct platform_device *pdev)
        else
                aat2870_bl->channels = AAT2870_BL_CH_ALL;
 
-       if (pdata->max_brightness > 0)
+       if (pdata->max_current > 0)
                aat2870_bl->max_current = pdata->max_current;
        else
                aat2870_bl->max_current = AAT2870_CURRENT_27_9;
index f441726..86b0735 100644 (file)
@@ -36,9 +36,6 @@ config WATCHDOG_CORE
          and gives them the /dev/watchdog interface (and later also the
          sysfs interface).
 
-         To compile this driver as a module, choose M here: the module will
-         be called watchdog.
-
 config WATCHDOG_NOWAYOUT
        bool "Disable watchdog shutdown on close"
        help
index afa78a5..809f41c 100644 (file)
@@ -458,7 +458,15 @@ static int __devexit nv_tco_remove(struct platform_device *dev)
 
 static void nv_tco_shutdown(struct platform_device *dev)
 {
+       u32 val;
+
        tco_timer_stop();
+
+       /* Some BIOSes fail the POST (once) if the NO_REBOOT flag is not
+        * unset during shutdown. */
+       pci_read_config_dword(tco_pci, MCP51_SMBUS_SETUP_B, &val);
+       val &= ~MCP51_SMBUS_SETUP_B_TCO_REBOOT;
+       pci_write_config_dword(tco_pci, MCP51_SMBUS_SETUP_B, val);
 }
 
 static struct platform_driver nv_tco_driver = {
index db84f23..a267dc0 100644 (file)
@@ -64,7 +64,7 @@
  * misses its deadline, the kernel timer will allow the WDT to overflow.
  */
 static int clock_division_ratio = WTCSR_CKS_4096;
-#define next_ping_period(cks)  msecs_to_jiffies(cks - 4)
+#define next_ping_period(cks)  (jiffies + msecs_to_jiffies(cks - 4))
 
 static const struct watchdog_info sh_wdt_info;
 static struct platform_device *sh_wdt_dev;
index 19891aa..9fe0b34 100644 (file)
@@ -127,14 +127,21 @@ config TMPFS_POSIX_ACL
        select TMPFS_XATTR
        select GENERIC_ACL
        help
-         POSIX Access Control Lists (ACLs) support permissions for users and
-         groups beyond the owner/group/world scheme.
+         POSIX Access Control Lists (ACLs) support additional access rights
+         for users and groups beyond the standard owner/group/world scheme,
+         and this option selects support for ACLs specifically for tmpfs
+         filesystems.
+
+         If you've selected TMPFS, it's possible that you'll also need
+         this option as there are a number of Linux distros that require
+         POSIX ACL support under /dev for certain features to work properly.
+         For example, some distros need this feature for ALSA-related /dev
+         files for sound to work properly.  In short, if you're not sure,
+         say Y.
 
          To learn more about Access Control Lists, visit the POSIX ACLs for
          Linux website <http://acl.bestbits.at/>.
 
-         If you don't know what Access Control Lists are, say N.
-
 config TMPFS_XATTR
        bool "Tmpfs extended attributes"
        depends on TMPFS