Merge branch 'for-2.6.28' of git://git.kernel.dk/linux-2.6-block
Linus Torvalds [Fri, 10 Oct 2008 17:52:45 +0000 (10:52 -0700)]
* 'for-2.6.28' of git://git.kernel.dk/linux-2.6-block: (132 commits)
  doc/cdrom: Trvial documentation error, file not present
  block_dev: fix kernel-doc in new functions
  block: add some comments around the bio read-write flags
  block: mark bio_split_pool static
  block: Find bio sector offset given idx and offset
  block: gendisk integrity wrapper
  block: Switch blk_integrity_compare from bdev to gendisk
  block: Fix double put in blk_integrity_unregister
  block: Introduce integrity data ownership flag
  block: revert part of d7533ad0e132f92e75c1b2eb7c26387b25a583c1
  bio.h: Remove unused conditional code
  block: remove end_{queued|dequeued}_request()
  block: change elevator to use __blk_end_request()
  gdrom: change to use __blk_end_request()
  memstick: change to use __blk_end_request()
  virtio_blk: change to use __blk_end_request()
  blktrace: use BLKTRACE_BDEV_SIZE as the name size for setup structure
  block: add lld busy state exporting interface
  block: Fix blk_start_queueing() to not kick a stopped queue
  include blktrace_api.h in headers_install
  ...

413 files changed:
Documentation/00-INDEX
Documentation/cpu-freq/index.txt
Documentation/hwmon/adt7473
Documentation/hwmon/sysfs-interface
Documentation/kernel-parameters.txt
Documentation/x86/00-INDEX [new file with mode: 0644]
Documentation/x86/boot.txt [moved from Documentation/x86/i386/boot.txt with 99% similarity]
Documentation/x86/mtrr.txt [moved from Documentation/mtrr.txt with 99% similarity]
Documentation/x86/pat.txt
Documentation/x86/usb-legacy-support.txt [moved from Documentation/x86/i386/usb-legacy-support.txt with 100% similarity]
Documentation/x86/x86_64/boot-options.txt
Documentation/x86/zero-page.txt [moved from Documentation/x86/i386/zero-page.txt with 100% similarity]
Makefile
arch/x86/Kconfig
arch/x86/Kconfig.cpu
arch/x86/boot/compressed/head_32.S
arch/x86/boot/compressed/misc.c
arch/x86/boot/header.S
arch/x86/configs/i386_defconfig
arch/x86/configs/x86_64_defconfig
arch/x86/ia32/ia32_aout.c
arch/x86/ia32/ia32_signal.c
arch/x86/ia32/sys_ia32.c
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/alternative.c
arch/x86/kernel/aperture_64.c
arch/x86/kernel/apm_32.c
arch/x86/kernel/asm-offsets_64.c
arch/x86/kernel/bios_uv.c
arch/x86/kernel/cpu/common_64.c
arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/mtrr/generic.c
arch/x86/kernel/cpu/mtrr/if.c
arch/x86/kernel/cpu/mtrr/main.c
arch/x86/kernel/cpu/perfctr-watchdog.c
arch/x86/kernel/cpuid.c
arch/x86/kernel/crash_dump_64.c
arch/x86/kernel/ds.c
arch/x86/kernel/efi.c
arch/x86/kernel/entry_64.S
arch/x86/kernel/head64.c
arch/x86/kernel/ioport.c
arch/x86/kernel/ipi.c
arch/x86/kernel/irq_32.c
arch/x86/kernel/irq_64.c
arch/x86/kernel/kvm.c
arch/x86/kernel/ldt.c
arch/x86/kernel/nmi.c
arch/x86/kernel/olpc.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/paravirt_patch_32.c
arch/x86/kernel/pci-dma.c
arch/x86/kernel/pci-gart_64.c
arch/x86/kernel/pcspeaker.c
arch/x86/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/ptrace.c
arch/x86/kernel/reboot.c
arch/x86/kernel/setup.c
arch/x86/kernel/setup_percpu.c
arch/x86/kernel/sigframe.h
arch/x86/kernel/signal_32.c
arch/x86/kernel/signal_64.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/sys_i386_32.c
arch/x86/kernel/sys_x86_64.c
arch/x86/kernel/syscall_64.c
arch/x86/kernel/time_32.c
arch/x86/kernel/tls.c
arch/x86/kernel/traps_64.c
arch/x86/kernel/tsc.c
arch/x86/kernel/visws_quirks.c
arch/x86/kernel/vm86_32.c
arch/x86/kernel/vmi_32.c
arch/x86/lib/msr-on-cpu.c
arch/x86/lib/string_32.c
arch/x86/lib/strstr_32.c
arch/x86/mach-default/setup.c
arch/x86/mm/discontig_32.c
arch/x86/mm/dump_pagetables.c
arch/x86/mm/fault.c
arch/x86/mm/init_32.c
arch/x86/mm/init_64.c
arch/x86/mm/ioremap.c
arch/x86/mm/numa_64.c
arch/x86/mm/pageattr.c
arch/x86/mm/pgtable.c
arch/x86/mm/pgtable_32.c
arch/x86/oprofile/op_model_p4.c
arch/x86/pci/amd_bus.c
arch/x86/pci/irq.c
arch/x86/power/hibernate_asm_32.S
arch/x86/xen/enlighten.c
drivers/ata/Kconfig
drivers/ata/ahci.c
drivers/ata/ata_piix.c
drivers/ata/libata-core.c
drivers/ata/libata-eh.c
drivers/ata/libata-scsi.c
drivers/ata/libata.h
drivers/ata/pata_bf54x.c
drivers/ata/pata_sil680.c
drivers/ata/sata_fsl.c
drivers/ata/sata_inic162x.c
drivers/ata/sata_mv.c
drivers/ata/sata_nv.c
drivers/ata/sata_promise.c
drivers/ata/sata_qstor.c
drivers/ata/sata_sil.c
drivers/ata/sata_sil24.c
drivers/ata/sata_sis.c
drivers/ata/sata_svw.c
drivers/ata/sata_uli.c
drivers/ata/sata_via.c
drivers/ata/sata_vsc.c
drivers/hwmon/abituguru3.c
drivers/hwmon/it87.c
drivers/misc/eeepc-laptop.c
drivers/pnp/Makefile
drivers/pnp/pnpacpi/core.c
drivers/pnp/pnpbios/core.c
drivers/scsi/libsas/sas_ata.c
fs/splice.c
include/asm-x86/a.out-core.h
include/asm-x86/a.out.h
include/asm-x86/acpi.h
include/asm-x86/agp.h
include/asm-x86/alternative.h
include/asm-x86/amd_iommu.h
include/asm-x86/amd_iommu_types.h
include/asm-x86/apic.h
include/asm-x86/apicdef.h
include/asm-x86/arch_hooks.h
include/asm-x86/asm.h
include/asm-x86/atomic_32.h
include/asm-x86/atomic_64.h
include/asm-x86/auxvec.h
include/asm-x86/bios_ebda.h
include/asm-x86/bitops.h
include/asm-x86/boot.h
include/asm-x86/bootparam.h
include/asm-x86/bug.h
include/asm-x86/bugs.h
include/asm-x86/byteorder.h
include/asm-x86/cache.h
include/asm-x86/cacheflush.h
include/asm-x86/calgary.h
include/asm-x86/checksum_32.h
include/asm-x86/checksum_64.h
include/asm-x86/cmpxchg_32.h
include/asm-x86/cmpxchg_64.h
include/asm-x86/compat.h
include/asm-x86/cpu.h
include/asm-x86/cpufeature.h
include/asm-x86/current.h
include/asm-x86/debugreg.h
include/asm-x86/delay.h
include/asm-x86/desc.h
include/asm-x86/desc_defs.h
include/asm-x86/device.h
include/asm-x86/div64.h
include/asm-x86/dma-mapping.h
include/asm-x86/dma.h
include/asm-x86/dmi.h
include/asm-x86/ds.h
include/asm-x86/dwarf2.h
include/asm-x86/e820.h
include/asm-x86/edac.h
include/asm-x86/efi.h
include/asm-x86/elf.h
include/asm-x86/emergency-restart.h
include/asm-x86/fb.h
include/asm-x86/fixmap.h
include/asm-x86/fixmap_32.h
include/asm-x86/fixmap_64.h
include/asm-x86/floppy.h
include/asm-x86/ftrace.h
include/asm-x86/futex.h
include/asm-x86/gart.h
include/asm-x86/genapic_32.h
include/asm-x86/genapic_64.h
include/asm-x86/geode.h
include/asm-x86/gpio.h
include/asm-x86/hardirq_32.h
include/asm-x86/hardirq_64.h
include/asm-x86/highmem.h
include/asm-x86/hpet.h
include/asm-x86/hugetlb.h
include/asm-x86/hw_irq.h
include/asm-x86/hypertransport.h
include/asm-x86/i387.h
include/asm-x86/i8253.h
include/asm-x86/i8259.h
include/asm-x86/ia32.h
include/asm-x86/ia32_unistd.h
include/asm-x86/idle.h
include/asm-x86/intel_arch_perfmon.h
include/asm-x86/io.h
include/asm-x86/io_32.h
include/asm-x86/io_64.h
include/asm-x86/io_apic.h
include/asm-x86/ioctls.h
include/asm-x86/iommu.h
include/asm-x86/ipcbuf.h
include/asm-x86/ipi.h
include/asm-x86/irq.h
include/asm-x86/irq_regs_32.h
include/asm-x86/irq_vectors.h
include/asm-x86/ist.h
include/asm-x86/k8.h
include/asm-x86/kdebug.h
include/asm-x86/kexec.h
include/asm-x86/kgdb.h
include/asm-x86/kmap_types.h
include/asm-x86/kprobes.h
include/asm-x86/kvm.h
include/asm-x86/kvm_host.h
include/asm-x86/kvm_para.h
include/asm-x86/kvm_x86_emulate.h
include/asm-x86/ldt.h
include/asm-x86/lguest.h
include/asm-x86/lguest_hcall.h
include/asm-x86/linkage.h
include/asm-x86/local.h
include/asm-x86/mach-bigsmp/mach_apic.h
include/asm-x86/mach-bigsmp/mach_apicdef.h
include/asm-x86/mach-bigsmp/mach_ipi.h
include/asm-x86/mach-default/apm.h
include/asm-x86/mach-default/mach_apic.h
include/asm-x86/mach-default/mach_apicdef.h
include/asm-x86/mach-default/mach_ipi.h
include/asm-x86/mach-default/mach_mpparse.h
include/asm-x86/mach-default/mach_mpspec.h
include/asm-x86/mach-default/mach_timer.h
include/asm-x86/mach-default/mach_traps.h
include/asm-x86/mach-default/mach_wakecpu.h
include/asm-x86/mach-es7000/mach_apic.h
include/asm-x86/mach-es7000/mach_apicdef.h
include/asm-x86/mach-es7000/mach_ipi.h
include/asm-x86/mach-es7000/mach_mpparse.h
include/asm-x86/mach-es7000/mach_wakecpu.h
include/asm-x86/mach-generic/gpio.h
include/asm-x86/mach-generic/irq_vectors_limits.h
include/asm-x86/mach-generic/mach_apic.h
include/asm-x86/mach-generic/mach_apicdef.h
include/asm-x86/mach-generic/mach_ipi.h
include/asm-x86/mach-generic/mach_mpparse.h
include/asm-x86/mach-generic/mach_mpspec.h
include/asm-x86/mach-numaq/mach_apic.h
include/asm-x86/mach-numaq/mach_apicdef.h
include/asm-x86/mach-numaq/mach_ipi.h
include/asm-x86/mach-numaq/mach_mpparse.h
include/asm-x86/mach-numaq/mach_wakecpu.h
include/asm-x86/mach-rdc321x/gpio.h
include/asm-x86/mach-summit/irq_vectors_limits.h
include/asm-x86/mach-summit/mach_apic.h
include/asm-x86/mach-summit/mach_apicdef.h
include/asm-x86/mach-summit/mach_ipi.h
include/asm-x86/mach-summit/mach_mpparse.h
include/asm-x86/math_emu.h
include/asm-x86/mc146818rtc.h
include/asm-x86/mca.h
include/asm-x86/mca_dma.h
include/asm-x86/mce.h
include/asm-x86/mman.h
include/asm-x86/mmconfig.h
include/asm-x86/mmu.h
include/asm-x86/mmu_context.h
include/asm-x86/mmu_context_32.h
include/asm-x86/mmu_context_64.h
include/asm-x86/mmx.h
include/asm-x86/mmzone_32.h
include/asm-x86/mmzone_64.h
include/asm-x86/module.h
include/asm-x86/mpspec.h
include/asm-x86/mpspec_def.h
include/asm-x86/msgbuf.h
include/asm-x86/msidef.h
include/asm-x86/msr-index.h
include/asm-x86/msr.h
include/asm-x86/mtrr.h
include/asm-x86/mutex_32.h
include/asm-x86/mutex_64.h
include/asm-x86/nmi.h
include/asm-x86/nops.h
include/asm-x86/numa_32.h
include/asm-x86/numa_64.h
include/asm-x86/numaq.h
include/asm-x86/olpc.h
include/asm-x86/page.h
include/asm-x86/page_32.h
include/asm-x86/page_64.h
include/asm-x86/param.h
include/asm-x86/paravirt.h
include/asm-x86/parport.h
include/asm-x86/pat.h
include/asm-x86/pci-direct.h
include/asm-x86/pci.h
include/asm-x86/pci_32.h
include/asm-x86/pci_64.h
include/asm-x86/pda.h
include/asm-x86/percpu.h
include/asm-x86/pgalloc.h
include/asm-x86/pgtable-2level-defs.h
include/asm-x86/pgtable-2level.h
include/asm-x86/pgtable-3level-defs.h
include/asm-x86/pgtable-3level.h
include/asm-x86/pgtable.h
include/asm-x86/pgtable_32.h
include/asm-x86/pgtable_64.h
include/asm-x86/posix_types_32.h
include/asm-x86/posix_types_64.h
include/asm-x86/prctl.h
include/asm-x86/processor-flags.h
include/asm-x86/processor.h
include/asm-x86/proto.h
include/asm-x86/ptrace-abi.h
include/asm-x86/ptrace.h
include/asm-x86/pvclock-abi.h
include/asm-x86/pvclock.h
include/asm-x86/reboot.h
include/asm-x86/reboot_fixups.h
include/asm-x86/required-features.h
include/asm-x86/resume-trace.h
include/asm-x86/rio.h
include/asm-x86/rwlock.h
include/asm-x86/rwsem.h
include/asm-x86/scatterlist.h
include/asm-x86/seccomp_32.h
include/asm-x86/seccomp_64.h
include/asm-x86/segment.h
include/asm-x86/sembuf.h
include/asm-x86/serial.h
include/asm-x86/setup.h
include/asm-x86/shmbuf.h
include/asm-x86/shmparam.h
include/asm-x86/sigcontext.h
include/asm-x86/sigcontext32.h
include/asm-x86/siginfo.h
include/asm-x86/signal.h
include/asm-x86/smp.h
include/asm-x86/socket.h
include/asm-x86/sockios.h
include/asm-x86/sparsemem.h
include/asm-x86/spinlock.h
include/asm-x86/spinlock_types.h
include/asm-x86/srat.h
include/asm-x86/stacktrace.h
include/asm-x86/stat.h
include/asm-x86/statfs.h
include/asm-x86/string_32.h
include/asm-x86/string_64.h
include/asm-x86/suspend_32.h
include/asm-x86/suspend_64.h
include/asm-x86/swiotlb.h
include/asm-x86/sync_bitops.h
include/asm-x86/syscall.h [new file with mode: 0644]
include/asm-x86/syscalls.h [new file with mode: 0644]
include/asm-x86/system.h
include/asm-x86/system_64.h
include/asm-x86/tce.h
include/asm-x86/termbits.h
include/asm-x86/termios.h
include/asm-x86/therm_throt.h
include/asm-x86/thread_info.h
include/asm-x86/time.h
include/asm-x86/timer.h
include/asm-x86/timex.h
include/asm-x86/tlb.h
include/asm-x86/tlbflush.h
include/asm-x86/topology.h
include/asm-x86/trampoline.h
include/asm-x86/traps.h
include/asm-x86/tsc.h
include/asm-x86/types.h
include/asm-x86/uaccess.h
include/asm-x86/uaccess_32.h
include/asm-x86/uaccess_64.h
include/asm-x86/ucontext.h
include/asm-x86/unaligned.h
include/asm-x86/unistd_32.h
include/asm-x86/unistd_64.h
include/asm-x86/unwind.h
include/asm-x86/user32.h
include/asm-x86/user_32.h
include/asm-x86/user_64.h
include/asm-x86/uv/bios.h
include/asm-x86/uv/uv_bau.h
include/asm-x86/uv/uv_hub.h
include/asm-x86/uv/uv_mmrs.h
include/asm-x86/vdso.h
include/asm-x86/vga.h
include/asm-x86/vgtod.h
include/asm-x86/visws/cobalt.h
include/asm-x86/visws/lithium.h
include/asm-x86/visws/piix4.h
include/asm-x86/vm86.h
include/asm-x86/vmi_time.h
include/asm-x86/vsyscall.h
include/asm-x86/xen/events.h
include/asm-x86/xen/grant_table.h
include/asm-x86/xen/hypercall.h
include/asm-x86/xen/hypervisor.h
include/asm-x86/xen/interface.h
include/asm-x86/xen/interface_32.h
include/asm-x86/xen/interface_64.h
include/asm-x86/xen/page.h
include/linux/ata.h
include/linux/libata.h
mm/slob.c

index 5b5aba4..7306081 100644 (file)
@@ -251,8 +251,6 @@ mono.txt
        - how to execute Mono-based .NET binaries with the help of BINFMT_MISC.
 moxa-smartio
        - file with info on installing/using Moxa multiport serial driver.
-mtrr.txt
-       - how to use PPro Memory Type Range Registers to increase performance.
 mutex-design.txt
        - info on the generic mutex subsystem.
 namespaces/
index ffdb532..3d0b915 100644 (file)
@@ -35,11 +35,9 @@ Mailing List
 ------------
 There is a CPU frequency changing CVS commit and general list where
 you can report bugs, problems or submit patches. To post a message,
-send an email to cpufreq@lists.linux.org.uk, to subscribe go to
-http://lists.linux.org.uk/mailman/listinfo/cpufreq. Previous post to the
-mailing list are available to subscribers at
-http://lists.linux.org.uk/mailman/private/cpufreq/.
-
+send an email to cpufreq@vger.kernel.org, to subscribe go to
+http://vger.kernel.org/vger-lists.html#cpufreq and follow the
+instructions there.
 
 Links
 -----
@@ -50,7 +48,7 @@ how to access the CVS repository:
 * http://cvs.arm.linux.org.uk/
 
 the CPUFreq Mailing list:
-* http://lists.linux.org.uk/mailman/listinfo/cpufreq
+* http://vger.kernel.org/vger-lists.html#cpufreq
 
 Clock and voltage scaling for the SA-1100:
 * http://www.lartmaker.nl/projects/scaling
index 2126de3..1cbf671 100644 (file)
@@ -14,14 +14,14 @@ Description
 
 This driver implements support for the Analog Devices ADT7473 chip family.
 
-The LM85 uses the 2-wire interface compatible with the SMBUS 2.0
+The ADT7473 uses the 2-wire interface compatible with the SMBUS 2.0
 specification. Using an analog to digital converter it measures three (3)
-temperatures and two (2) voltages. It has three (3) 16-bit counters for
+temperatures and two (2) voltages. It has four (4) 16-bit counters for
 measuring fan speed. There are three (3) PWM outputs that can be used
 to control fan speed.
 
 A sophisticated control system for the PWM outputs is designed into the
-LM85 that allows fan speed to be adjusted automatically based on any of the
+ADT7473 that allows fan speed to be adjusted automatically based on any of the
 three temperature sensors. Each PWM output is individually adjustable and
 programmable. Once configured, the ADT7473 will adjust the PWM outputs in
 response to the measured temperatures without further host intervention.
@@ -46,14 +46,6 @@ from the raw value to get the temperature value.
 The Analog Devices datasheet is very detailed and describes a procedure for
 determining an optimal configuration for the automatic PWM control.
 
-Hardware Configurations
------------------------
-
-The ADT7473 chips have an optional SMBALERT output that can be used to
-signal the chipset in case a limit is exceeded or the temperature sensors
-fail. Individual sensor interrupts can be masked so they won't trigger
-SMBALERT. The SMBALERT output if configured replaces the PWM2 function.
-
 Configuration Notes
 -------------------
 
@@ -61,8 +53,8 @@ Besides standard interfaces driver adds the following:
 
 * PWM Control
 
-* pwm#_auto_point1_pwm and pwm#_auto_point1_temp and
-* pwm#_auto_point2_pwm and pwm#_auto_point2_temp -
+* pwm#_auto_point1_pwm and temp#_auto_point1_temp and
+* pwm#_auto_point2_pwm and temp#_auto_point2_temp -
 
 point1: Set the pwm speed at a lower temperature bound.
 point2: Set the pwm speed at a higher temperature bound.
index 2d84573..6dbfd5e 100644 (file)
@@ -329,6 +329,10 @@ power[1-*]_average         Average power use
                                Unit: microWatt
                                RO
 
+power[1-*]_average_interval    Power use averaging interval
+                               Unit: milliseconds
+                               RW
+
 power[1-*]_average_highest     Historical average maximum power use
                                Unit: microWatt
                                RO
@@ -354,6 +358,14 @@ power[1-*]_reset_history   Reset input_highest, input_lowest,
                                WO
 
 **********
+* Energy *
+**********
+
+energy[1-*]_input              Cumulative energy use
+                               Unit: microJoule
+                               RO
+
+**********
 * Alarms *
 **********
 
index 1150444..329dcab 100644 (file)
@@ -463,12 +463,6 @@ and is between 256 and 4096 characters. It is defined in the file
                        Range: 0 - 8192
                        Default: 64
 
-       disable_8254_timer
-       enable_8254_timer
-                       [IA32/X86_64] Disable/Enable interrupt 0 timer routing
-                       over the 8254 in addition to over the IO-APIC. The
-                       kernel tries to set a sensible default.
-
        hpet=           [X86-32,HPET] option to control HPET usage
                        Format: { enable (default) | disable | force }
                        disable: disable HPET and use PIT instead
@@ -1882,6 +1876,12 @@ and is between 256 and 4096 characters. It is defined in the file
        shapers=        [NET]
                        Maximal number of shapers.
 
+       show_msr=       [x86] show boot-time MSR settings
+                       Format: { <integer> }
+                       Show boot-time (BIOS-initialized) MSR settings.
+                       The parameter means the number of CPUs to show,
+                       for example 1 means boot CPU only.
+
        sim710=         [SCSI,HW]
                        See header of drivers/scsi/sim710.c.
 
diff --git a/Documentation/x86/00-INDEX b/Documentation/x86/00-INDEX
new file mode 100644 (file)
index 0000000..dbe3377
--- /dev/null
@@ -0,0 +1,4 @@
+00-INDEX
+       - this file
+mtrr.txt
+       - how to use x86 Memory Type Range Registers to increase performance
similarity index 99%
rename from Documentation/x86/i386/boot.txt
rename to Documentation/x86/boot.txt
index 147bfe5..83c0033 100644 (file)
@@ -308,7 +308,7 @@ Protocol:   2.00+
 
 Field name:    start_sys
 Type:          read
-Offset/size:   0x20c/4
+Offset/size:   0x20c/2
 Protocol:      2.00+
 
   The load low segment (0x1000).  Obsolete.
similarity index 99%
rename from Documentation/mtrr.txt
rename to Documentation/x86/mtrr.txt
index c39ac39..cc071dc 100644 (file)
@@ -18,7 +18,7 @@ Richard Gooch
   The AMD K6-2 (stepping 8 and above) and K6-3 processors have two
   MTRRs. These are supported.  The AMD Athlon family provide 8 Intel
   style MTRRs.
-  
+
   The Centaur C6 (WinChip) has 8 MCRs, allowing write-combining. These
   are supported.
 
@@ -87,7 +87,7 @@ reg00: base=0x00000000 (   0MB), size=  64MB: write-back, count=1
 reg01: base=0xfb000000 (4016MB), size=  16MB: write-combining, count=1
 reg02: base=0xfb000000 (4016MB), size=   4kB: uncachable, count=1
 
-Some cards (especially Voodoo Graphics boards) need this 4 kB area 
+Some cards (especially Voodoo Graphics boards) need this 4 kB area
 excluded from the beginning of the region because it is used for
 registers.
 
index 17965f9..c93ff5f 100644 (file)
@@ -14,6 +14,10 @@ PAT allows for different types of memory attributes. The most commonly used
 ones that will be supported at this time are Write-back, Uncached,
 Write-combined and Uncached Minus.
 
+
+PAT APIs
+--------
+
 There are many different APIs in the kernel that allow setting of memory
 attributes at the page level. In order to avoid aliasing, these interfaces
 should be used thoughtfully. Below is a table of interfaces available,
@@ -26,38 +30,38 @@ address range to avoid any aliasing.
 API                    |    RAM   |  ACPI,...  |  Reserved/Holes  |
 -----------------------|----------|------------|------------------|
                        |          |            |                  |
-ioremap                |    --    |    UC      |       UC         |
+ioremap                |    --    |    UC-     |       UC-        |
                        |          |            |                  |
 ioremap_cache          |    --    |    WB      |       WB         |
                        |          |            |                  |
-ioremap_nocache        |    --    |    UC      |       UC         |
+ioremap_nocache        |    --    |    UC-     |       UC-        |
                        |          |            |                  |
 ioremap_wc             |    --    |    --      |       WC         |
                        |          |            |                  |
-set_memory_uc          |    UC    |    --      |       --         |
+set_memory_uc          |    UC-   |    --      |       --         |
  set_memory_wb         |          |            |                  |
                        |          |            |                  |
 set_memory_wc          |    WC    |    --      |       --         |
  set_memory_wb         |          |            |                  |
                        |          |            |                  |
-pci sysfs resource     |    --    |    --      |       UC         |
+pci sysfs resource     |    --    |    --      |       UC-        |
                        |          |            |                  |
 pci sysfs resource_wc  |    --    |    --      |       WC         |
  is IORESOURCE_PREFETCH|          |            |                  |
                        |          |            |                  |
-pci proc               |    --    |    --      |       UC         |
+pci proc               |    --    |    --      |       UC-        |
  !PCIIOC_WRITE_COMBINE |          |            |                  |
                        |          |            |                  |
 pci proc               |    --    |    --      |       WC         |
  PCIIOC_WRITE_COMBINE  |          |            |                  |
                        |          |            |                  |
-/dev/mem               |    --    |    UC      |       UC         |
+/dev/mem               |    --    |  WB/WC/UC- |    WB/WC/UC-     |
  read-write            |          |            |                  |
                        |          |            |                  |
-/dev/mem               |    --    |    UC      |       UC         |
+/dev/mem               |    --    |    UC-     |       UC-        |
  mmap SYNC flag        |          |            |                  |
                        |          |            |                  |
-/dev/mem               |    --    |  WB/WC/UC  |    WB/WC/UC      |
+/dev/mem               |    --    |  WB/WC/UC- |    WB/WC/UC-     |
  mmap !SYNC flag       |          |(from exist-|  (from exist-    |
  and                   |          |  ing alias)|    ing alias)    |
  any alias to this area|          |            |                  |
@@ -68,7 +72,7 @@ pci proc               |    --    |    --      |       WC         |
  and                   |          |            |                  |
  MTRR says WB          |          |            |                  |
                        |          |            |                  |
-/dev/mem               |    --    |    --      |    UC_MINUS      |
+/dev/mem               |    --    |    --      |       UC-        |
  mmap !SYNC flag       |          |            |                  |
  no alias to this area |          |            |                  |
  and                   |          |            |                  |
@@ -98,3 +102,35 @@ types.
 
 Drivers should use set_memory_[uc|wc] to set access type for RAM ranges.
 
+
+PAT debugging
+-------------
+
+With CONFIG_DEBUG_FS enabled, PAT memtype list can be examined by
+
+# mount -t debugfs debugfs /sys/kernel/debug
+# cat /sys/kernel/debug/x86/pat_memtype_list
+PAT memtype list:
+uncached-minus @ 0x7fadf000-0x7fae0000
+uncached-minus @ 0x7fb19000-0x7fb1a000
+uncached-minus @ 0x7fb1a000-0x7fb1b000
+uncached-minus @ 0x7fb1b000-0x7fb1c000
+uncached-minus @ 0x7fb1c000-0x7fb1d000
+uncached-minus @ 0x7fb1d000-0x7fb1e000
+uncached-minus @ 0x7fb1e000-0x7fb25000
+uncached-minus @ 0x7fb25000-0x7fb26000
+uncached-minus @ 0x7fb26000-0x7fb27000
+uncached-minus @ 0x7fb27000-0x7fb28000
+uncached-minus @ 0x7fb28000-0x7fb2e000
+uncached-minus @ 0x7fb2e000-0x7fb2f000
+uncached-minus @ 0x7fb2f000-0x7fb30000
+uncached-minus @ 0x7fb31000-0x7fb32000
+uncached-minus @ 0x80000000-0x90000000
+
+This list shows physical address ranges and various PAT settings used to
+access those physical address ranges.
+
+Another, more verbose way of getting PAT related debug messages is with
+"debugpat" boot parameter. With this parameter, various debug messages are
+printed to dmesg log.
+
index b0c7b6c..72ffb53 100644 (file)
@@ -54,10 +54,6 @@ APICs
                 apicmaintimer. Useful when your PIT timer is totally
                 broken.
 
-   disable_8254_timer / enable_8254_timer
-                Enable interrupt 0 timer routing over the 8254 in addition to over
-                the IO-APIC. The kernel tries to set a sensible default.
-
 Early Console
 
    syntax: earlyprintk=vga
index ce9eceb..16e3fbb 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 27
-EXTRAVERSION = -rc9
+EXTRAVERSION =
 NAME = Rotary Wombat
 
 # *DOCUMENTATION*
index ed92864..97f0d2b 100644 (file)
@@ -29,6 +29,7 @@ config X86
        select HAVE_FTRACE
        select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
        select HAVE_ARCH_KGDB if !X86_VOYAGER
+       select HAVE_ARCH_TRACEHOOK
        select HAVE_GENERIC_DMA_COHERENT if X86_32
        select HAVE_EFFICIENT_UNALIGNED_ACCESS
 
@@ -1020,7 +1021,7 @@ config HAVE_ARCH_ALLOC_REMAP
 
 config ARCH_FLATMEM_ENABLE
        def_bool y
-       depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC && !NUMA
+       depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA
 
 config ARCH_DISCONTIGMEM_ENABLE
        def_bool y
@@ -1036,7 +1037,7 @@ config ARCH_SPARSEMEM_DEFAULT
 
 config ARCH_SPARSEMEM_ENABLE
        def_bool y
-       depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC)
+       depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) || X86_GENERICARCH
        select SPARSEMEM_STATIC if X86_32
        select SPARSEMEM_VMEMMAP_ENABLE if X86_64
 
@@ -1117,10 +1118,10 @@ config MTRR
          You can safely say Y even if your machine doesn't have MTRRs, you'll
          just add about 9 KB to your kernel.
 
-         See <file:Documentation/mtrr.txt> for more information.
+         See <file:Documentation/x86/mtrr.txt> for more information.
 
 config MTRR_SANITIZER
-       bool
+       def_bool y
        prompt "MTRR cleanup support"
        depends on MTRR
        help
@@ -1131,7 +1132,7 @@ config MTRR_SANITIZER
          The largest mtrr entry size for a continuous block can be set with
          mtrr_chunk_size.
 
-         If unsure, say N.
+         If unsure, say Y.
 
 config MTRR_SANITIZER_ENABLE_DEFAULT
        int "MTRR cleanup enable value (0-1)"
@@ -1191,7 +1192,6 @@ config IRQBALANCE
 config SECCOMP
        def_bool y
        prompt "Enable seccomp to safely compute untrusted bytecode"
-       depends on PROC_FS
        help
          This kernel feature is useful for number crunching applications
          that may need to compute untrusted bytecode during their
@@ -1199,7 +1199,7 @@ config SECCOMP
          the process as file descriptors supporting the read/write
          syscalls, it's possible to isolate those applications in
          their own address space using seccomp. Once seccomp is
-         enabled via /proc/<pid>/seccomp, it cannot be disabled
+         enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
          and the task is only allowed to execute a few safe syscalls
          defined by each seccomp mode.
 
@@ -1356,14 +1356,14 @@ config PHYSICAL_ALIGN
          Don't change this unless you know what you are doing.
 
 config HOTPLUG_CPU
-       bool "Support for suspend on SMP and hot-pluggable CPUs (EXPERIMENTAL)"
-       depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER
+       bool "Support for hot-pluggable CPUs"
+       depends on SMP && HOTPLUG && !X86_VOYAGER
        ---help---
-         Say Y here to experiment with turning CPUs off and on, and to
-         enable suspend on SMP systems. CPUs can be controlled through
-         /sys/devices/system/cpu.
-         Say N if you want to disable CPU hotplug and don't need to
-         suspend.
+         Say Y here to allow turning CPUs off and on. CPUs can be
+         controlled through /sys/devices/system/cpu.
+         ( Note: power management support will enable this option
+           automatically on SMP systems. )
+         Say N if you want to disable CPU hotplug.
 
 config COMPAT_VDSO
        def_bool y
@@ -1378,6 +1378,51 @@ config COMPAT_VDSO
 
          If unsure, say Y.
 
+config CMDLINE_BOOL
+       bool "Built-in kernel command line"
+       default n
+       help
+         Allow for specifying boot arguments to the kernel at
+         build time.  On some systems (e.g. embedded ones), it is
+         necessary or convenient to provide some or all of the
+         kernel boot arguments with the kernel itself (that is,
+         to not rely on the boot loader to provide them.)
+
+         To compile command line arguments into the kernel,
+         set this option to 'Y', then fill in the
+         boot arguments in CONFIG_CMDLINE.
+
+         Systems with fully functional boot loaders (i.e. non-embedded)
+         should leave this option set to 'N'.
+
+config CMDLINE
+       string "Built-in kernel command string"
+       depends on CMDLINE_BOOL
+       default ""
+       help
+         Enter arguments here that should be compiled into the kernel
+         image and used at boot time.  If the boot loader provides a
+         command line at boot time, it is appended to this string to
+         form the full kernel command line, when the system boots.
+
+         However, you can use the CONFIG_CMDLINE_OVERRIDE option to
+         change this behavior.
+
+         In most cases, the command line (whether built-in or provided
+         by the boot loader) should specify the device for the root
+         file system.
+
+config CMDLINE_OVERRIDE
+       bool "Built-in command line overrides boot loader arguments"
+       default n
+       depends on CMDLINE_BOOL
+       help
+         Set this option to 'Y' to have the kernel ignore the boot loader
+         command line, and use ONLY the built-in command line.
+
+         This is used to work around broken boot loaders.  This should
+         be set to 'N' under normal conditions.
+
 endmenu
 
 config ARCH_ENABLE_MEMORY_HOTPLUG
@@ -1773,7 +1818,7 @@ config COMPAT_FOR_U64_ALIGNMENT
 
 config SYSVIPC_COMPAT
        def_bool y
-       depends on X86_64 && COMPAT && SYSVIPC
+       depends on COMPAT && SYSVIPC
 
 endmenu
 
index b225219..60a8576 100644 (file)
@@ -418,3 +418,21 @@ config X86_MINIMUM_CPU_FAMILY
 config X86_DEBUGCTLMSR
        def_bool y
        depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386)
+
+config X86_DS
+       bool "Debug Store support"
+       default y
+       help
+         Add support for Debug Store.
+         This allows the kernel to provide a memory buffer to the hardware
+         to store various profiling and tracing events.
+
+config X86_PTRACE_BTS
+       bool "ptrace interface to Branch Trace Store"
+       default y
+       depends on (X86_DS && X86_DEBUGCTLMSR)
+       help
+         Add a ptrace interface to allow collecting an execution trace
+         of the traced task.
+         This collects control flow changes in a (cyclic) buffer and allows
+         debuggers to fill in the gaps and show an execution trace of the debuggee.
index ba7736c..29c5fbf 100644 (file)
@@ -137,14 +137,15 @@ relocated:
  */
        movl output_len(%ebx), %eax
        pushl %eax
+                       # push arguments for decompress_kernel:
        pushl %ebp      # output address
        movl input_len(%ebx), %eax
        pushl %eax      # input_len
        leal input_data(%ebx), %eax
        pushl %eax      # input_data
        leal boot_heap(%ebx), %eax
-       pushl %eax      # heap area as third argument
-       pushl %esi      # real mode pointer as second arg
+       pushl %eax      # heap area
+       pushl %esi      # real mode pointer
        call decompress_kernel
        addl $20, %esp
        popl %ecx
index 9fea737..5780d36 100644 (file)
@@ -16,7 +16,7 @@
  */
 #undef CONFIG_PARAVIRT
 #ifdef CONFIG_X86_32
-#define _ASM_DESC_H_ 1
+#define ASM_X86__DESC_H 1
 #endif
 
 #ifdef CONFIG_X86_64
@@ -27,7 +27,7 @@
 #include <linux/linkage.h>
 #include <linux/screen_info.h>
 #include <linux/elf.h>
-#include <asm/io.h>
+#include <linux/io.h>
 #include <asm/page.h>
 #include <asm/boot.h>
 #include <asm/bootparam.h>
@@ -251,7 +251,7 @@ static void __putstr(int error, const char *s)
                                y--;
                        }
                } else {
-                       vidmem [(x + cols * y) * 2] = c;
+                       vidmem[(x + cols * y) * 2] = c;
                        if (++x >= cols) {
                                x = 0;
                                if (++y >= lines) {
@@ -277,7 +277,8 @@ static void *memset(void *s, int c, unsigned n)
        int i;
        char *ss = s;
 
-       for (i = 0; i < n; i++) ss[i] = c;
+       for (i = 0; i < n; i++)
+               ss[i] = c;
        return s;
 }
 
@@ -287,7 +288,8 @@ static void *memcpy(void *dest, const void *src, unsigned n)
        const char *s = src;
        char *d = dest;
 
-       for (i = 0; i < n; i++) d[i] = s[i];
+       for (i = 0; i < n; i++)
+               d[i] = s[i];
        return dest;
 }
 
index af86e43..b993062 100644 (file)
@@ -30,7 +30,6 @@ SYSSEG                = DEF_SYSSEG            /* system loaded at 0x10000 (65536) */
 SYSSIZE                = DEF_SYSSIZE           /* system size: # of 16-byte clicks */
                                        /* to be loaded */
 ROOT_DEV       = 0                     /* ROOT_DEV is now written by "build" */
-SWAP_DEV       = 0                     /* SWAP_DEV is now written by "build" */
 
 #ifndef SVGA_MODE
 #define SVGA_MODE ASK_VGA
index 104275e..ef9a520 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.27-rc4
-# Mon Aug 25 15:04:00 2008
+# Linux kernel version: 2.6.27-rc5
+# Wed Sep  3 17:23:09 2008
 #
 # CONFIG_64BIT is not set
 CONFIG_X86_32=y
@@ -202,7 +202,7 @@ CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
 # CONFIG_M586 is not set
 # CONFIG_M586TSC is not set
 # CONFIG_M586MMX is not set
-# CONFIG_M686 is not set
+CONFIG_M686=y
 # CONFIG_MPENTIUMII is not set
 # CONFIG_MPENTIUMIII is not set
 # CONFIG_MPENTIUMM is not set
@@ -221,13 +221,14 @@ CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
 # CONFIG_MVIAC3_2 is not set
 # CONFIG_MVIAC7 is not set
 # CONFIG_MPSC is not set
-CONFIG_MCORE2=y
+# CONFIG_MCORE2 is not set
 # CONFIG_GENERIC_CPU is not set
 CONFIG_X86_GENERIC=y
 CONFIG_X86_CPU=y
 CONFIG_X86_CMPXCHG=y
 CONFIG_X86_L1_CACHE_SHIFT=7
 CONFIG_X86_XADD=y
+# CONFIG_X86_PPRO_FENCE is not set
 CONFIG_X86_WP_WORKS_OK=y
 CONFIG_X86_INVLPG=y
 CONFIG_X86_BSWAP=y
@@ -235,14 +236,15 @@ CONFIG_X86_POPAD_OK=y
 CONFIG_X86_INTEL_USERCOPY=y
 CONFIG_X86_USE_PPRO_CHECKSUM=y
 CONFIG_X86_TSC=y
+CONFIG_X86_CMOV=y
 CONFIG_X86_MINIMUM_CPU_FAMILY=4
 CONFIG_X86_DEBUGCTLMSR=y
 CONFIG_HPET_TIMER=y
 CONFIG_HPET_EMULATE_RTC=y
 CONFIG_DMI=y
 # CONFIG_IOMMU_HELPER is not set
-CONFIG_NR_CPUS=4
-# CONFIG_SCHED_SMT is not set
+CONFIG_NR_CPUS=64
+CONFIG_SCHED_SMT=y
 CONFIG_SCHED_MC=y
 # CONFIG_PREEMPT_NONE is not set
 CONFIG_PREEMPT_VOLUNTARY=y
@@ -254,7 +256,8 @@ CONFIG_VM86=y
 # CONFIG_TOSHIBA is not set
 # CONFIG_I8K is not set
 CONFIG_X86_REBOOTFIXUPS=y
-# CONFIG_MICROCODE is not set
+CONFIG_MICROCODE=y
+CONFIG_MICROCODE_OLD_INTERFACE=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
 # CONFIG_NOHIGHMEM is not set
@@ -2115,7 +2118,7 @@ CONFIG_IO_DELAY_0X80=y
 CONFIG_DEFAULT_IO_DELAY_TYPE=0
 CONFIG_DEBUG_BOOT_PARAMS=y
 # CONFIG_CPA_DEBUG is not set
-# CONFIG_OPTIMIZE_INLINING is not set
+CONFIG_OPTIMIZE_INLINING=y
 
 #
 # Security options
index 678c8ac..e620ea6 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.27-rc4
-# Mon Aug 25 14:40:46 2008
+# Linux kernel version: 2.6.27-rc5
+# Wed Sep  3 17:13:39 2008
 #
 CONFIG_64BIT=y
 # CONFIG_X86_32 is not set
@@ -218,17 +218,14 @@ CONFIG_X86_PC=y
 # CONFIG_MVIAC3_2 is not set
 # CONFIG_MVIAC7 is not set
 # CONFIG_MPSC is not set
-CONFIG_MCORE2=y
-# CONFIG_GENERIC_CPU is not set
+# CONFIG_MCORE2 is not set
+CONFIG_GENERIC_CPU=y
 CONFIG_X86_CPU=y
-CONFIG_X86_L1_CACHE_BYTES=64
-CONFIG_X86_INTERNODE_CACHE_BYTES=64
+CONFIG_X86_L1_CACHE_BYTES=128
+CONFIG_X86_INTERNODE_CACHE_BYTES=128
 CONFIG_X86_CMPXCHG=y
-CONFIG_X86_L1_CACHE_SHIFT=6
+CONFIG_X86_L1_CACHE_SHIFT=7
 CONFIG_X86_WP_WORKS_OK=y
-CONFIG_X86_INTEL_USERCOPY=y
-CONFIG_X86_USE_PPRO_CHECKSUM=y
-CONFIG_X86_P6_NOP=y
 CONFIG_X86_TSC=y
 CONFIG_X86_CMPXCHG64=y
 CONFIG_X86_CMOV=y
@@ -243,9 +240,8 @@ CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y
 CONFIG_AMD_IOMMU=y
 CONFIG_SWIOTLB=y
 CONFIG_IOMMU_HELPER=y
-# CONFIG_MAXSMP is not set
-CONFIG_NR_CPUS=4
-# CONFIG_SCHED_SMT is not set
+CONFIG_NR_CPUS=64
+CONFIG_SCHED_SMT=y
 CONFIG_SCHED_MC=y
 # CONFIG_PREEMPT_NONE is not set
 CONFIG_PREEMPT_VOLUNTARY=y
@@ -254,7 +250,8 @@ CONFIG_X86_LOCAL_APIC=y
 CONFIG_X86_IO_APIC=y
 # CONFIG_X86_MCE is not set
 # CONFIG_I8K is not set
-# CONFIG_MICROCODE is not set
+CONFIG_MICROCODE=y
+CONFIG_MICROCODE_OLD_INTERFACE=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
 CONFIG_NUMA=y
@@ -290,7 +287,7 @@ CONFIG_BOUNCE=y
 CONFIG_VIRT_TO_BUS=y
 CONFIG_MTRR=y
 # CONFIG_MTRR_SANITIZER is not set
-# CONFIG_X86_PAT is not set
+CONFIG_X86_PAT=y
 CONFIG_EFI=y
 CONFIG_SECCOMP=y
 # CONFIG_HZ_100 is not set
@@ -2089,7 +2086,7 @@ CONFIG_IO_DELAY_0X80=y
 CONFIG_DEFAULT_IO_DELAY_TYPE=0
 CONFIG_DEBUG_BOOT_PARAMS=y
 # CONFIG_CPA_DEBUG is not set
-# CONFIG_OPTIMIZE_INLINING is not set
+CONFIG_OPTIMIZE_INLINING=y
 
 #
 # Security options
index a0e1dbe..127ec3f 100644 (file)
@@ -85,8 +85,10 @@ static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
        dump->regs.ax = regs->ax;
        dump->regs.ds = current->thread.ds;
        dump->regs.es = current->thread.es;
-       asm("movl %%fs,%0" : "=r" (fs)); dump->regs.fs = fs;
-       asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs;
+       savesegment(fs, fs);
+       dump->regs.fs = fs;
+       savesegment(gs, gs);
+       dump->regs.gs = gs;
        dump->regs.orig_ax = regs->orig_ax;
        dump->regs.ip = regs->ip;
        dump->regs.cs = regs->cs;
@@ -430,8 +432,9 @@ beyond_if:
        current->mm->start_stack =
                (unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
        /* start thread */
-       asm volatile("movl %0,%%fs" :: "r" (0)); \
-       asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS));
+       loadsegment(fs, 0);
+       loadsegment(ds, __USER32_DS);
+       loadsegment(es, __USER32_DS);
        load_gs_index(0);
        (regs)->ip = ex.a_entry;
        (regs)->sp = current->mm->start_stack;
index 20af4c7..f1a2ac7 100644 (file)
@@ -206,7 +206,7 @@ struct rt_sigframe
        { unsigned int cur;                                             \
          unsigned short pre;                                           \
          err |= __get_user(pre, &sc->seg);                             \
-         asm volatile("movl %%" #seg ",%0" : "=r" (cur));              \
+         savesegment(seg, cur);                                        \
          pre |= mask;                                                  \
          if (pre != cur) loadsegment(seg, pre); }
 
@@ -235,7 +235,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
         */
        err |= __get_user(gs, &sc->gs);
        gs |= 3;
-       asm("movl %%gs,%0" : "=r" (oldgs));
+       savesegment(gs, oldgs);
        if (gs != oldgs)
                load_gs_index(gs);
 
@@ -355,14 +355,13 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
 {
        int tmp, err = 0;
 
-       tmp = 0;
-       __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
+       savesegment(gs, tmp);
        err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
-       __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
+       savesegment(fs, tmp);
        err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
-       __asm__("movl %%ds,%0" : "=r"(tmp): "0"(tmp));
+       savesegment(ds, tmp);
        err |= __put_user(tmp, (unsigned int __user *)&sc->ds);
-       __asm__("movl %%es,%0" : "=r"(tmp): "0"(tmp));
+       savesegment(es, tmp);
        err |= __put_user(tmp, (unsigned int __user *)&sc->es);
 
        err |= __put_user((u32)regs->di, &sc->di);
@@ -498,8 +497,8 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
        regs->dx = 0;
        regs->cx = 0;
 
-       asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
-       asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
+       loadsegment(ds, __USER32_DS);
+       loadsegment(es, __USER32_DS);
 
        regs->cs = __USER32_CS;
        regs->ss = __USER32_DS;
@@ -591,8 +590,8 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
        regs->dx = (unsigned long) &frame->info;
        regs->cx = (unsigned long) &frame->uc;
 
-       asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
-       asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
+       loadsegment(ds, __USER32_DS);
+       loadsegment(es, __USER32_DS);
 
        regs->cs = __USER32_CS;
        regs->ss = __USER32_DS;
index d3c6408..beda423 100644 (file)
@@ -556,15 +556,6 @@ asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig,
        return ret;
 }
 
-/* These are here just in case some old ia32 binary calls it. */
-asmlinkage long sys32_pause(void)
-{
-       current->state = TASK_INTERRUPTIBLE;
-       schedule();
-       return -ERESTARTNOHAND;
-}
-
-
 #ifdef CONFIG_SYSCTL_SYSCALL
 struct sysctl_ia32 {
        unsigned int    name;
index c102af8..7d40ef7 100644 (file)
@@ -58,7 +58,6 @@ EXPORT_SYMBOL(acpi_disabled);
 #ifdef CONFIG_X86_64
 
 #include <asm/proto.h>
-#include <asm/genapic.h>
 
 #else                          /* X86 */
 
@@ -97,8 +96,6 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
 #warning ACPI uses CMPXCHG, i486 and later hardware
 #endif
 
-static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
-
 /* --------------------------------------------------------------------------
                               Boot-time Configuration
    -------------------------------------------------------------------------- */
@@ -160,6 +157,8 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
 struct acpi_mcfg_allocation *pci_mmcfg_config;
 int pci_mmcfg_config_num;
 
+static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
+
 static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
 {
        if (!strcmp(mcfg->header.oem_id, "SGI"))
index 65a0c1b..fb04e49 100644 (file)
@@ -231,25 +231,25 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
                        continue;
                if (*ptr > text_end)
                        continue;
-               text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */
+               /* turn DS segment override prefix into lock prefix */
+               text_poke(*ptr, ((unsigned char []){0xf0}), 1);
        };
 }
 
 static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
 {
        u8 **ptr;
-       char insn[1];
 
        if (noreplace_smp)
                return;
 
-       add_nops(insn, 1);
        for (ptr = start; ptr < end; ptr++) {
                if (*ptr < text)
                        continue;
                if (*ptr > text_end)
                        continue;
-               text_poke(*ptr, insn, 1);
+               /* turn lock prefix into DS segment override prefix */
+               text_poke(*ptr, ((unsigned char []){0x3E}), 1);
        };
 }
 
index 44e2182..9a32b37 100644 (file)
@@ -455,11 +455,11 @@ out:
                   force_iommu ||
                   valid_agp ||
                   fallback_aper_force) {
-               printk(KERN_ERR
+               printk(KERN_INFO
                        "Your BIOS doesn't leave a aperture memory hole\n");
-               printk(KERN_ERR
+               printk(KERN_INFO
                        "Please enable the IOMMU option in the BIOS setup\n");
-               printk(KERN_ERR
+               printk(KERN_INFO
                        "This costs you %d MB of RAM\n",
                                32 << fallback_aper_order);
 
index 732d1f4..5145a6e 100644 (file)
 #include <linux/suspend.h>
 #include <linux/kthread.h>
 #include <linux/jiffies.h>
-#include <linux/smp_lock.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
index aa89387..505543a 100644 (file)
@@ -22,7 +22,7 @@
 
 #define __NO_STUBS 1
 #undef __SYSCALL
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
 #define __SYSCALL(nr, sym) [nr] = 1,
 static char syscalls[] = {
 #include <asm/unistd.h>
index c639bd5..fdd585f 100644 (file)
@@ -25,11 +25,11 @@ x86_bios_strerror(long status)
 {
        const char *str;
        switch (status) {
-       case  0: str = "Call completed without error"; break;
-       case -1: str = "Not implemented"; break;
-       case -2: str = "Invalid argument"; break;
-       case -3: str = "Call completed with error"; break;
-       default: str = "Unknown BIOS status code"; break;
+       case  0: str = "Call completed without error";  break;
+       case -1: str = "Not implemented";               break;
+       case -2: str = "Invalid argument";              break;
+       case -3: str = "Call completed with error";     break;
+       default: str = "Unknown BIOS status code";      break;
        }
        return str;
 }
index a11f5d4..305b465 100644 (file)
@@ -430,6 +430,49 @@ static __init int setup_noclflush(char *arg)
 }
 __setup("noclflush", setup_noclflush);
 
+/* One span of MSR indices, described by its lower and upper bound. */
+struct msr_range {
+       unsigned min;
+       unsigned max;
+};
+
+/* Table of MSR index spans that print_cpu_msr() iterates over. */
+static struct msr_range msr_range_array[] __cpuinitdata = {
+       { 0x00000000, 0x00000418},
+       { 0xc0000000, 0xc000040b},
+       { 0xc0010000, 0xc0010142},
+       { 0xc0011000, 0xc001103b},
+};
+
+/*
+ * Walk every entry of msr_range_array and print each MSR that can be read.
+ * An index for which rdmsrl_amd_safe() returns non-zero is skipped silently.
+ */
+static void __cpuinit print_cpu_msr(void)
+{
+       unsigned index;
+       u64 val;
+       int i;
+       unsigned index_min, index_max;
+
+       for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
+               index_min = msr_range_array[i].min;
+               index_max = msr_range_array[i].max;
+               /*
+                * NOTE(review): the upper bound is exclusive here, so the
+                * MSR at .max itself is never read -- confirm this is
+                * intended.
+                */
+               for (index = index_min; index < index_max; index++) {
+                       if (rdmsrl_amd_safe(index, &val))
+                               continue;
+                       printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
+               }
+       }
+}
+
+/*
+ * Number given with "show_msr=" on the kernel command line; stays 0 unless
+ * a positive value was supplied.
+ */
+static int show_msr __cpuinitdata;
+
+/*
+ * Parse the "show_msr=" boot option.
+ * @arg: text following "show_msr=".
+ * Stores the parsed number in show_msr when it is positive; any other
+ * input leaves show_msr unchanged.  Always returns 1.
+ */
+static __init int setup_show_msr(char *arg)
+{
+       /*
+        * Start from 0: get_option() can return without writing to num
+        * (for instance when the argument is empty), and the test below
+        * must not read an indeterminate value in that case.
+        */
+       int num = 0;
+
+       get_option(&arg, &num);
+
+       if (num > 0)
+               show_msr = num;
+       return 1;
+}
+__setup("show_msr=", setup_show_msr);
+
 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 {
        if (c->x86_model_id[0])
@@ -439,6 +482,14 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
                printk(KERN_CONT " stepping %02x\n", c->x86_mask);
        else
                printk(KERN_CONT "\n");
+
+#ifdef CONFIG_SMP
+       if (c->cpu_index < show_msr)
+               print_cpu_msr();
+#else
+       if (show_msr)
+               print_cpu_msr();
+#endif
 }
 
 static __init int setup_disablecpuid(char *arg)
index f1685fb..b8e05ee 100644 (file)
@@ -171,7 +171,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
        }
 
        if (c->x86 != 0xF) {
-               printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@lists.linux.org.uk>\n");
+               printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@vger.kernel.org>\n");
                return 0;
        }
 
index 15e13c0..3b5f064 100644 (file)
@@ -26,7 +26,7 @@
 #include <asm/cpufeature.h>
 
 #define PFX            "speedstep-centrino: "
-#define MAINTAINER     "cpufreq@lists.linux.org.uk"
+#define MAINTAINER     "cpufreq@vger.kernel.org"
 
 #define dprintk(msg...) \
        cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
index b75f256..f113ef4 100644 (file)
@@ -222,10 +222,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
                        set_cpu_cap(c, X86_FEATURE_BTS);
                if (!(l1 & (1<<12)))
                        set_cpu_cap(c, X86_FEATURE_PEBS);
+               ds_init_intel(c);
        }
 
        if (cpu_has_bts)
-               ds_init_intel(c);
+               ptrace_bts_init_intel(c);
 
        /*
         * See if we have a good local APIC by checking for buggy Pentia,
index cb7d3b6..4e8d77f 100644 (file)
@@ -401,12 +401,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
                tmp |= ~((1<<(hi - 1)) - 1);
 
                if (tmp != mask_lo) {
-                       static int once = 1;
-
-                       if (once) {
-                               printk(KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n");
-                               once = 0;
-                       }
+                       WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n");
                        mask_lo = tmp;
                }
        }
index 84c480b..4c42146 100644 (file)
@@ -405,9 +405,9 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
                        }
                        /* RED-PEN: base can be > 32bit */ 
                        len += seq_printf(seq, 
-                                  "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n",
+                                  "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n",
                             i, base, base >> (20 - PAGE_SHIFT), size, factor,
-                            mtrr_attrib_to_str(type), mtrr_usage_table[i]);
+                            mtrr_usage_table[i], mtrr_attrib_to_str(type));
                }
        }
        return 0;
index 885c826..c78c048 100644 (file)
@@ -729,7 +729,7 @@ struct var_mtrr_range_state {
        mtrr_type type;
 };
 
-struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
+static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
 static int __initdata debug_print;
 
 static int __init
@@ -759,7 +759,8 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
        /* take out UC ranges */
        for (i = 0; i < num_var_ranges; i++) {
                type = range_state[i].type;
-               if (type != MTRR_TYPE_UNCACHABLE)
+               if (type != MTRR_TYPE_UNCACHABLE &&
+                   type != MTRR_TYPE_WRPROT)
                        continue;
                size = range_state[i].size_pfn;
                if (!size)
@@ -836,6 +837,13 @@ static int __init enable_mtrr_cleanup_setup(char *str)
 }
 early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
 
+/*
+ * Handler for the "mtrr_cleanup_debug" early parameter: sets debug_print.
+ * @str is not examined.  Always returns 0.
+ */
+static int __init mtrr_cleanup_debug_setup(char *str)
+{
+       debug_print = 1;
+       return 0;
+}
+early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
+
 struct var_mtrr_state {
        unsigned long   range_startk;
        unsigned long   range_sizek;
@@ -898,6 +906,27 @@ set_var_mtrr_all(unsigned int address_bits)
        }
 }
 
+/*
+ * Express a size given in KB in the largest of KB/MB/GB that divides it
+ * evenly.
+ * @sizek:   size in KB
+ * @factorp: receives the unit letter: 'K', 'M' or 'G'
+ * Returns sizek scaled to the chosen unit.  A sizek of 0 yields 0 and 'G'.
+ */
+static unsigned long to_size_factor(unsigned long sizek, char *factorp)
+{
+       /* low 10 bits set: not a whole number of MB, stay in KB */
+       if (sizek & 0x3ffUL) {
+               *factorp = 'K';
+               return sizek;
+       }
+
+       /* low 20 bits set: whole MB, but not a whole number of GB */
+       if (sizek & 0xfffffUL) {
+               *factorp = 'M';
+               return sizek >> 10;
+       }
+
+       *factorp = 'G';
+       return sizek >> 20;
+}
+
 static unsigned int __init
 range_to_mtrr(unsigned int reg, unsigned long range_startk,
              unsigned long range_sizek, unsigned char type)
@@ -919,13 +948,21 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
                        align = max_align;
 
                sizek = 1 << align;
-               if (debug_print)
+               if (debug_print) {
+                       char start_factor = 'K', size_factor = 'K';
+                       unsigned long start_base, size_base;
+
+                       start_base = to_size_factor(range_startk, &start_factor),
+                       size_base = to_size_factor(sizek, &size_factor),
+
                        printk(KERN_DEBUG "Setting variable MTRR %d, "
-                               "base: %ldMB, range: %ldMB, type %s\n",
-                               reg, range_startk >> 10, sizek >> 10,
+                               "base: %ld%cB, range: %ld%cB, type %s\n",
+                               reg, start_base, start_factor,
+                               size_base, size_factor,
                                (type == MTRR_TYPE_UNCACHABLE)?"UC":
                                    ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
                                );
+               }
                save_var_mtrr(reg++, range_startk, sizek, type);
                range_startk += sizek;
                range_sizek -= sizek;
@@ -970,6 +1007,8 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
        /* try to append some small hole */
        range0_basek = state->range_startk;
        range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
+
+       /* no increase */
        if (range0_sizek == state->range_sizek) {
                if (debug_print)
                        printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
@@ -980,13 +1019,40 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
                return 0;
        }
 
-       range0_sizek -= chunk_sizek;
-       if (range0_sizek && sizek) {
-           while (range0_basek + range0_sizek > (basek + sizek)) {
-               range0_sizek -= chunk_sizek;
-               if (!range0_sizek)
-                       break;
-           }
+       /* only cut back, when it is not the last */
+       if (sizek) {
+               while (range0_basek + range0_sizek > (basek + sizek)) {
+                       if (range0_sizek >= chunk_sizek)
+                               range0_sizek -= chunk_sizek;
+                       else
+                               range0_sizek = 0;
+
+                       if (!range0_sizek)
+                               break;
+               }
+       }
+
+second_try:
+       range_basek = range0_basek + range0_sizek;
+
+       /* one hole in the middle */
+       if (range_basek > basek && range_basek <= (basek + sizek))
+               second_sizek = range_basek - basek;
+
+       if (range0_sizek > state->range_sizek) {
+
+               /* one hole in middle or at end */
+               hole_sizek = range0_sizek - state->range_sizek - second_sizek;
+
+               /* hole size should be less than half of range0 size */
+               if (hole_sizek >= (range0_sizek >> 1) &&
+                   range0_sizek >= chunk_sizek) {
+                       range0_sizek -= chunk_sizek;
+                       second_sizek = 0;
+                       hole_sizek = 0;
+
+                       goto second_try;
+               }
        }
 
        if (range0_sizek) {
@@ -996,50 +1062,28 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
                                (range0_basek + range0_sizek)<<10);
                state->reg = range_to_mtrr(state->reg, range0_basek,
                                range0_sizek, MTRR_TYPE_WRBACK);
-
-       }
-
-       range_basek = range0_basek + range0_sizek;
-       range_sizek = chunk_sizek;
-
-       if (range_basek + range_sizek > basek &&
-           range_basek + range_sizek <= (basek + sizek)) {
-               /* one hole */
-               second_basek = basek;
-               second_sizek = range_basek + range_sizek - basek;
        }
 
-       /* if last piece, only could one hole near end */
-       if ((second_basek || !basek) &&
-           range_sizek - (state->range_sizek - range0_sizek) - second_sizek <
-           (chunk_sizek >> 1)) {
-               /*
-                * one hole in middle (second_sizek is 0) or at end
-                * (second_sizek is 0 )
-                */
-               hole_sizek = range_sizek - (state->range_sizek - range0_sizek)
-                                - second_sizek;
-               hole_basek = range_basek + range_sizek - hole_sizek
-                                - second_sizek;
-       } else {
-               /* fallback for big hole, or several holes */
+       if (range0_sizek < state->range_sizek) {
+               /* need to handle left over */
                range_sizek = state->range_sizek - range0_sizek;
-               second_basek = 0;
-               second_sizek = 0;
+
+               if (debug_print)
+                       printk(KERN_DEBUG "range: %016lx - %016lx\n",
+                                range_basek<<10,
+                                (range_basek + range_sizek)<<10);
+               state->reg = range_to_mtrr(state->reg, range_basek,
+                                range_sizek, MTRR_TYPE_WRBACK);
        }
 
-       if (debug_print)
-               printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10,
-                        (range_basek + range_sizek)<<10);
-       state->reg = range_to_mtrr(state->reg, range_basek, range_sizek,
-                                        MTRR_TYPE_WRBACK);
        if (hole_sizek) {
+               hole_basek = range_basek - hole_sizek - second_sizek;
                if (debug_print)
                        printk(KERN_DEBUG "hole: %016lx - %016lx\n",
-                                hole_basek<<10, (hole_basek + hole_sizek)<<10);
-               state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek,
-                                                MTRR_TYPE_UNCACHABLE);
-
+                                hole_basek<<10,
+                                (hole_basek + hole_sizek)<<10);
+               state->reg = range_to_mtrr(state->reg, hole_basek,
+                                hole_sizek, MTRR_TYPE_UNCACHABLE);
        }
 
        return second_sizek;
@@ -1154,11 +1198,11 @@ struct mtrr_cleanup_result {
 };
 
 /*
- * gran_size: 1M, 2M, ..., 2G
- * chunk size: gran_size, ..., 4G
- * so we need (2+13)*6
+ * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
+ * chunk size: gran_size, ..., 2G
+ * so we need (1+16)*8
  */
-#define NUM_RESULT     90
+#define NUM_RESULT     136
 #define PSHIFT         (PAGE_SHIFT - 10)
 
 static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
@@ -1168,13 +1212,14 @@ static unsigned long __initdata min_loss_pfn[RANGE_NUM];
 static int __init mtrr_cleanup(unsigned address_bits)
 {
        unsigned long extra_remove_base, extra_remove_size;
-       unsigned long i, base, size, def, dummy;
+       unsigned long base, size, def, dummy;
        mtrr_type type;
        int nr_range, nr_range_new;
        u64 chunk_size, gran_size;
        unsigned long range_sums, range_sums_new;
        int index_good;
        int num_reg_good;
+       int i;
 
        /* extra one for all 0 */
        int num[MTRR_NUM_TYPES + 1];
@@ -1204,6 +1249,8 @@ static int __init mtrr_cleanup(unsigned address_bits)
                        continue;
                if (!size)
                        type = MTRR_NUM_TYPES;
+               if (type == MTRR_TYPE_WRPROT)
+                       type = MTRR_TYPE_UNCACHABLE;
                num[type]++;
        }
 
@@ -1216,23 +1263,57 @@ static int __init mtrr_cleanup(unsigned address_bits)
                num_var_ranges - num[MTRR_NUM_TYPES])
                return 0;
 
+       /* print original var MTRRs at first, for debugging: */
+       printk(KERN_DEBUG "original variable MTRRs\n");
+       for (i = 0; i < num_var_ranges; i++) {
+               char start_factor = 'K', size_factor = 'K';
+               unsigned long start_base, size_base;
+
+               size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
+               if (!size_base)
+                       continue;
+
+               size_base = to_size_factor(size_base, &size_factor),
+               start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
+               start_base = to_size_factor(start_base, &start_factor),
+               type = range_state[i].type;
+
+               printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
+                       i, start_base, start_factor,
+                       size_base, size_factor,
+                       (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
+                           ((type == MTRR_TYPE_WRPROT) ? "WP" :
+                            ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
+                       );
+       }
+
        memset(range, 0, sizeof(range));
        extra_remove_size = 0;
-       if (mtrr_tom2) {
-               extra_remove_base = 1 << (32 - PAGE_SHIFT);
+       extra_remove_base = 1 << (32 - PAGE_SHIFT);
+       if (mtrr_tom2)
                extra_remove_size =
                        (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
-       }
        nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
                                          extra_remove_size);
+       /*
+        * [0, 1M) should always be covered by var mtrr with WB
+        * and fixed mtrrs should take effect before var mtrr for it
+        */
+       nr_range = add_range_with_merge(range, nr_range, 0,
+                                       (1ULL<<(20 - PAGE_SHIFT)) - 1);
+       /* sort the ranges */
+       sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+
        range_sums = sum_ranges(range, nr_range);
        printk(KERN_INFO "total RAM coverred: %ldM\n",
               range_sums >> (20 - PAGE_SHIFT));
 
        if (mtrr_chunk_size && mtrr_gran_size) {
                int num_reg;
+               char gran_factor, chunk_factor, lose_factor;
+               unsigned long gran_base, chunk_base, lose_base;
 
-               debug_print = 1;
+               debug_print++;
                /* convert ranges to var ranges state */
                num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
                                              mtrr_gran_size);
@@ -1256,34 +1337,48 @@ static int __init mtrr_cleanup(unsigned address_bits)
                        result[i].lose_cover_sizek =
                                (range_sums - range_sums_new) << PSHIFT;
 
-               printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
-                        result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10,
-                        result[i].chunk_sizek >> 10);
-               printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ldM \n",
+               gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+               chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+               lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+               printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
+                        result[i].bad?"*BAD*":" ",
+                        gran_base, gran_factor, chunk_base, chunk_factor);
+               printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ld%c\n",
                         result[i].num_reg, result[i].bad?"-":"",
-                        result[i].lose_cover_sizek >> 10);
+                        lose_base, lose_factor);
                if (!result[i].bad) {
                        set_var_mtrr_all(address_bits);
                        return 1;
                }
                printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
                       "will find optimal one\n");
-               debug_print = 0;
+               debug_print--;
                memset(result, 0, sizeof(result[0]));
        }
 
        i = 0;
        memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
        memset(result, 0, sizeof(result));
-       for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) {
-               for (chunk_size = gran_size; chunk_size < (1ULL<<33);
+       for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
+               char gran_factor;
+               unsigned long gran_base;
+
+               if (debug_print)
+                       gran_base = to_size_factor(gran_size >> 10, &gran_factor);
+
+               for (chunk_size = gran_size; chunk_size < (1ULL<<32);
                     chunk_size <<= 1) {
                        int num_reg;
 
-                       if (debug_print)
-                               printk(KERN_INFO
-                              "\ngran_size: %lldM   chunk_size_size: %lldM\n",
-                                      gran_size >> 20, chunk_size >> 20);
+                       if (debug_print) {
+                               char chunk_factor;
+                               unsigned long chunk_base;
+
+                               chunk_base = to_size_factor(chunk_size>>10, &chunk_factor),
+                               printk(KERN_INFO "\n");
+                               printk(KERN_INFO "gran_size: %ld%c   chunk_size: %ld%c \n",
+                                      gran_base, gran_factor, chunk_base, chunk_factor);
+                       }
                        if (i >= NUM_RESULT)
                                continue;
 
@@ -1326,12 +1421,18 @@ static int __init mtrr_cleanup(unsigned address_bits)
 
        /* print out all */
        for (i = 0; i < NUM_RESULT; i++) {
-               printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
-                      result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10,
-                      result[i].chunk_sizek >> 10);
-               printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n",
-                      result[i].num_reg, result[i].bad?"-":"",
-                      result[i].lose_cover_sizek >> 10);
+               char gran_factor, chunk_factor, lose_factor;
+               unsigned long gran_base, chunk_base, lose_base;
+
+               gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+               chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+               lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+               printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
+                        result[i].bad?"*BAD*":" ",
+                        gran_base, gran_factor, chunk_base, chunk_factor);
+               printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ld%c\n",
+                        result[i].num_reg, result[i].bad?"-":"",
+                        lose_base, lose_factor);
        }
 
        /* try to find the optimal index */
@@ -1339,10 +1440,8 @@ static int __init mtrr_cleanup(unsigned address_bits)
                nr_mtrr_spare_reg = num_var_ranges - 1;
        num_reg_good = -1;
        for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
-               if (!min_loss_pfn[i]) {
+               if (!min_loss_pfn[i])
                        num_reg_good = i;
-                       break;
-               }
        }
 
        index_good = -1;
@@ -1358,21 +1457,26 @@ static int __init mtrr_cleanup(unsigned address_bits)
        }
 
        if (index_good != -1) {
+               char gran_factor, chunk_factor, lose_factor;
+               unsigned long gran_base, chunk_base, lose_base;
+
                printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
                i = index_good;
-               printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t",
-                               result[i].gran_sizek >> 10,
-                               result[i].chunk_sizek >> 10);
-               printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n",
-                               result[i].num_reg,
-                               result[i].lose_cover_sizek >> 10);
+               gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+               chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+               lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+               printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t",
+                        gran_base, gran_factor, chunk_base, chunk_factor);
+               printk(KERN_CONT "num_reg: %d  \tlose RAM: %ld%c\n",
+                        result[i].num_reg, lose_base, lose_factor);
                /* convert ranges to var ranges state */
                chunk_size = result[i].chunk_sizek;
                chunk_size <<= 10;
                gran_size = result[i].gran_sizek;
                gran_size <<= 10;
-               debug_print = 1;
+               debug_print++;
                x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
+               debug_print--;
                set_var_mtrr_all(address_bits);
                return 1;
        }
index 05cc22d..6bff382 100644 (file)
@@ -295,13 +295,19 @@ static int setup_k7_watchdog(unsigned nmi_hz)
        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
        write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz);
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       evntsel |= K7_EVNTSEL_ENABLE;
-       wrmsr(evntsel_msr, evntsel, 0);
 
+       /* initialize the wd struct before enabling */
        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */
+
+       /* ok, everything is initialized, announce that we're set */
+       cpu_nmi_set_wd_enabled();
+
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+       evntsel |= K7_EVNTSEL_ENABLE;
+       wrmsr(evntsel_msr, evntsel, 0);
+
        return 1;
 }
 
@@ -379,13 +385,19 @@ static int setup_p6_watchdog(unsigned nmi_hz)
        wrmsr(evntsel_msr, evntsel, 0);
        nmi_hz = adjust_for_32bit_ctr(nmi_hz);
        write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz);
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       evntsel |= P6_EVNTSEL0_ENABLE;
-       wrmsr(evntsel_msr, evntsel, 0);
 
+       /* initialize the wd struct before enabling */
        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */
+
+       /* ok, everything is initialized, announce that we're set */
+       cpu_nmi_set_wd_enabled();
+
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+       evntsel |= P6_EVNTSEL0_ENABLE;
+       wrmsr(evntsel_msr, evntsel, 0);
+
        return 1;
 }
 
@@ -432,6 +444,27 @@ static const struct wd_ops p6_wd_ops = {
 #define P4_CCCR_ENABLE         (1 << 12)
 #define P4_CCCR_OVF            (1 << 31)
 
+#define P4_CONTROLS 18
+/*
+ * CCCR MSRs that setup_p4_watchdog() strips of P4_CCCR_ENABLE and
+ * P4_CCCR_OVF when reset_devices is set.  The array is sized by
+ * P4_CONTROLS so that the table and the loop bound cannot drift apart.
+ */
+static unsigned int p4_controls[P4_CONTROLS] = {
+       MSR_P4_BPU_CCCR0,
+       MSR_P4_BPU_CCCR1,
+       MSR_P4_BPU_CCCR2,
+       MSR_P4_BPU_CCCR3,
+       MSR_P4_MS_CCCR0,
+       MSR_P4_MS_CCCR1,
+       MSR_P4_MS_CCCR2,
+       MSR_P4_MS_CCCR3,
+       MSR_P4_FLAME_CCCR0,
+       MSR_P4_FLAME_CCCR1,
+       MSR_P4_FLAME_CCCR2,
+       MSR_P4_FLAME_CCCR3,
+       MSR_P4_IQ_CCCR0,
+       MSR_P4_IQ_CCCR1,
+       MSR_P4_IQ_CCCR2,
+       MSR_P4_IQ_CCCR3,
+       MSR_P4_IQ_CCCR4,
+       MSR_P4_IQ_CCCR5,
+};
 /*
  * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
  * CRU_ESCR0 (with any non-null event selector) through a complemented
@@ -473,6 +506,26 @@ static int setup_p4_watchdog(unsigned nmi_hz)
                evntsel_msr = MSR_P4_CRU_ESCR0;
                cccr_msr = MSR_P4_IQ_CCCR0;
                cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
+
+               /*
+                * If we're on the kdump kernel or other situation, we may
+                * still have other performance counter registers set to
+                * interrupt and they'll keep interrupting forever because
+                * of the P4_CCCR_OVF quirk. So we need to ACK all the
+                * pending interrupts and disable all the registers here,
+                * before reenabling the NMI delivery. Refer to p4_rearm()
+                * about the P4_CCCR_OVF quirk.
+                */
+               if (reset_devices) {
+                       unsigned int low, high;
+                       int i;
+
+                       for (i = 0; i < P4_CONTROLS; i++) {
+                               rdmsr(p4_controls[i], low, high);
+                               low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
+                               wrmsr(p4_controls[i], low, high);
+                       }
+               }
        } else {
                /* logical cpu 1 */
                perfctr_msr = MSR_P4_IQ_PERFCTR1;
@@ -499,12 +552,17 @@ static int setup_p4_watchdog(unsigned nmi_hz)
        wrmsr(evntsel_msr, evntsel, 0);
        wrmsr(cccr_msr, cccr_val, 0);
        write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       cccr_val |= P4_CCCR_ENABLE;
-       wrmsr(cccr_msr, cccr_val, 0);
+
        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = cccr_msr;
+
+       /* ok, everything is initialized, announce that we're set */
+       cpu_nmi_set_wd_enabled();
+
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+       cccr_val |= P4_CCCR_ENABLE;
+       wrmsr(cccr_msr, cccr_val, 0);
        return 1;
 }
 
@@ -620,13 +678,17 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
        wrmsr(evntsel_msr, evntsel, 0);
        nmi_hz = adjust_for_32bit_ctr(nmi_hz);
        write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-       wrmsr(evntsel_msr, evntsel, 0);
 
        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */
+
+       /* ok, everything is initialized, announce that we're set */
+       cpu_nmi_set_wd_enabled();
+
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+       evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+       wrmsr(evntsel_msr, evntsel, 0);
        intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
        return 1;
 }
index 8e9cd6a..6a44d64 100644 (file)
@@ -36,7 +36,6 @@
 #include <linux/smp_lock.h>
 #include <linux/major.h>
 #include <linux/fs.h>
-#include <linux/smp_lock.h>
 #include <linux/device.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
index 15e6c6b..e90a60e 100644 (file)
@@ -7,9 +7,8 @@
 
 #include <linux/errno.h>
 #include <linux/crash_dump.h>
-
-#include <asm/uaccess.h>
-#include <asm/io.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
 
 /**
  * copy_oldmem_page - copy one page from "oldmem"
@@ -25,7 +24,7 @@
  * in the current kernel. We stitch up a pte, similar to kmap_atomic.
  */
 ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
-                               size_t csize, unsigned long offset, int userbuf)
+               size_t csize, unsigned long offset, int userbuf)
 {
        void  *vaddr;
 
@@ -33,14 +32,16 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
                return 0;
 
        vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
+       if (!vaddr)
+               return -ENOMEM;
 
        if (userbuf) {
-               if (copy_to_user(buf, (vaddr + offset), csize)) {
+               if (copy_to_user(buf, vaddr + offset, csize)) {
                        iounmap(vaddr);
                        return -EFAULT;
                }
        } else
-       memcpy(buf, (vaddr + offset), csize);
+               memcpy(buf, vaddr + offset, csize);
 
        iounmap(vaddr);
        return csize;
index 11c11b8..2b69994 100644 (file)
@@ -2,26 +2,49 @@
  * Debug Store support
  *
  * This provides a low-level interface to the hardware's Debug Store
- * feature that is used for last branch recording (LBR) and
+ * feature that is used for branch trace store (BTS) and
  * precise-event based sampling (PEBS).
  *
- * Different architectures use a different DS layout/pointer size.
- * The below functions therefore work on a void*.
+ * It manages:
+ * - per-thread and per-cpu allocation of BTS and PEBS
+ * - buffer memory allocation (optional)
+ * - buffer overflow handling
+ * - buffer access
  *
+ * It assumes:
+ * - get_task_struct on all parameter tasks
+ * - current is allowed to trace parameter tasks
  *
- * Since there is no user for PEBS, yet, only LBR (or branch
- * trace store, BTS) is supported.
  *
- *
- * Copyright (C) 2007 Intel Corporation.
- * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
+ * Copyright (C) 2007-2008 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
  */
 
+
+#ifdef CONFIG_X86_DS
+
 #include <asm/ds.h>
 
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+
+/*
+ * The configuration for a particular DS hardware implementation.
+ *
+ * All sizes are in bytes. A zero sizeof_ds makes ds_request() fail
+ * with -EOPNOTSUPP.
+ */
+struct ds_configuration {
+       /* the size of the DS structure in bytes */
+       unsigned char  sizeof_ds;
+       /* the size of one pointer-typed field in the DS structure in bytes;
+          this covers the first 8 fields related to buffer management. */
+       unsigned char  sizeof_field;
+       /* the size of a BTS/PEBS record in bytes;
+          indexed by enum ds_qualifier (ds_bts, ds_pebs) */
+       unsigned char  sizeof_rec[2];
+};
+/* the single configuration consulted by all accessors in this file */
+static struct ds_configuration ds_cfg;
 
 
 /*
  *   (interrupt occurs when write pointer passes interrupt pointer)
  * - value to which counter is reset following counter overflow
  *
- * On later architectures, the last branch recording hardware uses
- * 64bit pointers even in 32bit mode.
- *
- *
- * Branch Trace Store (BTS) records store information about control
- * flow changes. They at least provide the following information:
- * - source linear address
- * - destination linear address
+ * Later architectures use 64bit pointers throughout, whereas earlier
+ * architectures use 32bit pointers in 32bit mode.
  *
- * Netburst supported a predicated bit that had been dropped in later
- * architectures. We do not suppor it.
  *
+ * We compute the base address for the first 8 fields based on:
+ * - the field size stored in the DS configuration
+ * - the relative field position
+ * - an offset giving the start of the respective region
  *
- * In order to abstract from the actual DS and BTS layout, we describe
- * the access to the relevant fields.
- * Thanks to Andi Kleen for proposing this design.
+ * This offset is further used to index various arrays holding
+ * information for BTS and PEBS at the respective index.
  *
- * The implementation, however, is not as general as it might seem. In
- * order to stay somewhat simple and efficient, we assume an
- * underlying unsigned type (mostly a pointer type) and we expect the
- * field to be at least as big as that type.
+ * On later 32bit processors, we only access the lower 32bit of the
+ * 64bit pointer fields. The upper halves will be zeroed out.
  */
 
-/*
- * A special from_ip address to indicate that the BTS record is an
- * info record that needs to be interpreted or skipped.
- */
-#define BTS_ESCAPE_ADDRESS (-1)
+/*
+ * Position of a buffer management field within the group of four
+ * fields that ds_get()/ds_set() address for one ds_qualifier.
+ */
+enum ds_field {
+       ds_buffer_base = 0,
+       ds_index,
+       ds_absolute_maximum,
+       ds_interrupt_threshold,
+};
 
-/*
- * A field access descriptor
- */
-struct access_desc {
-       unsigned char offset;
-       unsigned char size;
+/*
+ * Selects the BTS or the PEBS group of buffer management fields
+ * (ds_get()/ds_set() skip 4 fields per qualifier step); also used as
+ * index into per-qualifier arrays such as ds_cfg.sizeof_rec[].
+ */
+enum ds_qualifier {
+       ds_bts  = 0,
+       ds_pebs
 };
 
+/*
+ * Read one buffer management field from the DS area at base.
+ *
+ * The field lives in slot (field + 4 * qual), every slot being
+ * ds_cfg.sizeof_field bytes wide. The value is always loaded as an
+ * unsigned long, whatever the configured field width is.
+ */
+static inline unsigned long ds_get(const unsigned char *base,
+                                  enum ds_qualifier qual, enum ds_field field)
+{
+       const unsigned char *slot;
+
+       slot = base + (ds_cfg.sizeof_field * (field + (4 * qual)));
+
+       return *(unsigned long *)slot;
+}
+
+/*
+ * Write one buffer management field of the DS area at base.
+ *
+ * Uses the same slot computation as ds_get(): slot (field + 4 * qual),
+ * each slot ds_cfg.sizeof_field bytes wide. The value is always stored
+ * as an unsigned long.
+ */
+static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
+                         enum ds_field field, unsigned long value)
+{
+       unsigned char *slot;
+
+       slot = base + (ds_cfg.sizeof_field * (field + (4 * qual)));
+
+       *(unsigned long *)slot = value;
+}
+
+
 /*
- * The configuration for a particular DS/BTS hardware implementation.
+ * Locking is done only for allocating BTS or PEBS resources and for
+ * guarding context and buffer memory allocation.
+ *
+ * Most functions require the current task to own the ds context part
+ * they are going to access. All the locking is done when validating
+ * access to the context.
  */
-struct ds_configuration {
-       /* the DS configuration */
-       unsigned char  sizeof_ds;
-       struct access_desc bts_buffer_base;
-       struct access_desc bts_index;
-       struct access_desc bts_absolute_maximum;
-       struct access_desc bts_interrupt_threshold;
-       /* the BTS configuration */
-       unsigned char  sizeof_bts;
-       struct access_desc from_ip;
-       struct access_desc to_ip;
-       /* BTS variants used to store additional information like
-          timestamps */
-       struct access_desc info_type;
-       struct access_desc info_data;
-       unsigned long debugctl_mask;
-};
+/* guards tracer accounting and ds context allocation/use counting */
+static DEFINE_SPINLOCK(ds_lock);
 
 /*
- * The global configuration used by the below accessor functions
+ * Validate that the current task is allowed to access the BTS/PEBS
+ * buffer of the parameter task.
+ *
+ * Returns 0, if access is granted; -Eerrno, otherwise.
  */
-static struct ds_configuration ds_cfg;
+static inline int ds_validate_access(struct ds_context *context,
+                                    enum ds_qualifier qual)
+{
+       /* a missing context cannot be owned by anybody */
+       if (context && (context->owner[qual] == current))
+               return 0;
+
+       return -EPERM;
+}
+
 
 /*
- * Accessor functions for some DS and BTS fields using the above
- * global ptrace_bts_cfg.
+ * We either support (system-wide) per-cpu or per-thread allocation.
+ * We distinguish the two based on the task_struct pointer, where a
+ * NULL pointer indicates per-cpu allocation for the current cpu.
+ *
+ * Allocations are use-counted. As soon as resources are allocated,
+ * further allocations must be of the same type (per-cpu or
+ * per-thread). We model this by counting allocations (i.e. the number
+ * of tracers of a certain type) for one type negatively:
+ *   =0  no tracers
+ *   >0  number of per-thread tracers
+ *   <0  number of per-cpu tracers
+ *
+ * The below functions to get and put tracers and to check the
+ * allocation type require the ds_lock to be held by the caller.
+ *
+ * Tracers essentially gives the number of ds contexts for a certain
+ * type of allocation.
  */
-static inline unsigned long get_bts_buffer_base(char *base)
+static long tracers;
+
+/* account one more tracer: per-thread counts up, per-cpu counts down */
+static inline void get_tracer(struct task_struct *task)
 {
-       return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset);
+       if (task)
+               tracers++;
+       else
+               tracers--;
 }
-static inline void set_bts_buffer_base(char *base, unsigned long value)
+
+/* undo one get_tracer() for the same kind of task argument */
+static inline void put_tracer(struct task_struct *task)
 {
-       (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value;
+       if (task)
+               tracers--;
+       else
+               tracers++;
 }
-static inline unsigned long get_bts_index(char *base)
+
+/*
+ * Returns non-zero, if a tracer of the requested kind (per-thread for
+ * a non-NULL task, per-cpu otherwise) may be added.
+ */
+static inline int check_tracer(struct task_struct *task)
 {
-       return *(unsigned long *)(base + ds_cfg.bts_index.offset);
+       if (task)
+               return (tracers >= 0);
+
+       return (tracers <= 0);
 }
-static inline void set_bts_index(char *base, unsigned long value)
+
+
+/*
+ * The DS context is either attached to a thread or to a cpu:
+ * - in the former case, the thread_struct contains a pointer to the
+ *   attached context.
+ * - in the latter case, we use a static array of per-cpu context
+ *   pointers.
+ *
+ * Contexts are use-counted. They are allocated on first access and
+ * deallocated when the last user puts the context.
+ *
+ * We distinguish between an allocating and a non-allocating get of a
+ * context:
+ * - the allocating get is used for requesting BTS/PEBS resources. It
+ *   requires the caller to hold the global ds_lock.
+ * - the non-allocating get is used for all other cases. A
+ *   non-existing context indicates an error. It acquires and releases
+ *   the ds_lock itself for obtaining the context.
+ *
+ * A context and its DS configuration are allocated and deallocated
+ * together. A context always has a DS configuration of the
+ * appropriate size.
+ */
+static DEFINE_PER_CPU(struct ds_context *, system_context);
+
+#define this_system_context per_cpu(system_context, smp_processor_id())
+
+/*
+ * Returns the pointer to the parameter task's context or to the
+ * system-wide context, if task is NULL.
+ *
+ * Increases the use count of the returned context, if not NULL.
+ */
+static inline struct ds_context *ds_get_context(struct task_struct *task)
 {
-       (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value;
+       struct ds_context *context;
+
+       /* lookup and use count update happen under the same lock */
+       spin_lock(&ds_lock);
+
+       /* non-allocating: a missing context is reported as NULL */
+       context = (task ? task->thread.ds_ctx : this_system_context);
+       if (context)
+               context->count++;
+
+       spin_unlock(&ds_lock);
+
+       return context;
 }
-static inline unsigned long get_bts_absolute_maximum(char *base)
+
+/*
+ * Same as ds_get_context, but allocates the context and it's DS
+ * structure, if necessary; returns NULL; if out of memory.
+ *
+ * pre: requires ds_lock to be held
+ */
+static inline struct ds_context *ds_alloc_context(struct task_struct *task)
 {
-       return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset);
+       struct ds_context **p_context =
+               (task ? &task->thread.ds_ctx : &this_system_context);
+       struct ds_context *context = *p_context;
+
+       if (!context) {
+               /*
+                * The caller holds the ds_lock spinlock; the
+                * allocations below must therefore not sleep.
+                */
+               context = kzalloc(sizeof(*context), GFP_ATOMIC);
+
+               if (!context)
+                       return NULL;
+
+               context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_ATOMIC);
+               if (!context->ds) {
+                       kfree(context);
+                       return NULL;
+               }
+
+               *p_context = context;
+
+               /* remember where to clear the pointer on the last put */
+               context->this = p_context;
+               context->task = task;
+
+               if (task)
+                       set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
+
+               /*
+                * Write the full pointer; wrmsr() takes two separate
+                * 32bit halves and would drop the upper half of a
+                * 64bit address.
+                */
+               if (!task || (task == current))
+                       wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds);
+
+               /* a new context is a new tracer of that kind */
+               get_tracer(task);
+       }
+
+       context->count++;
+
+       return context;
 }
-static inline void set_bts_absolute_maximum(char *base, unsigned long value)
+
+/*
+ * Decreases the use count of the parameter context, if not NULL.
+ * Deallocates the context, if the use count reaches zero.
+ */
+static inline void ds_put_context(struct ds_context *context)
 {
-       (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value;
+       if (!context)
+               return;
+
+       spin_lock(&ds_lock);
+
+       /* other users remain; only drop our reference */
+       if (--context->count)
+               goto out;
+
+       /* last user: clear the thread's or the cpu's context pointer */
+       *(context->this) = NULL;
+
+       if (context->task)
+               clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
+
+       /* mirrors the condition under which ds_alloc_context set the MSR */
+       if (!context->task || (context->task == current))
+               wrmsrl(MSR_IA32_DS_AREA, 0);
+
+       put_tracer(context->task);
+
+       /* free any leftover buffers from tracers that did not
+        * deallocate them properly. */
+       kfree(context->buffer[ds_bts]);
+       kfree(context->buffer[ds_pebs]);
+       kfree(context->ds);
+       kfree(context);
+ out:
+       spin_unlock(&ds_lock);
 }
-static inline unsigned long get_bts_interrupt_threshold(char *base)
+
+
+/*
+ * Handle a buffer overflow
+ *
+ * task: the task whose buffers are overflowing;
+ *       NULL for a buffer overflow on the current cpu
+ * context: the ds context
+ * qual: the buffer type
+ */
+static void ds_overflow(struct task_struct *task, struct ds_context *context,
+                       enum ds_qualifier qual)
 {
-       return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset);
+       if (!context)
+               return;
+
+       /*
+        * Forward the overflow to the registered callback, if any.
+        * NOTE(review): ds_request() rejects non-NULL callbacks, so
+        * none appears to be registered at present - confirm.
+        */
+       if (context->callback[qual])
+               (*context->callback[qual])(task);
+
+       /* todo: do some more overflow handling */
 }
-static inline void set_bts_interrupt_threshold(char *base, unsigned long value)
+
+
+/*
+ * Allocate a non-pageable buffer of the parameter size.
+ * Checks the memory and the locked memory rlimit.
+ *
+ * Returns the buffer, if successful;
+ *         NULL, if out of memory or rlimit exceeded.
+ *
+ * size: the requested buffer size in bytes
+ * pages (out): if not NULL, contains the number of pages reserved
+ */
+static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
 {
-       (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value;
+       unsigned long rlim, vm, pgsz;
+       void *buffer;
+
+       /* the request is accounted in whole pages */
+       pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+       /* address space limit of the current task */
+       rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+       vm   = current->mm->total_vm  + pgsz;
+       if (rlim < vm)
+               return NULL;
+
+       /* locked memory limit of the current task */
+       rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+       vm   = current->mm->locked_vm  + pgsz;
+       if (rlim < vm)
+               return NULL;
+
+       buffer = kzalloc(size, GFP_KERNEL);
+       if (!buffer)
+               return NULL;
+
+       /* charge the pages only after the allocation succeeded */
+       current->mm->total_vm  += pgsz;
+       current->mm->locked_vm += pgsz;
+
+       if (pages)
+               *pages = pgsz;
+
+       return buffer;
 }
-static inline unsigned long get_from_ip(char *base)
+
+/*
+ * Request BTS or PEBS resources for a task or for the current cpu.
+ *
+ * task: the task to request resources for;
+ *       NULL to request them for the current cpu
+ * base: the buffer to use; NULL to have one allocated
+ * size: the size of the buffer in bytes
+ * ovfl: the overflow callback; must be NULL (not yet implemented)
+ * qual: the buffer type
+ *
+ * Returns 0 on success; -EOPNOTSUPP, -EINVAL, -EPERM, -ENOMEM, or
+ * -EALREADY otherwise. The ds_lock is released on every return path.
+ */
+static int ds_request(struct task_struct *task, void *base, size_t size,
+                     ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
 {
-       return *(unsigned long *)(base + ds_cfg.from_ip.offset);
+       struct ds_context *context;
+       unsigned long buffer, adj;
+       const unsigned long alignment = (1 << 3);
+       int error = 0;
+
+       if (!ds_cfg.sizeof_ds)
+               return -EOPNOTSUPP;
+
+       /* we require some space to do alignment adjustments below */
+       if (size < (alignment + ds_cfg.sizeof_rec[qual]))
+               return -EINVAL;
+
+       /* buffer overflow notification is not yet implemented */
+       if (ovfl)
+               return -EOPNOTSUPP;
+
+
+       spin_lock(&ds_lock);
+
+       if (!check_tracer(task)) {
+               /* no context was taken yet; just drop the lock */
+               spin_unlock(&ds_lock);
+               return -EPERM;
+       }
+
+       error = -ENOMEM;
+       context = ds_alloc_context(task);
+       if (!context)
+               goto out_unlock;
+
+       error = -EALREADY;
+       if (context->owner[qual] == current)
+               goto out_unlock;
+       error = -EPERM;
+       if (context->owner[qual] != NULL)
+               goto out_unlock;
+       context->owner[qual] = current;
+
+       spin_unlock(&ds_lock);
+
+
+       error = -ENOMEM;
+       if (!base) {
+               base = ds_allocate_buffer(size, &context->pages[qual]);
+               if (!base)
+                       goto out_release;
+
+               context->buffer[qual]   = base;
+       }
+       error = 0;
+
+       context->callback[qual] = ovfl;
+
+       /* adjust the buffer address and size to meet alignment
+        * constraints:
+        * - buffer is double-word aligned
+        * - size is multiple of record size
+        *
+        * We checked the size at the very beginning; we have enough
+        * space to do the adjustment.
+        */
+       buffer = (unsigned long)base;
+
+       adj = ALIGN(buffer, alignment) - buffer;
+       buffer += adj;
+       size   -= adj;
+
+       size /= ds_cfg.sizeof_rec[qual];
+       size *= ds_cfg.sizeof_rec[qual];
+
+       ds_set(context->ds, qual, ds_buffer_base, buffer);
+       ds_set(context->ds, qual, ds_index, buffer);
+       ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
+
+       if (ovfl) {
+               /* todo: select a suitable interrupt threshold */
+       } else
+               ds_set(context->ds, qual,
+                      ds_interrupt_threshold, buffer + size + 1);
+
+       /* we keep the context until ds_release */
+       return error;
+
+ out_release:
+       context->owner[qual] = NULL;
+       ds_put_context(context);
+       return error;
+
+ out_unlock:
+       spin_unlock(&ds_lock);
+       ds_put_context(context);
+       return error;
 }
-static inline void set_from_ip(char *base, unsigned long value)
+
+/* Request BTS resources: ds_request() with the ds_bts qualifier. */
+int ds_request_bts(struct task_struct *task, void *base, size_t size,
+                  ds_ovfl_callback_t ovfl)
 {
-       (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value;
+       return ds_request(task, base, size, ovfl, ds_bts);
 }
-static inline unsigned long get_to_ip(char *base)
+
+/* Request PEBS resources: ds_request() with the ds_pebs qualifier. */
+int ds_request_pebs(struct task_struct *task, void *base, size_t size,
+                   ds_ovfl_callback_t ovfl)
 {
-       return *(unsigned long *)(base + ds_cfg.to_ip.offset);
+       return ds_request(task, base, size, ovfl, ds_pebs);
 }
-static inline void set_to_ip(char *base, unsigned long value)
+
+/*
+ * Release the BTS or PEBS resources owned by the current task for the
+ * parameter task (or for the current cpu, if task is NULL).
+ *
+ * Returns 0 on success; -EPERM, if there is no context or current is
+ * not the owner.
+ */
+static int ds_release(struct task_struct *task, enum ds_qualifier qual)
 {
-       (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value;
+       struct ds_context *context;
+       int error;
+
+       context = ds_get_context(task);
+       error = ds_validate_access(context, qual);
+       if (error < 0)
+               goto out;
+
+       /* buffer[qual] is only set when ds_request allocated the buffer */
+       kfree(context->buffer[qual]);
+       context->buffer[qual] = NULL;
+
+       /* return the pages charged by ds_allocate_buffer */
+       current->mm->total_vm  -= context->pages[qual];
+       current->mm->locked_vm -= context->pages[qual];
+       context->pages[qual] = 0;
+       context->owner[qual] = NULL;
+
+       /*
+        * we put the context twice:
+        *   once for the ds_get_context
+        *   once for the corresponding ds_request
+        */
+       ds_put_context(context);
+ out:
+       ds_put_context(context);
+       return error;
 }
-static inline unsigned char get_info_type(char *base)
+
+/* Release BTS resources: ds_release() with the ds_bts qualifier. */
+int ds_release_bts(struct task_struct *task)
 {
-       return *(unsigned char *)(base + ds_cfg.info_type.offset);
+       return ds_release(task, ds_bts);
 }
-static inline void set_info_type(char *base, unsigned char value)
+
+/* Release PEBS resources: ds_release() with the ds_pebs qualifier. */
+int ds_release_pebs(struct task_struct *task)
 {
-       (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value;
+       return ds_release(task, ds_pebs);
 }
-static inline unsigned long get_info_data(char *base)
+
+/*
+ * Compute the current write position in the BTS or PEBS buffer as a
+ * number of records from the buffer base.
+ *
+ * pos (out): if not NULL, receives the position
+ *
+ * Returns the position (not 0) on success; -EPERM, if there is no
+ * context or current is not the owner.
+ */
+static int ds_get_index(struct task_struct *task, size_t *pos,
+                       enum ds_qualifier qual)
 {
-       return *(unsigned long *)(base + ds_cfg.info_data.offset);
+       struct ds_context *context;
+       unsigned long base, index;
+       int error;
+
+       context = ds_get_context(task);
+       error = ds_validate_access(context, qual);
+       if (error < 0)
+               goto out;
+
+       base  = ds_get(context->ds, qual, ds_buffer_base);
+       index = ds_get(context->ds, qual, ds_index);
+
+       /* byte distance converted to a record count */
+       error = ((index - base) / ds_cfg.sizeof_rec[qual]);
+       if (pos)
+               *pos = error;
+ out:
+       /* drops the reference taken by ds_get_context (NULL is fine) */
+       ds_put_context(context);
+       return error;
 }
-static inline void set_info_data(char *base, unsigned long value)
+
+int ds_get_bts_index(struct task_struct *task, size_t *pos)
 {
-       (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value;
+       return ds_get_index(task, pos, ds_bts);
 }
 
+int ds_get_pebs_index(struct task_struct *task, size_t *pos)
+{
+       return ds_get_index(task, pos, ds_pebs);
+}
 
-int ds_allocate(void **dsp, size_t bts_size_in_bytes)
+static int ds_get_end(struct task_struct *task, size_t *pos,
+                     enum ds_qualifier qual)
 {
-       size_t bts_size_in_records;
-       unsigned long bts;
-       void *ds;
+       struct ds_context *context;
+       unsigned long base, end;
+       int error;
+
+       context = ds_get_context(task);
+       error = ds_validate_access(context, qual);
+       if (error < 0)
+               goto out;
+
+       base = ds_get(context->ds, qual, ds_buffer_base);
+       end  = ds_get(context->ds, qual, ds_absolute_maximum);
+
+       error = ((end - base) / ds_cfg.sizeof_rec[qual]);
+       if (pos)
+               *pos = error;
+ out:
+       ds_put_context(context);
+       return error;
+}
 
-       if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
-               return -EOPNOTSUPP;
+int ds_get_bts_end(struct task_struct *task, size_t *pos)
+{
+       return ds_get_end(task, pos, ds_bts);
+}
 
-       if (bts_size_in_bytes < 0)
-               return -EINVAL;
+int ds_get_pebs_end(struct task_struct *task, size_t *pos)
+{
+       return ds_get_end(task, pos, ds_pebs);
+}
 
-       bts_size_in_records =
-               bts_size_in_bytes / ds_cfg.sizeof_bts;
-       bts_size_in_bytes =
-               bts_size_in_records * ds_cfg.sizeof_bts;
+static int ds_access(struct task_struct *task, size_t index,
+                    const void **record, enum ds_qualifier qual)
+{
+       struct ds_context *context;
+       unsigned long base, idx;
+       int error;
 
-       if (bts_size_in_bytes <= 0)
+       if (!record)
                return -EINVAL;
 
-       bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL);
-
-       if (!bts)
-               return -ENOMEM;
+       context = ds_get_context(task);
+       error = ds_validate_access(context, qual);
+       if (error < 0)
+               goto out;
 
-       ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
+       base = ds_get(context->ds, qual, ds_buffer_base);
+       idx = base + (index * ds_cfg.sizeof_rec[qual]);
 
-       if (!ds) {
-               kfree((void *)bts);
-               return -ENOMEM;
-       }
-
-       set_bts_buffer_base(ds, bts);
-       set_bts_index(ds, bts);
-       set_bts_absolute_maximum(ds, bts + bts_size_in_bytes);
-       set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1);
+       error = -EINVAL;
+       if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
+               goto out;
 
-       *dsp = ds;
-       return 0;
+       *record = (const void *)idx;
+       error = ds_cfg.sizeof_rec[qual];
+ out:
+       ds_put_context(context);
+       return error;
 }
 
-int ds_free(void **dsp)
+int ds_access_bts(struct task_struct *task, size_t index, const void **record)
 {
-       if (*dsp) {
-               kfree((void *)get_bts_buffer_base(*dsp));
-               kfree(*dsp);
-               *dsp = NULL;
-       }
-       return 0;
+       return ds_access(task, index, record, ds_bts);
 }
 
-int ds_get_bts_size(void *ds)
+int ds_access_pebs(struct task_struct *task, size_t index, const void **record)
 {
-       int size_in_bytes;
-
-       if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
-               return -EOPNOTSUPP;
-
-       if (!ds)
-               return 0;
-
-       size_in_bytes =
-               get_bts_absolute_maximum(ds) -
-               get_bts_buffer_base(ds);
-       return size_in_bytes;
+       return ds_access(task, index, record, ds_pebs);
 }
 
-int ds_get_bts_end(void *ds)
+static int ds_write(struct task_struct *task, const void *record, size_t size,
+                   enum ds_qualifier qual, int force)
 {
-       int size_in_bytes = ds_get_bts_size(ds);
-
-       if (size_in_bytes <= 0)
-               return size_in_bytes;
+       struct ds_context *context;
+       int error;
 
-       return size_in_bytes / ds_cfg.sizeof_bts;
-}
+       if (!record)
+               return -EINVAL;
 
-int ds_get_bts_index(void *ds)
-{
-       int index_offset_in_bytes;
+       error = -EPERM;
+       context = ds_get_context(task);
+       if (!context)
+               goto out;
 
-       if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
-               return -EOPNOTSUPP;
+       if (!force) {
+               error = ds_validate_access(context, qual);
+               if (error < 0)
+                       goto out;
+       }
 
-       index_offset_in_bytes =
-               get_bts_index(ds) -
-               get_bts_buffer_base(ds);
+       error = 0;
+       while (size) {
+               unsigned long base, index, end, write_end, int_th;
+               unsigned long write_size, adj_write_size;
+
+               /*
+                * write as much as possible without producing an
+                * overflow interrupt.
+                *
+                * interrupt_threshold must either be
+                * - bigger than absolute_maximum or
+                * - point to a record between buffer_base and absolute_maximum
+                *
+                * index points to a valid record.
+                */
+               base   = ds_get(context->ds, qual, ds_buffer_base);
+               index  = ds_get(context->ds, qual, ds_index);
+               end    = ds_get(context->ds, qual, ds_absolute_maximum);
+               int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
+
+               write_end = min(end, int_th);
+
+               /* if we are already beyond the interrupt threshold,
+                * we fill the entire buffer */
+               if (write_end <= index)
+                       write_end = end;
+
+               if (write_end <= index)
+                       goto out;
+
+               write_size = min((unsigned long) size, write_end - index);
+               memcpy((void *)index, record, write_size);
+
+               record = (const char *)record + write_size;
+               size  -= write_size;
+               error += write_size;
+
+               adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
+               adj_write_size *= ds_cfg.sizeof_rec[qual];
+
+               /* zero out trailing bytes */
+               memset((char *)index + write_size, 0,
+                      adj_write_size - write_size);
+               index += adj_write_size;
+
+               if (index >= end)
+                       index = base;
+               ds_set(context->ds, qual, ds_index, index);
+
+               if (index >= int_th)
+                       ds_overflow(task, context, qual);
+       }
 
-       return index_offset_in_bytes / ds_cfg.sizeof_bts;
+ out:
+       ds_put_context(context);
+       return error;
 }
 
-int ds_set_overflow(void *ds, int method)
+int ds_write_bts(struct task_struct *task, const void *record, size_t size)
 {
-       switch (method) {
-       case DS_O_SIGNAL:
-               return -EOPNOTSUPP;
-       case DS_O_WRAP:
-               return 0;
-       default:
-               return -EINVAL;
-       }
+       return ds_write(task, record, size, ds_bts, /* force = */ 0);
 }
 
-int ds_get_overflow(void *ds)
+int ds_write_pebs(struct task_struct *task, const void *record, size_t size)
 {
-       return DS_O_WRAP;
+       return ds_write(task, record, size, ds_pebs, /* force = */ 0);
 }
 
-int ds_clear(void *ds)
+int ds_unchecked_write_bts(struct task_struct *task,
+                          const void *record, size_t size)
 {
-       int bts_size = ds_get_bts_size(ds);
-       unsigned long bts_base;
-
-       if (bts_size <= 0)
-               return bts_size;
-
-       bts_base = get_bts_buffer_base(ds);
-       memset((void *)bts_base, 0, bts_size);
-
-       set_bts_index(ds, bts_base);
-       return 0;
+       return ds_write(task, record, size, ds_bts, /* force = */ 1);
 }
 
-int ds_read_bts(void *ds, int index, struct bts_struct *out)
+int ds_unchecked_write_pebs(struct task_struct *task,
+                           const void *record, size_t size)
 {
-       void *bts;
+       return ds_write(task, record, size, ds_pebs, /* force = */ 1);
+}
 
-       if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
-               return -EOPNOTSUPP;
+static int ds_reset_or_clear(struct task_struct *task,
+                            enum ds_qualifier qual, int clear)
+{
+       struct ds_context *context;
+       unsigned long base, end;
+       int error;
 
-       if (index < 0)
-               return -EINVAL;
+       context = ds_get_context(task);
+       error = ds_validate_access(context, qual);
+       if (error < 0)
+               goto out;
 
-       if (index >= ds_get_bts_size(ds))
-               return -EINVAL;
+       base = ds_get(context->ds, qual, ds_buffer_base);
+       end  = ds_get(context->ds, qual, ds_absolute_maximum);
 
-       bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts));
+       if (clear)
+               memset((void *)base, 0, end - base);
 
-       memset(out, 0, sizeof(*out));
-       if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) {
-               out->qualifier       = get_info_type(bts);
-               out->variant.jiffies = get_info_data(bts);
-       } else {
-               out->qualifier = BTS_BRANCH;
-               out->variant.lbr.from_ip = get_from_ip(bts);
-               out->variant.lbr.to_ip   = get_to_ip(bts);
-       }
+       ds_set(context->ds, qual, ds_index, base);
 
-       return sizeof(*out);;
+       error = 0;
+ out:
+       ds_put_context(context);
+       return error;
 }
 
-int ds_write_bts(void *ds, const struct bts_struct *in)
+int ds_reset_bts(struct task_struct *task)
 {
-       unsigned long bts;
-
-       if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
-               return -EOPNOTSUPP;
-
-       if (ds_get_bts_size(ds) <= 0)
-               return -ENXIO;
+       return ds_reset_or_clear(task, ds_bts, /* clear = */ 0);
+}
 
-       bts = get_bts_index(ds);
+int ds_reset_pebs(struct task_struct *task)
+{
+       return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0);
+}
 
-       memset((void *)bts, 0, ds_cfg.sizeof_bts);
-       switch (in->qualifier) {
-       case BTS_INVALID:
-               break;
+int ds_clear_bts(struct task_struct *task)
+{
+       return ds_reset_or_clear(task, ds_bts, /* clear = */ 1);
+}
 
-       case BTS_BRANCH:
-               set_from_ip((void *)bts, in->variant.lbr.from_ip);
-               set_to_ip((void *)bts, in->variant.lbr.to_ip);
-               break;
+int ds_clear_pebs(struct task_struct *task)
+{
+       return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1);
+}
 
-       case BTS_TASK_ARRIVES:
-       case BTS_TASK_DEPARTS:
-               set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS);
-               set_info_type((void *)bts, in->qualifier);
-               set_info_data((void *)bts, in->variant.jiffies);
-               break;
+int ds_get_pebs_reset(struct task_struct *task, u64 *value)
+{
+       struct ds_context *context;
+       int error;
 
-       default:
+       if (!value)
                return -EINVAL;
-       }
 
-       bts = bts + ds_cfg.sizeof_bts;
-       if (bts >= get_bts_absolute_maximum(ds))
-               bts = get_bts_buffer_base(ds);
-       set_bts_index(ds, bts);
+       context = ds_get_context(task);
+       error = ds_validate_access(context, ds_pebs);
+       if (error < 0)
+               goto out;
 
-       return ds_cfg.sizeof_bts;
+       *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
+
+       error = 0;
+ out:
+       ds_put_context(context);
+       return error;
 }
 
-unsigned long ds_debugctl_mask(void)
+int ds_set_pebs_reset(struct task_struct *task, u64 value)
 {
-       return ds_cfg.debugctl_mask;
-}
+       struct ds_context *context;
+       int error;
 
-#ifdef __i386__
-static const struct ds_configuration ds_cfg_netburst = {
-       .sizeof_ds = 9 * 4,
-       .bts_buffer_base = { 0, 4 },
-       .bts_index = { 4, 4 },
-       .bts_absolute_maximum = { 8, 4 },
-       .bts_interrupt_threshold = { 12, 4 },
-       .sizeof_bts = 3 * 4,
-       .from_ip = { 0, 4 },
-       .to_ip = { 4, 4 },
-       .info_type = { 4, 1 },
-       .info_data = { 8, 4 },
-       .debugctl_mask = (1<<2)|(1<<3)
-};
+       context = ds_get_context(task);
+       error = ds_validate_access(context, ds_pebs);
+       if (error < 0)
+               goto out;
 
-static const struct ds_configuration ds_cfg_pentium_m = {
-       .sizeof_ds = 9 * 4,
-       .bts_buffer_base = { 0, 4 },
-       .bts_index = { 4, 4 },
-       .bts_absolute_maximum = { 8, 4 },
-       .bts_interrupt_threshold = { 12, 4 },
-       .sizeof_bts = 3 * 4,
-       .from_ip = { 0, 4 },
-       .to_ip = { 4, 4 },
-       .info_type = { 4, 1 },
-       .info_data = { 8, 4 },
-       .debugctl_mask = (1<<6)|(1<<7)
+       *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value;
+
+       error = 0;
+ out:
+       ds_put_context(context);
+       return error;
+}
+
+static const struct ds_configuration ds_cfg_var = {
+       .sizeof_ds    = sizeof(long) * 12,
+       .sizeof_field = sizeof(long),
+       .sizeof_rec[ds_bts]   = sizeof(long) * 3,
+       .sizeof_rec[ds_pebs]  = sizeof(long) * 10
 };
-#endif /* _i386_ */
-
-static const struct ds_configuration ds_cfg_core2 = {
-       .sizeof_ds = 9 * 8,
-       .bts_buffer_base = { 0, 8 },
-       .bts_index = { 8, 8 },
-       .bts_absolute_maximum = { 16, 8 },
-       .bts_interrupt_threshold = { 24, 8 },
-       .sizeof_bts = 3 * 8,
-       .from_ip = { 0, 8 },
-       .to_ip = { 8, 8 },
-       .info_type = { 8, 1 },
-       .info_data = { 16, 8 },
-       .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
+static const struct ds_configuration ds_cfg_64 = {
+       .sizeof_ds    = 8 * 12,
+       .sizeof_field = 8,
+       .sizeof_rec[ds_bts]   = 8 * 3,
+       .sizeof_rec[ds_pebs]  = 8 * 10
 };
 
 static inline void
@@ -429,14 +821,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
        switch (c->x86) {
        case 0x6:
                switch (c->x86_model) {
-#ifdef __i386__
                case 0xD:
                case 0xE: /* Pentium M */
-                       ds_configure(&ds_cfg_pentium_m);
+                       ds_configure(&ds_cfg_var);
                        break;
-#endif /* _i386_ */
                case 0xF: /* Core2 */
-                       ds_configure(&ds_cfg_core2);
+               case 0x1C: /* Atom */
+                       ds_configure(&ds_cfg_64);
                        break;
                default:
                        /* sorry, don't know about them */
@@ -445,13 +836,11 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
                break;
        case 0xF:
                switch (c->x86_model) {
-#ifdef __i386__
                case 0x0:
                case 0x1:
                case 0x2: /* Netburst */
-                       ds_configure(&ds_cfg_netburst);
+                       ds_configure(&ds_cfg_var);
                        break;
-#endif /* _i386_ */
                default:
                        /* sorry, don't know about them */
                        break;
@@ -462,3 +851,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
                break;
        }
 }
+
+void ds_free(struct ds_context *context)
+{
+       /* This is called when the task owning the parameter context
+        * is dying. There should not be any user of that context left
+        * to disturb us anymore. */
+       unsigned long leftovers = context->count;
+       while (leftovers--)
+               ds_put_context(context);
+}
+#endif /* CONFIG_X86_DS */
index 06cc8d4..945a31c 100644 (file)
@@ -414,9 +414,11 @@ void __init efi_init(void)
        if (memmap.map == NULL)
                printk(KERN_ERR "Could not map the EFI memory map!\n");
        memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
+
        if (memmap.desc_size != sizeof(efi_memory_desc_t))
-               printk(KERN_WARNING "Kernel-defined memdesc"
-                      "doesn't match the one from EFI!\n");
+               printk(KERN_WARNING
+                 "Kernel-defined memdesc doesn't match the one from EFI!\n");
+
        if (add_efi_memmap)
                do_add_efi_memmap();
 
index 89434d4..cf3a0b2 100644 (file)
@@ -275,9 +275,9 @@ ENTRY(native_usergs_sysret64)
 ENTRY(ret_from_fork)
        CFI_DEFAULT_STACK
        push kernel_eflags(%rip)
-       CFI_ADJUST_CFA_OFFSET 4
+       CFI_ADJUST_CFA_OFFSET 8
        popf                            # reset kernel eflags
-       CFI_ADJUST_CFA_OFFSET -4
+       CFI_ADJUST_CFA_OFFSET -8
        call schedule_tail
        GET_THREAD_INFO(%rcx)
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
index 9bfc4d7..d16084f 100644 (file)
@@ -108,12 +108,11 @@ void __init x86_64_start_kernel(char * real_mode_data)
        }
        load_idt((const struct desc_ptr *)&idt_descr);
 
-       early_printk("Kernel alive\n");
+       if (console_loglevel == 10)
+               early_printk("Kernel alive\n");
 
        x86_64_init_pda();
 
-       early_printk("Kernel really alive\n");
-
        x86_64_start_reservations(real_mode_data);
 }
 
index 50e5e4a..1919143 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/thread_info.h>
 #include <linux/syscalls.h>
+#include <asm/syscalls.h>
 
 /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
 static void set_bitmap(unsigned long *bitmap, unsigned int base,
index 3f7537b..f1c688e 100644 (file)
@@ -20,6 +20,8 @@
 
 #ifdef CONFIG_X86_32
 #include <mach_apic.h>
+#include <mach_ipi.h>
+
 /*
  * the following functions deal with sending IPIs between CPUs.
  *
@@ -147,7 +149,6 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
 }
 
 /* must come after the send_IPI functions above for inlining */
-#include <mach_ipi.h>
 static int convert_apicid_to_cpu(int apic_id)
 {
        int i;
index 1cf8c1f..b71e02d 100644 (file)
@@ -325,7 +325,7 @@ skip:
                for_each_online_cpu(j)
                        seq_printf(p, "%10u ",
                                per_cpu(irq_stat,j).irq_call_count);
-               seq_printf(p, "  function call interrupts\n");
+               seq_printf(p, "  Function call interrupts\n");
                seq_printf(p, "TLB: ");
                for_each_online_cpu(j)
                        seq_printf(p, "%10u ",
index 1f78b23..f065fe9 100644 (file)
@@ -129,7 +129,7 @@ skip:
                seq_printf(p, "CAL: ");
                for_each_online_cpu(j)
                        seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
-               seq_printf(p, "  function call interrupts\n");
+               seq_printf(p, "  Function call interrupts\n");
                seq_printf(p, "TLB: ");
                for_each_online_cpu(j)
                        seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
index 8b7a3cf..478bca9 100644 (file)
@@ -178,7 +178,7 @@ static void kvm_flush_tlb(void)
        kvm_deferred_mmu_op(&ftlb, sizeof ftlb);
 }
 
-static void kvm_release_pt(u32 pfn)
+static void kvm_release_pt(unsigned long pfn)
 {
        struct kvm_mmu_op_release_pt rpt = {
                .header.op = KVM_MMU_OP_RELEASE_PT,
index b68e21f..0ed5f93 100644 (file)
@@ -18,6 +18,7 @@
 #include <asm/ldt.h>
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
+#include <asm/syscalls.h>
 
 #ifdef CONFIG_SMP
 static void flush_ldt(void *current_mm)
index abb78a2..2c97f07 100644 (file)
@@ -299,6 +299,15 @@ void acpi_nmi_disable(void)
                on_each_cpu(__acpi_nmi_disable, NULL, 1);
 }
 
+/*
+ * This function is called as soon as the LAPIC NMI watchdog driver has
+ * everything in place and is ready to check if NMIs belong to the NMI watchdog.
+ */
+void cpu_nmi_set_wd_enabled(void)
+{
+       __get_cpu_var(wd_enabled) = 1;
+}
+
 void setup_apic_nmi_watchdog(void *unused)
 {
        if (__get_cpu_var(wd_enabled))
@@ -311,8 +320,6 @@ void setup_apic_nmi_watchdog(void *unused)
 
        switch (nmi_watchdog) {
        case NMI_LOCAL_APIC:
-                /* enable it before to avoid race with handler */
-               __get_cpu_var(wd_enabled) = 1;
                if (lapic_watchdog_init(nmi_hz) < 0) {
                        __get_cpu_var(wd_enabled) = 0;
                        return;
index 3e66722..7a13fac 100644 (file)
@@ -190,12 +190,12 @@ EXPORT_SYMBOL_GPL(olpc_ec_cmd);
 static void __init platform_detect(void)
 {
        size_t propsize;
-       u32 rev;
+       __be32 rev;
 
        if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4,
                        &propsize) || propsize != 4) {
                printk(KERN_ERR "ofw: getprop call failed!\n");
-               rev = 0;
+               rev = cpu_to_be32(0);
        }
        olpc_platform_info.boardrev = be32_to_cpu(rev);
 }
@@ -203,7 +203,7 @@ static void __init platform_detect(void)
 static void __init platform_detect(void)
 {
        /* stopgap until OFW support is added to the kernel */
-       olpc_platform_info.boardrev = be32_to_cpu(0xc2);
+       olpc_platform_info.boardrev = 0xc2;
 }
 #endif
 
index 300da17..e2f4376 100644 (file)
@@ -330,6 +330,7 @@ struct pv_cpu_ops pv_cpu_ops = {
 #endif
        .wbinvd = native_wbinvd,
        .read_msr = native_read_msr_safe,
+       .read_msr_amd = native_read_msr_amd_safe,
        .write_msr = native_write_msr_safe,
        .read_tsc = native_read_tsc,
        .read_pmc = native_read_pmc,
index 5826221..9fe644f 100644 (file)
@@ -23,7 +23,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                        start = start_##ops##_##x;              \
                        end = end_##ops##_##x;                  \
                        goto patch_site
-       switch(type) {
+       switch (type) {
                PATCH_SITE(pv_irq_ops, irq_disable);
                PATCH_SITE(pv_irq_ops, irq_enable);
                PATCH_SITE(pv_irq_ops, restore_fl);
index 87d4d69..f704cb5 100644 (file)
@@ -82,7 +82,7 @@ void __init dma32_reserve_bootmem(void)
         * using 512M as goal
         */
        align = 64ULL<<20;
-       size = round_up(dma32_bootmem_size, align);
+       size = roundup(dma32_bootmem_size, align);
        dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
                                 512ULL<<20);
        if (dma32_bootmem_ptr)
index be33a54..1a895a5 100644 (file)
@@ -82,7 +82,8 @@ AGPEXTERN __u32 *agp_gatt_table;
 static unsigned long next_bit;  /* protected by iommu_bitmap_lock */
 static int need_flush;         /* global flush state. set for each gart wrap */
 
-static unsigned long alloc_iommu(struct device *dev, int size)
+static unsigned long alloc_iommu(struct device *dev, int size,
+                                unsigned long align_mask)
 {
        unsigned long offset, flags;
        unsigned long boundary_size;
@@ -90,16 +91,17 @@ static unsigned long alloc_iommu(struct device *dev, int size)
 
        base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
                           PAGE_SIZE) >> PAGE_SHIFT;
-       boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+       boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1,
                              PAGE_SIZE) >> PAGE_SHIFT;
 
        spin_lock_irqsave(&iommu_bitmap_lock, flags);
        offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
-                                 size, base_index, boundary_size, 0);
+                                 size, base_index, boundary_size, align_mask);
        if (offset == -1) {
                need_flush = 1;
                offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
-                                         size, base_index, boundary_size, 0);
+                                         size, base_index, boundary_size,
+                                         align_mask);
        }
        if (offset != -1) {
                next_bit = offset+size;
@@ -236,10 +238,10 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
  * Caller needs to check if the iommu is needed and flush.
  */
 static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
-                               size_t size, int dir)
+                               size_t size, int dir, unsigned long align_mask)
 {
        unsigned long npages = iommu_num_pages(phys_mem, size);
-       unsigned long iommu_page = alloc_iommu(dev, npages);
+       unsigned long iommu_page = alloc_iommu(dev, npages, align_mask);
        int i;
 
        if (iommu_page == -1) {
@@ -262,7 +264,11 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
 static dma_addr_t
 gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir)
 {
-       dma_addr_t map = dma_map_area(dev, paddr, size, dir);
+       dma_addr_t map;
+       unsigned long align_mask;
+
+       align_mask = (1UL << get_order(size)) - 1;
+       map = dma_map_area(dev, paddr, size, dir, align_mask);
 
        flush_gart();
 
@@ -281,7 +287,8 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir)
        if (!need_iommu(dev, paddr, size))
                return paddr;
 
-       bus = gart_map_simple(dev, paddr, size, dir);
+       bus = dma_map_area(dev, paddr, size, dir, 0);
+       flush_gart();
 
        return bus;
 }
@@ -340,7 +347,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
                unsigned long addr = sg_phys(s);
 
                if (nonforced_iommu(dev, addr, s->length)) {
-                       addr = dma_map_area(dev, addr, s->length, dir);
+                       addr = dma_map_area(dev, addr, s->length, dir, 0);
                        if (addr == bad_dma_address) {
                                if (i > 0)
                                        gart_unmap_sg(dev, sg, i, dir);
@@ -362,7 +369,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
                          int nelems, struct scatterlist *sout,
                          unsigned long pages)
 {
-       unsigned long iommu_start = alloc_iommu(dev, pages);
+       unsigned long iommu_start = alloc_iommu(dev, pages, 0);
        unsigned long iommu_page = iommu_start;
        struct scatterlist *s;
        int i;
index bc1f2d3..a311ffc 100644 (file)
@@ -1,20 +1,13 @@
 #include <linux/platform_device.h>
-#include <linux/errno.h>
+#include <linux/err.h>
 #include <linux/init.h>
 
 static __init int add_pcspkr(void)
 {
        struct platform_device *pd;
-       int ret;
 
-       pd = platform_device_alloc("pcspkr", -1);
-       if (!pd)
-               return -ENOMEM;
+       pd = platform_device_register_simple("pcspkr", -1, NULL, 0);
 
-       ret = platform_device_add(pd);
-       if (ret)
-               platform_device_put(pd);
-
-       return ret;
+       return IS_ERR(pd) ? PTR_ERR(pd) : 0;
 }
 device_initcall(add_pcspkr);
index 876e918..ec7a2ba 100644 (file)
@@ -185,7 +185,8 @@ static void mwait_idle(void)
 static void poll_idle(void)
 {
        local_irq_enable();
-       cpu_relax();
+       while (!need_resched())
+               cpu_relax();
 }
 
 /*
index 31f40b2..205188d 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/tick.h>
 #include <linux/percpu.h>
 #include <linux/prctl.h>
+#include <linux/dmi.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -56,6 +57,8 @@
 #include <asm/cpu.h>
 #include <asm/kdebug.h>
 #include <asm/idle.h>
+#include <asm/syscalls.h>
+#include <asm/smp.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
@@ -161,6 +164,7 @@ void __show_registers(struct pt_regs *regs, int all)
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned long sp;
        unsigned short ss, gs;
+       const char *board;
 
        if (user_mode_vm(regs)) {
                sp = regs->sp;
@@ -173,11 +177,15 @@ void __show_registers(struct pt_regs *regs, int all)
        }
 
        printk("\n");
-       printk("Pid: %d, comm: %s %s (%s %.*s)\n",
+
+       board = dmi_get_system_info(DMI_PRODUCT_NAME);
+       if (!board)
+               board = "";
+       printk("Pid: %d, comm: %s %s (%s %.*s) %s\n",
                        task_pid_nr(current), current->comm,
                        print_tainted(), init_utsname()->release,
                        (int)strcspn(init_utsname()->version, " "),
-                       init_utsname()->version);
+                       init_utsname()->version, board);
 
        printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
                        (u16)regs->cs, regs->ip, regs->flags,
@@ -277,6 +285,14 @@ void exit_thread(void)
                tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
                put_cpu();
        }
+#ifdef CONFIG_X86_DS
+       /* Free any DS contexts that have not been properly released. */
+       if (unlikely(current->thread.ds_ctx)) {
+               /* we clear debugctl to make sure DS is not used. */
+               update_debugctlmsr(0);
+               ds_free(current->thread.ds_ctx);
+       }
+#endif /* CONFIG_X86_DS */
 }
 
 void flush_thread(void)
@@ -438,6 +454,35 @@ int set_tsc_mode(unsigned int val)
        return 0;
 }
 
+#ifdef CONFIG_X86_DS
+static int update_debugctl(struct thread_struct *prev,
+                       struct thread_struct *next, unsigned long debugctl)
+{
+       unsigned long ds_prev = 0;
+       unsigned long ds_next = 0;
+
+       if (prev->ds_ctx)
+               ds_prev = (unsigned long)prev->ds_ctx->ds;
+       if (next->ds_ctx)
+               ds_next = (unsigned long)next->ds_ctx->ds;
+
+       if (ds_next != ds_prev) {
+               /* we clear debugctl to make sure DS
+                * is not in use when we change it */
+               debugctl = 0;
+               update_debugctlmsr(0);
+               wrmsr(MSR_IA32_DS_AREA, ds_next, 0);
+       }
+       return debugctl;
+}
+#else
+static int update_debugctl(struct thread_struct *prev,
+                       struct thread_struct *next, unsigned long debugctl)
+{
+       return debugctl;
+}
+#endif /* CONFIG_X86_DS */
+
 static noinline void
 __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
                 struct tss_struct *tss)
@@ -448,14 +493,7 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
        prev = &prev_p->thread;
        next = &next_p->thread;
 
-       debugctl = prev->debugctlmsr;
-       if (next->ds_area_msr != prev->ds_area_msr) {
-               /* we clear debugctl to make sure DS
-                * is not in use when we change it */
-               debugctl = 0;
-               update_debugctlmsr(0);
-               wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
-       }
+       debugctl = update_debugctl(prev, next, prev->debugctlmsr);
 
        if (next->debugctlmsr != debugctl)
                update_debugctlmsr(next->debugctlmsr);
@@ -479,13 +517,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
                        hard_enable_TSC();
        }
 
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
        if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
 
        if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
-#endif
+#endif /* CONFIG_X86_PTRACE_BTS */
 
 
        if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
index e12e0e4..2a8ccb9 100644 (file)
 #include <linux/kdebug.h>
 #include <linux/tick.h>
 #include <linux/prctl.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
 
-#include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
-#include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
 #include <asm/mmu_context.h>
@@ -51,6 +51,7 @@
 #include <asm/proto.h>
 #include <asm/ia32.h>
 #include <asm/idle.h>
+#include <asm/syscalls.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -88,7 +89,7 @@ void exit_idle(void)
 #ifdef CONFIG_HOTPLUG_CPU
 DECLARE_PER_CPU(int, cpu_state);
 
-#include <asm/nmi.h>
+#include <linux/nmi.h>
 /* We halt the CPU with physical CPU hotplug */
 static inline void play_dead(void)
 {
@@ -153,7 +154,7 @@ void cpu_idle(void)
 }
 
 /* Prints also some state that isn't saved in the pt_regs */
-void __show_regs(struct pt_regs * regs)
+void __show_regs(struct pt_regs *regs)
 {
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
@@ -162,59 +163,61 @@ void __show_regs(struct pt_regs * regs)
 
        printk("\n");
        print_modules();
-       printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+       printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
-       printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
+       printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
        printk_address(regs->ip, 1);
-       printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->sp,
-               regs->flags);
-       printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
+       printk(KERN_INFO "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
+                       regs->sp, regs->flags);
+       printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
               regs->ax, regs->bx, regs->cx);
-       printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
+       printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
               regs->dx, regs->si, regs->di);
-       printk("RBP: %016lx R08: %016lx R09: %016lx\n",
+       printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
               regs->bp, regs->r8, regs->r9);
-       printk("R10: %016lx R11: %016lx R12: %016lx\n",
-              regs->r10, regs->r11, regs->r12); 
-       printk("R13: %016lx R14: %016lx R15: %016lx\n",
-              regs->r13, regs->r14, regs->r15); 
-
-       asm("movl %%ds,%0" : "=r" (ds)); 
-       asm("movl %%cs,%0" : "=r" (cs)); 
-       asm("movl %%es,%0" : "=r" (es)); 
+       printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
+              regs->r10, regs->r11, regs->r12);
+       printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
+              regs->r13, regs->r14, regs->r15);
+
+       asm("movl %%ds,%0" : "=r" (ds));
+       asm("movl %%cs,%0" : "=r" (cs));
+       asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));
 
        rdmsrl(MSR_FS_BASE, fs);
-       rdmsrl(MSR_GS_BASE, gs); 
-       rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 
+       rdmsrl(MSR_GS_BASE, gs);
+       rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
 
        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = read_cr3();
        cr4 = read_cr4();
 
-       printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", 
-              fs,fsindex,gs,gsindex,shadowgs); 
-       printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); 
-       printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
+       printk(KERN_INFO "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
+              fs, fsindex, gs, gsindex, shadowgs);
+       printk(KERN_INFO "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
+                       es, cr0);
+       printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
+                       cr4);
 
        get_debugreg(d0, 0);
        get_debugreg(d1, 1);
        get_debugreg(d2, 2);
-       printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
+       printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
        get_debugreg(d3, 3);
        get_debugreg(d6, 6);
        get_debugreg(d7, 7);
-       printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
+       printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
 }
 
 void show_regs(struct pt_regs *regs)
 {
-       printk("CPU %d:", smp_processor_id());
+       printk(KERN_INFO "CPU %d:", smp_processor_id());
        __show_regs(regs);
        show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
 }
@@ -240,6 +243,14 @@ void exit_thread(void)
                t->io_bitmap_max = 0;
                put_cpu();
        }
+#ifdef CONFIG_X86_DS
+       /* Free any DS contexts that have not been properly released. */
+       if (unlikely(t->ds_ctx)) {
+               /* we clear debugctl to make sure DS is not used. */
+               update_debugctlmsr(0);
+               ds_free(t->ds_ctx);
+       }
+#endif /* CONFIG_X86_DS */
 }
 
 void flush_thread(void)
@@ -315,10 +326,10 @@ void prepare_to_copy(struct task_struct *tsk)
 
 int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
                unsigned long unused,
-       struct task_struct * p, struct pt_regs * regs)
+       struct task_struct *p, struct pt_regs *regs)
 {
        int err;
-       struct pt_regs * childregs;
+       struct pt_regs *childregs;
        struct task_struct *me = current;
 
        childregs = ((struct pt_regs *)
@@ -363,10 +374,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
                if (test_thread_flag(TIF_IA32))
                        err = do_set_thread_area(p, -1,
                                (struct user_desc __user *)childregs->si, 0);
-               else                    
-#endif  
-                       err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); 
-               if (err) 
+               else
+#endif
+                       err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
+               if (err)
                        goto out;
        }
        err = 0;
@@ -473,13 +484,27 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
        next = &next_p->thread;
 
        debugctl = prev->debugctlmsr;
-       if (next->ds_area_msr != prev->ds_area_msr) {
-               /* we clear debugctl to make sure DS
-                * is not in use when we change it */
-               debugctl = 0;
-               update_debugctlmsr(0);
-               wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
+
+#ifdef CONFIG_X86_DS
+       {
+               unsigned long ds_prev = 0, ds_next = 0;
+
+               if (prev->ds_ctx)
+                       ds_prev = (unsigned long)prev->ds_ctx->ds;
+               if (next->ds_ctx)
+                       ds_next = (unsigned long)next->ds_ctx->ds;
+
+               if (ds_next != ds_prev) {
+                       /*
+                        * We clear debugctl to make sure DS
+                        * is not in use when we change it:
+                        */
+                       debugctl = 0;
+                       update_debugctlmsr(0);
+                       wrmsrl(MSR_IA32_DS_AREA, ds_next);
+               }
        }
+#endif /* CONFIG_X86_DS */
 
        if (next->debugctlmsr != debugctl)
                update_debugctlmsr(next->debugctlmsr);
@@ -517,13 +542,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
        }
 
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
        if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
 
        if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
-#endif
+#endif /* CONFIG_X86_PTRACE_BTS */
 }
 
 /*
@@ -545,7 +570,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        unsigned fsindex, gsindex;
 
        /* we're going to use this soon, after a few expensive things */
-       if (next_p->fpu_counter>5)
+       if (next_p->fpu_counter > 5)
                prefetch(next->xstate);
 
        /*
@@ -553,13 +578,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         */
        load_sp0(tss, next);
 
-       /* 
+       /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        savesegment(es, prev->es);
        if (unlikely(next->es | prev->es))
-               loadsegment(es, next->es); 
+               loadsegment(es, next->es);
 
        savesegment(ds, prev->ds);
        if (unlikely(next->ds | prev->ds))
@@ -585,7 +610,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         */
        arch_leave_lazy_cpu_mode();
 
-       /* 
+       /*
         * Switch FS and GS.
         *
         * Segment register != 0 always requires a reload.  Also
@@ -594,13 +619,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         */
        if (unlikely(fsindex | next->fsindex | prev->fs)) {
                loadsegment(fs, next->fsindex);
-               /* 
+               /*
                 * Check if the user used a selector != 0; if yes
                 *  clear 64bit base, since overloaded base is always
                 *  mapped to the Null selector
                 */
                if (fsindex)
-                       prev->fs = 0;                           
+                       prev->fs = 0;
        }
        /* when next process has a 64bit base use it */
        if (next->fs)
@@ -610,7 +635,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        if (unlikely(gsindex | next->gsindex | prev->gs)) {
                load_gs_index(next->gsindex);
                if (gsindex)
-                       prev->gs = 0;                           
+                       prev->gs = 0;
        }
        if (next->gs)
                wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
@@ -619,12 +644,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        /* Must be after DS reload */
        unlazy_fpu(prev_p);
 
-       /* 
+       /*
         * Switch the PDA and FPU contexts.
         */
        prev->usersp = read_pda(oldrsp);
        write_pda(oldrsp, next->usersp);
-       write_pda(pcurrent, next_p); 
+       write_pda(pcurrent, next_p);
 
        write_pda(kernelstack,
                  (unsigned long)task_stack_page(next_p) +
@@ -665,7 +690,7 @@ long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs *regs)
 {
        long error;
-       char * filename;
+       char *filename;
 
        filename = getname(name);
        error = PTR_ERR(filename);
@@ -723,55 +748,55 @@ asmlinkage long sys_vfork(struct pt_regs *regs)
 unsigned long get_wchan(struct task_struct *p)
 {
        unsigned long stack;
-       u64 fp,ip;
+       u64 fp, ip;
        int count = 0;
 
-       if (!p || p == current || p->state==TASK_RUNNING)
-               return 0; 
+       if (!p || p == current || p->state == TASK_RUNNING)
+               return 0;
        stack = (unsigned long)task_stack_page(p);
        if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.sp);
-       do { 
+       do {
                if (fp < (unsigned long)stack ||
                    fp > (unsigned long)stack+THREAD_SIZE)
-                       return 0; 
+                       return 0;
                ip = *(u64 *)(fp+8);
                if (!in_sched_functions(ip))
                        return ip;
-               fp = *(u64 *)fp; 
-       } while (count++ < 16); 
+               fp = *(u64 *)fp;
+       } while (count++ < 16);
        return 0;
 }
 
 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
-{ 
-       int ret = 0; 
+{
+       int ret = 0;
        int doit = task == current;
        int cpu;
 
-       switch (code) { 
+       switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
-                       return -EPERM; 
+                       return -EPERM;
                cpu = get_cpu();
-               /* handle small bases via the GDT because that's faster to 
+               /* handle small bases via the GDT because that's faster to
                   switch. */
-               if (addr <= 0xffffffff) {  
-                       set_32bit_tls(task, GS_TLS, addr); 
-                       if (doit) { 
+               if (addr <= 0xffffffff) {
+                       set_32bit_tls(task, GS_TLS, addr);
+                       if (doit) {
                                load_TLS(&task->thread, cpu);
-                               load_gs_index(GS_TLS_SEL); 
+                               load_gs_index(GS_TLS_SEL);
                        }
-                       task->thread.gsindex = GS_TLS_SEL; 
+                       task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
-               } else { 
+               } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
-                       } 
+                       }
                }
                put_cpu();
                break;
@@ -825,8 +850,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
                                base = task->thread.gs;
-               }
-               else
+               } else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
index e37dccc..e375b65 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/errno.h>
 #include <linux/ptrace.h>
 #include <linux/regset.h>
+#include <linux/tracehook.h>
 #include <linux/user.h>
 #include <linux/elf.h>
 #include <linux/security.h>
@@ -69,7 +70,7 @@ static inline bool invalid_selector(u16 value)
 
 #define FLAG_MASK              FLAG_MASK_32
 
-static long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
+static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
 {
        BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
        regno >>= 2;
@@ -554,45 +555,115 @@ static int ptrace_set_debugreg(struct task_struct *child,
        return 0;
 }
 
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
+/*
+ * The configuration for a particular BTS hardware implementation.
+ *
+ * One constant instance exists per supported CPU type; the matching one
+ * is copied into bts_cfg by bts_configure().
+ */
+struct bts_configuration {
+       /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */
+       unsigned char  sizeof_bts;
+       /* the size of a field in the BTS record in bytes */
+       unsigned char  sizeof_field;
+       /* a bitmask to enable/disable BTS in DEBUGCTL MSR */
+       unsigned long debugctl_mask;
+};
+/*
+ * The configuration in effect. It stays all-zero unless
+ * ptrace_bts_init_intel() recognizes the CPU; ptrace_bts_config()
+ * reports a zero sizeof_bts as -EOPNOTSUPP.
+ */
+static struct bts_configuration bts_cfg;
+
+#define BTS_MAX_RECORD_SIZE (8 * 3)
+
+
+/*
+ * Branch Trace Store (BTS) uses the following format. Different
+ * architectures vary in the size of those fields.
+ * - source linear address
+ * - destination linear address
+ * - flags
+ *
+ * Later architectures use 64bit pointers throughout, whereas earlier
+ * architectures use 32bit pointers in 32bit mode.
+ *
+ * We compute the base address for the first 8 fields based on:
+ * - the field size stored in the DS configuration
+ * - the relative field position
+ *
+ * In order to store additional information in the BTS buffer, we use
+ * a special source address to indicate that the record requires
+ * special interpretation.
+ *
+ * Netburst indicated via a bit in the flags field whether the branch
+ * was predicted; this is ignored.
+ */
+
+/*
+ * Field positions within a raw BTS record, plus the names used when a
+ * record is reinterpreted as a special (non-branch) record.
+ */
+enum bts_field {
+       /* positions of the three fields of a raw record */
+       bts_from = 0,
+       bts_to,
+       bts_flags,
+
+       /* value stored in the bts_from field to mark a special record */
+       bts_escape = (unsigned long)-1,
+       /* a special record keeps its qualifier in the bts_to position ... */
+       bts_qual = bts_to,
+       /* ... and its jiffies value in the bts_flags position */
+       bts_jiffies = bts_flags
+};
+
+/*
+ * Read one field of a raw BTS record.
+ *
+ * base: start of the raw record
+ * field: position of the field within the record
+ *
+ * Returns the unsigned long stored at the field's offset, which is the
+ * field position scaled by the configured field size.
+ */
+static inline unsigned long bts_get(const char *base, enum bts_field field)
+{
+       base += (bts_cfg.sizeof_field * field);
+       /* the record is only read, so keep the const qualifier */
+       return *(const unsigned long *)base;
+}
 
-static int ptrace_bts_get_size(struct task_struct *child)
+/*
+ * Write one field of a raw BTS record.
+ *
+ * base: start of the raw record
+ * field: position of the field within the record
+ * val: the value to store at the field's offset, which is the field
+ *      position scaled by the configured field size
+ */
+static inline void bts_set(char *base, enum bts_field field, unsigned long val)
 {
-       if (!child->thread.ds_area_msr)
-               return -ENXIO;
+       base += (bts_cfg.sizeof_field * field);
+       (*(unsigned long *)base) = val;
+}
 
-       return ds_get_bts_index((void *)child->thread.ds_area_msr);
+/*
+ * Convert one raw BTS record into its bts_struct representation.
+ *
+ * out: receives the decoded record; it is zeroed before being filled in
+ * raw: the raw BTS record to decode
+ *
+ * A record whose source field holds bts_escape carries a qualifier and
+ * a jiffies value; every other record is decoded as a branch.
+ */
+static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw)
+{
+       unsigned long from;
+
+       memset(out, 0, sizeof(*out));
+
+       from = bts_get(raw, bts_from);
+       if (from != bts_escape) {
+               /* an ordinary branch record */
+               out->qualifier = BTS_BRANCH;
+               out->variant.lbr.from_ip = from;
+               out->variant.lbr.to_ip   = bts_get(raw, bts_to);
+               return;
+       }
+
+       /* a special record: the other two fields are reinterpreted */
+       out->qualifier       = bts_get(raw, bts_qual);
+       out->variant.jiffies = bts_get(raw, bts_jiffies);
 }
 
-static int ptrace_bts_read_record(struct task_struct *child,
-                                 long index,
+static int ptrace_bts_read_record(struct task_struct *child, size_t index,
                                  struct bts_struct __user *out)
 {
        struct bts_struct ret;
-       int retval;
-       int bts_end;
-       int bts_index;
-
-       if (!child->thread.ds_area_msr)
-               return -ENXIO;
+       const void *bts_record;
+       size_t bts_index, bts_end;
+       int error;
 
-       if (index < 0)
-               return -EINVAL;
+       error = ds_get_bts_end(child, &bts_end);
+       if (error < 0)
+               return error;
 
-       bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr);
        if (bts_end <= index)
                return -EINVAL;
 
+       error = ds_get_bts_index(child, &bts_index);
+       if (error < 0)
+               return error;
+
        /* translate the ptrace bts index into the ds bts index */
-       bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr);
-       bts_index -= (index + 1);
-       if (bts_index < 0)
-               bts_index += bts_end;
+       bts_index += bts_end - (index + 1);
+       if (bts_end <= bts_index)
+               bts_index -= bts_end;
 
-       retval = ds_read_bts((void *)child->thread.ds_area_msr,
-                            bts_index, &ret);
-       if (retval < 0)
-               return retval;
+       error = ds_access_bts(child, bts_index, &bts_record);
+       if (error < 0)
+               return error;
+
+       ptrace_bts_translate_record(&ret, bts_record);
 
        if (copy_to_user(out, &ret, sizeof(ret)))
                return -EFAULT;
@@ -600,101 +671,106 @@ static int ptrace_bts_read_record(struct task_struct *child,
        return sizeof(ret);
 }
 
-static int ptrace_bts_clear(struct task_struct *child)
-{
-       if (!child->thread.ds_area_msr)
-               return -ENXIO;
-
-       return ds_clear((void *)child->thread.ds_area_msr);
-}
-
 static int ptrace_bts_drain(struct task_struct *child,
                            long size,
                            struct bts_struct __user *out)
 {
-       int end, i;
-       void *ds = (void *)child->thread.ds_area_msr;
-
-       if (!ds)
-               return -ENXIO;
+       struct bts_struct ret;
+       const unsigned char *raw;
+       size_t end, i;
+       int error;
 
-       end = ds_get_bts_index(ds);
-       if (end <= 0)
-               return end;
+       error = ds_get_bts_index(child, &end);
+       if (error < 0)
+               return error;
 
        if (size < (end * sizeof(struct bts_struct)))
                return -EIO;
 
-       for (i = 0; i < end; i++, out++) {
-               struct bts_struct ret;
-               int retval;
+       error = ds_access_bts(child, 0, (const void **)&raw);
+       if (error < 0)
+               return error;
 
-               retval = ds_read_bts(ds, i, &ret);
-               if (retval < 0)
-                       return retval;
+       for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) {
+               ptrace_bts_translate_record(&ret, raw);
 
                if (copy_to_user(out, &ret, sizeof(ret)))
                        return -EFAULT;
        }
 
-       ds_clear(ds);
+       error = ds_clear_bts(child);
+       if (error < 0)
+               return error;
 
        return end;
 }
 
+/*
+ * Callback that ptrace_bts_config() hands to ds_request_bts() when the
+ * tracer asked for PTRACE_BTS_O_SIGNAL: sends the signal stored in the
+ * child's thread.bts_ovfl_signal to the child.
+ *
+ * NOTE(review): presumably invoked by the DS code when the BTS buffer
+ * overflows - confirm against ds_request_bts().
+ */
+static void ptrace_bts_ovfl(struct task_struct *child)
+{
+       send_sig(child->thread.bts_ovfl_signal, child, 0);
+}
+
 static int ptrace_bts_config(struct task_struct *child,
                             long cfg_size,
                             const struct ptrace_bts_config __user *ucfg)
 {
        struct ptrace_bts_config cfg;
-       int bts_size, ret = 0;
-       void *ds;
+       int error = 0;
+
+       error = -EOPNOTSUPP;
+       if (!bts_cfg.sizeof_bts)
+               goto errout;
 
+       error = -EIO;
        if (cfg_size < sizeof(cfg))
-               return -EIO;
+               goto errout;
 
+       error = -EFAULT;
        if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
-               return -EFAULT;
+               goto errout;
 
-       if ((int)cfg.size < 0)
-               return -EINVAL;
+       error = -EINVAL;
+       if ((cfg.flags & PTRACE_BTS_O_SIGNAL) &&
+           !(cfg.flags & PTRACE_BTS_O_ALLOC))
+               goto errout;
 
-       bts_size = 0;
-       ds = (void *)child->thread.ds_area_msr;
-       if (ds) {
-               bts_size = ds_get_bts_size(ds);
-               if (bts_size < 0)
-                       return bts_size;
-       }
-       cfg.size = PAGE_ALIGN(cfg.size);
+       if (cfg.flags & PTRACE_BTS_O_ALLOC) {
+               ds_ovfl_callback_t ovfl = NULL;
+               unsigned int sig = 0;
+
+               /* we ignore the error in case we were not tracing child */
+               (void)ds_release_bts(child);
 
-       if (bts_size != cfg.size) {
-               ret = ptrace_bts_realloc(child, cfg.size,
-                                        cfg.flags & PTRACE_BTS_O_CUT_SIZE);
-               if (ret < 0)
+               if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
+                       if (!cfg.signal)
+                               goto errout;
+
+                       sig  = cfg.signal;
+                       ovfl = ptrace_bts_ovfl;
+               }
+
+               error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl);
+               if (error < 0)
                        goto errout;
 
-               ds = (void *)child->thread.ds_area_msr;
+               child->thread.bts_ovfl_signal = sig;
        }
 
-       if (cfg.flags & PTRACE_BTS_O_SIGNAL)
-               ret = ds_set_overflow(ds, DS_O_SIGNAL);
-       else
-               ret = ds_set_overflow(ds, DS_O_WRAP);
-       if (ret < 0)
+       error = -EINVAL;
+       if (!child->thread.ds_ctx && cfg.flags)
                goto errout;
 
        if (cfg.flags & PTRACE_BTS_O_TRACE)
-               child->thread.debugctlmsr |= ds_debugctl_mask();
+               child->thread.debugctlmsr |= bts_cfg.debugctl_mask;
        else
-               child->thread.debugctlmsr &= ~ds_debugctl_mask();
+               child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
 
        if (cfg.flags & PTRACE_BTS_O_SCHED)
                set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
        else
                clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
 
-       ret = sizeof(cfg);
+       error = sizeof(cfg);
 
 out:
        if (child->thread.debugctlmsr)
@@ -702,10 +778,10 @@ out:
        else
                clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
 
-       return ret;
+       return error;
 
 errout:
-       child->thread.debugctlmsr &= ~ds_debugctl_mask();
+       child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
        clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
        goto out;
 }
@@ -714,29 +790,40 @@ static int ptrace_bts_status(struct task_struct *child,
                             long cfg_size,
                             struct ptrace_bts_config __user *ucfg)
 {
-       void *ds = (void *)child->thread.ds_area_msr;
        struct ptrace_bts_config cfg;
+       size_t end;
+       const void *base, *max;
+       int error;
 
        if (cfg_size < sizeof(cfg))
                return -EIO;
 
-       memset(&cfg, 0, sizeof(cfg));
+       error = ds_get_bts_end(child, &end);
+       if (error < 0)
+               return error;
 
-       if (ds) {
-               cfg.size = ds_get_bts_size(ds);
+       error = ds_access_bts(child, /* index = */ 0, &base);
+       if (error < 0)
+               return error;
 
-               if (ds_get_overflow(ds) == DS_O_SIGNAL)
-                       cfg.flags |= PTRACE_BTS_O_SIGNAL;
+       error = ds_access_bts(child, /* index = */ end, &max);
+       if (error < 0)
+               return error;
 
-               if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
-                   child->thread.debugctlmsr & ds_debugctl_mask())
-                       cfg.flags |= PTRACE_BTS_O_TRACE;
+       memset(&cfg, 0, sizeof(cfg));
+       cfg.size = (max - base);
+       cfg.signal = child->thread.bts_ovfl_signal;
+       cfg.bts_size = sizeof(struct bts_struct);
 
-               if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
-                       cfg.flags |= PTRACE_BTS_O_SCHED;
-       }
+       if (cfg.signal)
+               cfg.flags |= PTRACE_BTS_O_SIGNAL;
 
-       cfg.bts_size = sizeof(struct bts_struct);
+       if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
+           child->thread.debugctlmsr & bts_cfg.debugctl_mask)
+               cfg.flags |= PTRACE_BTS_O_TRACE;
+
+       if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
+               cfg.flags |= PTRACE_BTS_O_SCHED;
 
        if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
                return -EFAULT;
@@ -744,89 +831,38 @@ static int ptrace_bts_status(struct task_struct *child,
        return sizeof(cfg);
 }
 
-
 static int ptrace_bts_write_record(struct task_struct *child,
                                   const struct bts_struct *in)
 {
-       int retval;
+       unsigned char bts_record[BTS_MAX_RECORD_SIZE];
 
-       if (!child->thread.ds_area_msr)
-               return -ENXIO;
+       BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts);
 
-       retval = ds_write_bts((void *)child->thread.ds_area_msr, in);
-       if (retval)
-               return retval;
+       memset(bts_record, 0, bts_cfg.sizeof_bts);
+       switch (in->qualifier) {
+       case BTS_INVALID:
+               break;
 
-       return sizeof(*in);
-}
+       case BTS_BRANCH:
+               bts_set(bts_record, bts_from, in->variant.lbr.from_ip);
+               bts_set(bts_record, bts_to,   in->variant.lbr.to_ip);
+               break;
 
-static int ptrace_bts_realloc(struct task_struct *child,
-                             int size, int reduce_size)
-{
-       unsigned long rlim, vm;
-       int ret, old_size;
+       case BTS_TASK_ARRIVES:
+       case BTS_TASK_DEPARTS:
+               bts_set(bts_record, bts_from,    bts_escape);
+               bts_set(bts_record, bts_qual,    in->qualifier);
+               bts_set(bts_record, bts_jiffies, in->variant.jiffies);
+               break;
 
-       if (size < 0)
+       default:
                return -EINVAL;
-
-       old_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
-       if (old_size < 0)
-               return old_size;
-
-       ret = ds_free((void **)&child->thread.ds_area_msr);
-       if (ret < 0)
-               goto out;
-
-       size >>= PAGE_SHIFT;
-       old_size >>= PAGE_SHIFT;
-
-       current->mm->total_vm  -= old_size;
-       current->mm->locked_vm -= old_size;
-
-       if (size == 0)
-               goto out;
-
-       rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
-       vm = current->mm->total_vm  + size;
-       if (rlim < vm) {
-               ret = -ENOMEM;
-
-               if (!reduce_size)
-                       goto out;
-
-               size = rlim - current->mm->total_vm;
-               if (size <= 0)
-                       goto out;
-       }
-
-       rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
-       vm = current->mm->locked_vm  + size;
-       if (rlim < vm) {
-               ret = -ENOMEM;
-
-               if (!reduce_size)
-                       goto out;
-
-               size = rlim - current->mm->locked_vm;
-               if (size <= 0)
-                       goto out;
        }
 
-       ret = ds_allocate((void **)&child->thread.ds_area_msr,
-                         size << PAGE_SHIFT);
-       if (ret < 0)
-               goto out;
-
-       current->mm->total_vm  += size;
-       current->mm->locked_vm += size;
-
-out:
-       if (child->thread.ds_area_msr)
-               set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
-       else
-               clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
-
-       return ret;
+       /* The writing task will be the switched-to task on a context
+        * switch. It needs to write into the switched-from task's BTS
+        * buffer. */
+       return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
 }
 
 void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -839,7 +875,66 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk,
 
        ptrace_bts_write_record(tsk, &rec);
 }
-#endif /* X86_BTS */
+
+/*
+ * The known BTS configurations; ptrace_bts_init_intel() picks one of
+ * them and bts_configure() copies it into bts_cfg.
+ */
+
+/* Netburst: three long-sized fields; DEBUGCTL bits 2, 3 and 5 */
+static const struct bts_configuration bts_cfg_netburst = {
+       .sizeof_bts    = sizeof(long) * 3,
+       .sizeof_field  = sizeof(long),
+       .debugctl_mask = (1<<2)|(1<<3)|(1<<5)
+};
+
+/* Pentium M: three long-sized fields; DEBUGCTL bits 6 and 7 */
+static const struct bts_configuration bts_cfg_pentium_m = {
+       .sizeof_bts    = sizeof(long) * 3,
+       .sizeof_field  = sizeof(long),
+       .debugctl_mask = (1<<6)|(1<<7)
+};
+
+/*
+ * Core2 (also used for Atom): three fields of 8 bytes each, regardless
+ * of sizeof(long); DEBUGCTL bits 6, 7 and 9
+ */
+static const struct bts_configuration bts_cfg_core2 = {
+       .sizeof_bts    = 8 * 3,
+       .sizeof_field  = 8,
+       .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
+};
+
+/*
+ * Make cfg the configuration in effect by copying it into bts_cfg.
+ *
+ * cfg: one of the constant per-CPU-type configurations
+ */
+static inline void bts_configure(const struct bts_configuration *cfg)
+{
+       bts_cfg = *cfg;
+}
+
+/*
+ * Select the BTS configuration for an Intel CPU.
+ *
+ * c: the CPU description; the choice is made on c->x86 and
+ *    c->x86_model
+ *
+ * For a CPU that is not listed below, bts_cfg is left untouched.
+ */
+void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
+{
+       switch (c->x86) {
+       case 0x6:
+               switch (c->x86_model) {
+               case 0xD:
+               case 0xE: /* Pentium M */
+                       bts_configure(&bts_cfg_pentium_m);
+                       break;
+               case 0xF: /* Core2 */
+               case 0x1C: /* Atom */
+                       bts_configure(&bts_cfg_core2);
+                       break;
+               default:
+                       /* sorry, don't know about them */
+                       break;
+               }
+               break;
+       case 0xF:
+               switch (c->x86_model) {
+               case 0x0:
+               case 0x1:
+               case 0x2: /* Netburst */
+                       bts_configure(&bts_cfg_netburst);
+                       break;
+               default:
+                       /* sorry, don't know about them */
+                       break;
+               }
+               break;
+       default:
+               /* sorry, don't know about them */
+               break;
+       }
+}
+#endif /* CONFIG_X86_PTRACE_BTS */
 
 /*
  * Called by kernel/ptrace.c when detaching..
@@ -852,15 +947,15 @@ void ptrace_disable(struct task_struct *child)
 #ifdef TIF_SYSCALL_EMU
        clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 #endif
-       if (child->thread.ds_area_msr) {
-#ifdef X86_BTS
-               ptrace_bts_realloc(child, 0, 0);
-#endif
-               child->thread.debugctlmsr &= ~ds_debugctl_mask();
-               if (!child->thread.debugctlmsr)
-                       clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
-               clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
-       }
+#ifdef CONFIG_X86_PTRACE_BTS
+       (void)ds_release_bts(child);
+
+       child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
+       if (!child->thread.debugctlmsr)
+               clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+
+       clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+#endif /* CONFIG_X86_PTRACE_BTS */
 }
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -980,7 +1075,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
        /*
         * These bits need more cooking - not enabled yet:
         */
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
        case PTRACE_BTS_CONFIG:
                ret = ptrace_bts_config
                        (child, data, (struct ptrace_bts_config __user *)addr);
@@ -992,7 +1087,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                break;
 
        case PTRACE_BTS_SIZE:
-               ret = ptrace_bts_get_size(child);
+               ret = ds_get_bts_index(child, /* pos = */ NULL);
                break;
 
        case PTRACE_BTS_GET:
@@ -1001,14 +1096,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                break;
 
        case PTRACE_BTS_CLEAR:
-               ret = ptrace_bts_clear(child);
+               ret = ds_clear_bts(child);
                break;
 
        case PTRACE_BTS_DRAIN:
                ret = ptrace_bts_drain
                        (child, data, (struct bts_struct __user *) addr);
                break;
-#endif
+#endif /* CONFIG_X86_PTRACE_BTS */
 
        default:
                ret = ptrace_request(child, request, addr, data);
@@ -1375,30 +1470,6 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
        force_sig_info(SIGTRAP, &info, tsk);
 }
 
-static void syscall_trace(struct pt_regs *regs)
-{
-       if (!(current->ptrace & PT_PTRACED))
-               return;
-
-#if 0
-       printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
-              current->comm,
-              regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0),
-              current_thread_info()->flags, current->ptrace);
-#endif
-
-       ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
-                               ? 0x80 : 0));
-       /*
-        * this isn't the same as continuing with a signal, but it will do
-        * for normal use.  strace only continues with a signal if the
-        * stopping signal is not SIGTRAP.  -brl
-        */
-       if (current->exit_code) {
-               send_sig(current->exit_code, current, 1);
-               current->exit_code = 0;
-       }
-}
 
 #ifdef CONFIG_X86_32
 # define IS_IA32       1
@@ -1432,8 +1503,9 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
        if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
                ret = -1L;
 
-       if (ret || test_thread_flag(TIF_SYSCALL_TRACE))
-               syscall_trace(regs);
+       if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
+           tracehook_report_syscall_entry(regs))
+               ret = -1L;
 
        if (unlikely(current->audit_context)) {
                if (IS_IA32)
@@ -1459,7 +1531,7 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
                audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
 
        if (test_thread_flag(TIF_SYSCALL_TRACE))
-               syscall_trace(regs);
+               tracehook_report_syscall_exit(regs, 0);
 
        /*
         * If TIF_SYSCALL_EMU is set, we only get here because of
@@ -1475,6 +1547,6 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
         * system call instruction.
         */
        if (test_thread_flag(TIF_SINGLESTEP) &&
-           (current->ptrace & PT_PTRACED))
+           tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL))
                send_sigtrap(current, regs, 0);
 }
index 724adfc..f4c93f1 100644 (file)
@@ -29,7 +29,11 @@ EXPORT_SYMBOL(pm_power_off);
 
 static const struct desc_ptr no_idt = {};
 static int reboot_mode;
-enum reboot_type reboot_type = BOOT_KBD;
+/*
+ * Keyboard reset and triple fault may result in INIT, not RESET, which
+ * doesn't work when we're in vmx root mode.  Try ACPI first.
+ */
+enum reboot_type reboot_type = BOOT_ACPI;
 int reboot_force;
 
 #if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
index 9838f25..141efab 100644 (file)
@@ -223,6 +223,9 @@ unsigned long saved_video_mode;
 #define RAMDISK_LOAD_FLAG              0x4000
 
 static char __initdata command_line[COMMAND_LINE_SIZE];
+#ifdef CONFIG_CMDLINE_BOOL
+static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
+#endif
 
 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
 struct edd edd;
@@ -665,6 +668,19 @@ void __init setup_arch(char **cmdline_p)
        bss_resource.start = virt_to_phys(&__bss_start);
        bss_resource.end = virt_to_phys(&__bss_stop)-1;
 
+#ifdef CONFIG_CMDLINE_BOOL
+#ifdef CONFIG_CMDLINE_OVERRIDE
+       strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+#else
+       if (builtin_cmdline[0]) {
+               /* append boot loader cmdline to builtin */
+               strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
+               strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+               strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+       }
+#endif
+#endif
+
        strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
        *cmdline_p = command_line;
 
index 76e305e..0e67f72 100644 (file)
@@ -162,9 +162,16 @@ void __init setup_per_cpu_areas(void)
                        printk(KERN_INFO
                               "cpu %d has no node %d or node-local memory\n",
                                cpu, node);
+                       if (ptr)
+                               printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n",
+                                        cpu, __pa(ptr));
                }
-               else
+               else {
                        ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
+                       if (ptr)
+                               printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
+                                        cpu, node, __pa(ptr));
+               }
 #endif
                per_cpu_offset(cpu) = ptr - __per_cpu_start;
                memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
index 72bbb51..8b4956e 100644 (file)
@@ -24,4 +24,9 @@ struct rt_sigframe {
        struct ucontext uc;
        struct siginfo info;
 };
+
+int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+               sigset_t *set, struct pt_regs *regs);
+int ia32_setup_frame(int sig, struct k_sigaction *ka,
+               sigset_t *set, struct pt_regs *regs);
 #endif
index 6fb5bcd..2a2435d 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/wait.h>
+#include <linux/tracehook.h>
 #include <linux/elf.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
@@ -26,6 +27,7 @@
 #include <asm/uaccess.h>
 #include <asm/i387.h>
 #include <asm/vdso.h>
+#include <asm/syscalls.h>
 
 #include "sigframe.h"
 
@@ -558,8 +560,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
         * handler too.
         */
        regs->flags &= ~X86_EFLAGS_TF;
-       if (test_thread_flag(TIF_SINGLESTEP))
-               ptrace_notify(SIGTRAP);
 
        spin_lock_irq(&current->sighand->siglock);
        sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
@@ -568,6 +568,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
        recalc_sigpending();
        spin_unlock_irq(&current->sighand->siglock);
 
+       tracehook_signal_handler(sig, info, ka, regs,
+                                test_thread_flag(TIF_SINGLESTEP));
+
        return 0;
 }
 
@@ -661,5 +664,10 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
        if (thread_info_flags & _TIF_SIGPENDING)
                do_signal(regs);
 
+       if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+               clear_thread_flag(TIF_NOTIFY_RESUME);
+               tracehook_notify_resume(regs);
+       }
+
        clear_thread_flag(TIF_IRET);
 }
index ca316b5..694aa88 100644 (file)
 #include <linux/errno.h>
 #include <linux/wait.h>
 #include <linux/ptrace.h>
+#include <linux/tracehook.h>
 #include <linux/unistd.h>
 #include <linux/stddef.h>
 #include <linux/personality.h>
 #include <linux/compiler.h>
+#include <linux/uaccess.h>
+
 #include <asm/processor.h>
 #include <asm/ucontext.h>
-#include <asm/uaccess.h>
 #include <asm/i387.h>
 #include <asm/proto.h>
 #include <asm/ia32_unistd.h>
 #include <asm/mce.h>
+#include <asm/syscall.h>
+#include <asm/syscalls.h>
 #include "sigframe.h"
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 # define FIX_EFLAGS    __FIX_EFLAGS
 #endif
 
-int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-               sigset_t *set, struct pt_regs * regs); 
-int ia32_setup_frame(int sig, struct k_sigaction *ka,
-            sigset_t *set, struct pt_regs * regs); 
-
 asmlinkage long
 sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
                struct pt_regs *regs)
@@ -128,7 +127,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
        /* Always make any pending restarted system calls return -EINTR */
        current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
-#define COPY(x)                err |= __get_user(regs->x, &sc->x)
+#define COPY(x)                (err |= __get_user(regs->x, &sc->x))
 
        COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
        COPY(dx); COPY(cx); COPY(ip);
@@ -158,7 +157,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
        }
 
        {
-               struct _fpstate __user * buf;
+               struct _fpstate __user *buf;
                err |= __get_user(buf, &sc->fpstate);
 
                if (buf) {
@@ -198,7 +197,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
        current->blocked = set;
        recalc_sigpending();
        spin_unlock_irq(&current->sighand->siglock);
-       
+
        if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
                goto badframe;
 
@@ -208,16 +207,17 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
        return ax;
 
 badframe:
-       signal_fault(regs,frame,"sigreturn");
+       signal_fault(regs, frame, "sigreturn");
        return 0;
-}      
+}
 
 /*
  * Set up a signal frame.
  */
 
 static inline int
-setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, struct task_struct *me)
+setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
+               unsigned long mask, struct task_struct *me)
 {
        int err = 0;
 
@@ -273,35 +273,35 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
 }
 
 static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-                          sigset_t *set, struct pt_regs * regs)
+                          sigset_t *set, struct pt_regs *regs)
 {
        struct rt_sigframe __user *frame;
-       struct _fpstate __user *fp = NULL; 
+       struct _fpstate __user *fp = NULL;
        int err = 0;
        struct task_struct *me = current;
 
        if (used_math()) {
-               fp = get_stack(ka, regs, sizeof(struct _fpstate)); 
+               fp = get_stack(ka, regs, sizeof(struct _fpstate));
                frame = (void __user *)round_down(
                        (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
 
                if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate)))
                        goto give_sigsegv;
 
-               if (save_i387(fp) < 0) 
-                       err |= -1; 
+               if (save_i387(fp) < 0)
+                       err |= -1;
        } else
                frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
 
        if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
                goto give_sigsegv;
 
-       if (ka->sa.sa_flags & SA_SIGINFO) { 
+       if (ka->sa.sa_flags & SA_SIGINFO) {
                err |= copy_siginfo_to_user(&frame->info, info);
                if (err)
                        goto give_sigsegv;
        }
-               
+
        /* Create the ucontext.  */
        err |= __put_user(0, &frame->uc.uc_flags);
        err |= __put_user(0, &frame->uc.uc_link);
@@ -311,9 +311,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
        err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
        err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me);
        err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate);
-       if (sizeof(*set) == 16) { 
+       if (sizeof(*set) == 16) {
                __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
-               __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); 
+               __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
        } else
                err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 
@@ -324,7 +324,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
                err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
        } else {
                /* could use a vstub here */
-               goto give_sigsegv; 
+               goto give_sigsegv;
        }
 
        if (err)
@@ -332,7 +332,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 
        /* Set up registers for signal handler */
        regs->di = sig;
-       /* In case the signal handler was declared without prototypes */ 
+       /* In case the signal handler was declared without prototypes */
        regs->ax = 0;
 
        /* This also works for non SA_SIGINFO handlers because they expect the
@@ -355,37 +355,8 @@ give_sigsegv:
 }
 
 /*
- * Return -1L or the syscall number that @regs is executing.
- */
-static long current_syscall(struct pt_regs *regs)
-{
-       /*
-        * We always sign-extend a -1 value being set here,
-        * so this is always either -1L or a syscall number.
-        */
-       return regs->orig_ax;
-}
-
-/*
- * Return a value that is -EFOO if the system call in @regs->orig_ax
- * returned an error.  This only works for @regs from @current.
- */
-static long current_syscall_ret(struct pt_regs *regs)
-{
-#ifdef CONFIG_IA32_EMULATION
-       if (test_thread_flag(TIF_IA32))
-               /*
-                * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
-                * and will match correctly in comparisons.
-                */
-               return (int) regs->ax;
-#endif
-       return regs->ax;
-}
-
-/*
  * OK, we're invoking a handler
- */    
+ */
 
 static int
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
@@ -394,9 +365,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
        int ret;
 
        /* Are we from a system call? */
-       if (current_syscall(regs) >= 0) {
+       if (syscall_get_nr(current, regs) >= 0) {
                /* If so, check system call restarting.. */
-               switch (current_syscall_ret(regs)) {
+               switch (syscall_get_error(current, regs)) {
                case -ERESTART_RESTARTBLOCK:
                case -ERESTARTNOHAND:
                        regs->ax = -EINTR;
@@ -429,7 +400,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
                        ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs);
                else
                        ret = ia32_setup_frame(sig, ka, oldset, regs);
-       } else 
+       } else
 #endif
        ret = setup_rt_frame(sig, ka, info, oldset, regs);
 
@@ -453,15 +424,16 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
                 * handler too.
                 */
                regs->flags &= ~X86_EFLAGS_TF;
-               if (test_thread_flag(TIF_SINGLESTEP))
-                       ptrace_notify(SIGTRAP);
 
                spin_lock_irq(&current->sighand->siglock);
-               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+               sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
                if (!(ka->sa.sa_flags & SA_NODEFER))
-                       sigaddset(&current->blocked,sig);
+                       sigaddset(&current->blocked, sig);
                recalc_sigpending();
                spin_unlock_irq(&current->sighand->siglock);
+
+               tracehook_signal_handler(sig, info, ka, regs,
+                                        test_thread_flag(TIF_SINGLESTEP));
        }
 
        return ret;
@@ -518,9 +490,9 @@ static void do_signal(struct pt_regs *regs)
        }
 
        /* Did we come from a system call? */
-       if (current_syscall(regs) >= 0) {
+       if (syscall_get_nr(current, regs) >= 0) {
                /* Restart the system call - no handlers present */
-               switch (current_syscall_ret(regs)) {
+               switch (syscall_get_error(current, regs)) {
                case -ERESTARTNOHAND:
                case -ERESTARTSYS:
                case -ERESTARTNOINTR:
@@ -558,17 +530,23 @@ void do_notify_resume(struct pt_regs *regs, void *unused,
        /* deal with pending signal delivery */
        if (thread_info_flags & _TIF_SIGPENDING)
                do_signal(regs);
+
+       if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+               clear_thread_flag(TIF_NOTIFY_RESUME);
+               tracehook_notify_resume(regs);
+       }
 }
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
-{ 
-       struct task_struct *me = current; 
+{
+       struct task_struct *me = current;
        if (show_unhandled_signals && printk_ratelimit()) {
                printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
-              me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax);
+              me->comm, me->pid, where, frame, regs->ip,
+                  regs->sp, regs->orig_ax);
                print_vma_addr(" in ", regs->ip);
                printk("\n");
        }
 
-       force_sig(SIGSEGV, me); 
-} 
+       force_sig(SIGSEGV, me);
+}
index 7985c5b..45531e3 100644 (file)
@@ -88,7 +88,7 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
 #define get_idle_for_cpu(x)      (per_cpu(idle_thread_array, x))
 #define set_idle_for_cpu(x, p)   (per_cpu(idle_thread_array, x) = (p))
 #else
-struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
+static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
 #define get_idle_for_cpu(x)      (idle_thread_array[(x)])
 #define set_idle_for_cpu(x, p)   (idle_thread_array[(x)] = (p))
 #endif
@@ -129,7 +129,7 @@ static int boot_cpu_logical_apicid;
 static cpumask_t cpu_sibling_setup_map;
 
 /* Set if we find a B stepping CPU */
-int __cpuinitdata smp_b_stepping;
+static int __cpuinitdata smp_b_stepping;
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
 
@@ -1313,16 +1313,13 @@ __init void prefill_possible_map(void)
        if (!num_processors)
                num_processors = 1;
 
-#ifdef CONFIG_HOTPLUG_CPU
        if (additional_cpus == -1) {
                if (disabled_cpus > 0)
                        additional_cpus = disabled_cpus;
                else
                        additional_cpus = 0;
        }
-#else
-       additional_cpus = 0;
-#endif
+
        possible = num_processors + additional_cpus;
        if (possible > NR_CPUS)
                possible = NR_CPUS;
index 7066cb8..1884a8d 100644 (file)
@@ -22,6 +22,8 @@
 #include <linux/uaccess.h>
 #include <linux/unistd.h>
 
+#include <asm/syscalls.h>
+
 asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
                          unsigned long prot, unsigned long flags,
                          unsigned long fd, unsigned long pgoff)
index 3b360ef..6bc211a 100644 (file)
 #include <linux/utsname.h>
 #include <linux/personality.h>
 #include <linux/random.h>
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
 #include <asm/ia32.h>
+#include <asm/syscalls.h>
 
-asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
-       unsigned long fd, unsigned long off)
+asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
+               unsigned long prot, unsigned long flags,
+               unsigned long fd, unsigned long off)
 {
        long error;
-       struct file * file;
+       struct file *file;
 
        error = -EINVAL;
        if (off & ~PAGE_MASK)
@@ -56,9 +58,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
                   unmapped base down for this case. This can give
                   conflicts with the heap, but we assume that glibc
                   malloc knows how to fall back to mmap. Give it 1GB
-                  of playground for now. -AK */ 
-               *begin = 0x40000000; 
-               *end = 0x80000000;              
+                  of playground for now. -AK */
+               *begin = 0x40000000;
+               *end = 0x80000000;
                if (current->flags & PF_RANDOMIZE) {
                        new_begin = randomize_range(*begin, *begin + 0x02000000, 0);
                        if (new_begin)
@@ -66,9 +68,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
                }
        } else {
                *begin = TASK_UNMAPPED_BASE;
-               *end = TASK_SIZE; 
+               *end = TASK_SIZE;
        }
-} 
+}
 
 unsigned long
 arch_get_unmapped_area(struct file *filp, unsigned long addr,
@@ -78,11 +80,11 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
        struct vm_area_struct *vma;
        unsigned long start_addr;
        unsigned long begin, end;
-       
+
        if (flags & MAP_FIXED)
                return addr;
 
-       find_start_end(flags, &begin, &end); 
+       find_start_end(flags, &begin, &end);
 
        if (len > end)
                return -ENOMEM;
@@ -96,12 +98,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
        }
        if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32))
            && len <= mm->cached_hole_size) {
-               mm->cached_hole_size = 0;
+               mm->cached_hole_size = 0;
                mm->free_area_cache = begin;
        }
        addr = mm->free_area_cache;
-       if (addr < begin) 
-               addr = begin; 
+       if (addr < begin)
+               addr = begin;
        start_addr = addr;
 
 full_search:
@@ -127,7 +129,7 @@ full_search:
                        return addr;
                }
                if (addr + mm->cached_hole_size < vma->vm_start)
-                       mm->cached_hole_size = vma->vm_start - addr;
+                       mm->cached_hole_size = vma->vm_start - addr;
 
                addr = vma->vm_end;
        }
@@ -177,7 +179,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                vma = find_vma(mm, addr-len);
                if (!vma || addr <= vma->vm_start)
                        /* remember the address as a hint for next time */
-                       return (mm->free_area_cache = addr-len);
+                       return mm->free_area_cache = addr-len;
        }
 
        if (mm->mmap_base < len)
@@ -194,7 +196,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                vma = find_vma(mm, addr);
                if (!vma || addr+len <= vma->vm_start)
                        /* remember the address as a hint for next time */
-                       return (mm->free_area_cache = addr);
+                       return mm->free_area_cache = addr;
 
                /* remember the largest hole we saw so far */
                if (addr + mm->cached_hole_size < vma->vm_start)
@@ -224,13 +226,13 @@ bottomup:
 }
 
 
-asmlinkage long sys_uname(struct new_utsname __user * name)
+asmlinkage long sys_uname(struct new_utsname __user *name)
 {
        int err;
        down_read(&uts_sem);
-       err = copy_to_user(name, utsname(), sizeof (*name));
+       err = copy_to_user(name, utsname(), sizeof(*name));
        up_read(&uts_sem);
-       if (personality(current->personality) == PER_LINUX32) 
-               err |= copy_to_user(&name->machine, "i686", 5);                 
+       if (personality(current->personality) == PER_LINUX32)
+               err |= copy_to_user(&name->machine, "i686", 5);
        return err ? -EFAULT : 0;
 }
index 170d43c..3d1be4f 100644 (file)
@@ -8,12 +8,12 @@
 #define __NO_STUBS
 
 #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
 #include <asm/unistd_64.h>
 
 #undef __SYSCALL
 #define __SYSCALL(nr, sym) [nr] = sym,
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
 
 typedef void (*sys_call_ptr_t)(void);
 
index ffe3c66..bbecf8b 100644 (file)
@@ -36,6 +36,7 @@
 #include <asm/arch_hooks.h>
 #include <asm/hpet.h>
 #include <asm/time.h>
+#include <asm/timer.h>
 
 #include "do_timer.h"
 
index ab6bf37..6bb7b85 100644 (file)
@@ -10,6 +10,7 @@
 #include <asm/ldt.h>
 #include <asm/processor.h>
 #include <asm/proto.h>
+#include <asm/syscalls.h>
 
 #include "tls.h"
 
index 513caac..7a31f10 100644 (file)
@@ -32,6 +32,8 @@
 #include <linux/bug.h>
 #include <linux/nmi.h>
 #include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/io.h>
 
 #if defined(CONFIG_EDAC)
 #include <linux/edac.h>
@@ -45,9 +47,6 @@
 #include <asm/unwind.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
-#include <asm/nmi.h>
-#include <asm/smp.h>
-#include <asm/io.h>
 #include <asm/pgalloc.h>
 #include <asm/proto.h>
 #include <asm/pda.h>
@@ -85,7 +84,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
 
 void printk_address(unsigned long address, int reliable)
 {
-       printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address);
+       printk(" [<%016lx>] %s%pS\n",
+                       address, reliable ?     "" : "? ", (void *) address);
 }
 
 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
@@ -98,7 +98,8 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
                [STACKFAULT_STACK - 1] = "#SS",
                [MCE_STACK - 1] = "#MC",
 #if DEBUG_STKSZ > EXCEPTION_STKSZ
-               [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
+               [N_EXCEPTION_STACKS ...
+                       N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
 #endif
        };
        unsigned k;
@@ -163,7 +164,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
 }
 
 /*
- * x86-64 can have up to three kernel stacks: 
+ * x86-64 can have up to three kernel stacks:
  * process stack
  * interrupt stack
  * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
@@ -219,7 +220,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
                const struct stacktrace_ops *ops, void *data)
 {
        const unsigned cpu = get_cpu();
-       unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
+       unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
        unsigned used = 0;
        struct thread_info *tinfo;
 
@@ -237,7 +238,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
        if (!bp) {
                if (task == current) {
                        /* Grab bp right from our regs */
-                       asm("movq %%rbp, %0" : "=r" (bp) :);
+                       asm("movq %%rbp, %0" : "=r" (bp) : );
                } else {
                        /* bp is the last reg pushed by switch_to */
                        bp = *(unsigned long *) task->thread.sp;
@@ -339,9 +340,8 @@ static void
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
                unsigned long *stack, unsigned long bp, char *log_lvl)
 {
-       printk("\nCall Trace:\n");
+       printk("Call Trace:\n");
        dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
-       printk("\n");
 }
 
 void show_trace(struct task_struct *task, struct pt_regs *regs,
@@ -357,11 +357,15 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
        unsigned long *stack;
        int i;
        const int cpu = smp_processor_id();
-       unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
-       unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
+       unsigned long *irqstack_end =
+               (unsigned long *) (cpu_pda(cpu)->irqstackptr);
+       unsigned long *irqstack =
+               (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
 
-       // debugging aid: "show_stack(NULL, NULL);" prints the
-       // back trace for this cpu.
+       /*
+        * debugging aid: "show_stack(NULL, NULL);" prints the
+        * back trace for this cpu.
+        */
 
        if (sp == NULL) {
                if (task)
@@ -386,6 +390,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
                printk(" %016lx", *stack++);
                touch_nmi_watchdog();
        }
+       printk("\n");
        show_trace_log_lvl(task, regs, sp, bp, log_lvl);
 }
 
@@ -404,7 +409,7 @@ void dump_stack(void)
 
 #ifdef CONFIG_FRAME_POINTER
        if (!bp)
-               asm("movq %%rbp, %0" : "=r" (bp):);
+               asm("movq %%rbp, %0" : "=r" (bp) : );
 #endif
 
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
@@ -414,7 +419,6 @@ void dump_stack(void)
                init_utsname()->version);
        show_trace(NULL, NULL, &stack, bp);
 }
-
 EXPORT_SYMBOL(dump_stack);
 
 void show_registers(struct pt_regs *regs)
@@ -443,7 +447,6 @@ void show_registers(struct pt_regs *regs)
                printk("Stack: ");
                show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
                                regs->bp, "");
-               printk("\n");
 
                printk(KERN_EMERG "Code: ");
 
@@ -493,7 +496,7 @@ unsigned __kprobes long oops_begin(void)
        raw_local_irq_save(flags);
        cpu = smp_processor_id();
        if (!__raw_spin_trylock(&die_lock)) {
-               if (cpu == die_owner) 
+               if (cpu == die_owner)
                        /* nested oops. should stop eventually */;
                else
                        __raw_spin_lock(&die_lock);
@@ -638,7 +641,7 @@ kernel_trap:
 }
 
 #define DO_ERROR(trapnr, signr, str, name) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code)      \
+asmlinkage void do_##name(struct pt_regs *regs, long error_code)       \
 {                                                                      \
        if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)  \
                                                        == NOTIFY_STOP) \
@@ -648,7 +651,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code)   \
 }
 
 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)                \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code)      \
+asmlinkage void do_##name(struct pt_regs *regs, long error_code)       \
 {                                                                      \
        siginfo_t info;                                                 \
        info.si_signo = signr;                                          \
@@ -683,7 +686,7 @@ asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
        preempt_conditional_cli(regs);
 }
 
-asmlinkage void do_double_fault(struct pt_regs * regs, long error_code)
+asmlinkage void do_double_fault(struct pt_regs *regs, long error_code)
 {
        static const char str[] = "double fault";
        struct task_struct *tsk = current;
@@ -778,9 +781,10 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
 }
 
 static notrace __kprobes void
-unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 {
-       if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
+       if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
+                       NOTIFY_STOP)
                return;
        printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
                reason);
@@ -882,7 +886,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
        else if (user_mode(eregs))
                regs = task_pt_regs(current);
        /* Exception from kernel and interrupts are enabled. Move to
-          kernel process stack. */
+          kernel process stack. */
        else if (eregs->flags & X86_EFLAGS_IF)
                regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
        if (eregs != regs)
@@ -891,7 +895,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 }
 
 /* runs on IST stack. */
-asmlinkage void __kprobes do_debug(struct pt_regs * regs,
+asmlinkage void __kprobes do_debug(struct pt_regs *regs,
                                   unsigned long error_code)
 {
        struct task_struct *tsk = current;
@@ -1035,7 +1039,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs)
 
 asmlinkage void bad_intr(void)
 {
-       printk("bad interrupt"); 
+       printk("bad interrupt");
 }
 
 asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
@@ -1047,7 +1051,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
 
        conditional_sti(regs);
        if (!user_mode(regs) &&
-               kernel_math_error(regs, "kernel simd math error", 19))
+                       kernel_math_error(regs, "kernel simd math error", 19))
                return;
 
        /*
@@ -1092,7 +1096,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
        force_sig_info(SIGFPE, &info, task);
 }
 
-asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs)
+asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs)
 {
 }
 
@@ -1149,8 +1153,10 @@ void __init trap_init(void)
        set_intr_gate(0, &divide_error);
        set_intr_gate_ist(1, &debug, DEBUG_STACK);
        set_intr_gate_ist(2, &nmi, NMI_STACK);
-       set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */
-       set_system_gate(4, &overflow); /* int4 can be called from all */
+       /* int3 can be called from all */
+       set_system_gate_ist(3, &int3, DEBUG_STACK);
+       /* int4 can be called from all */
+       set_system_gate(4, &overflow);
        set_intr_gate(5, &bounds);
        set_intr_gate(6, &invalid_op);
        set_intr_gate(7, &device_not_available);
index 8f98e9d..161bb85 100644 (file)
@@ -104,7 +104,7 @@ __setup("notsc", notsc_setup);
 /*
  * Read TSC and the reference counters. Take care of SMI disturbance
  */
-static u64 tsc_read_refs(u64 *pm, u64 *hpet)
+static u64 tsc_read_refs(u64 *p, int hpet)
 {
        u64 t1, t2;
        int i;
@@ -112,9 +112,9 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet)
        for (i = 0; i < MAX_RETRIES; i++) {
                t1 = get_cycles();
                if (hpet)
-                       *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
+                       *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
                else
-                       *pm = acpi_pm_read_early();
+                    &nbs