Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris...
Linus Torvalds [Thu, 11 Jun 2009 17:01:41 +0000 (10:01 -0700)]
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/security-testing-2.6: (44 commits)
  nommu: Provide mmap_min_addr definition.
  TOMOYO: Add description of lists and structures.
  TOMOYO: Remove unused field.
  integrity: ima audit dentry_open failure
  TOMOYO: Remove unused parameter.
  security: use mmap_min_addr indepedently of security models
  TOMOYO: Simplify policy reader.
  TOMOYO: Remove redundant markers.
  SELinux: define audit permissions for audit tree netlink messages
  TOMOYO: Remove unused mutex.
  tomoyo: avoid get+put of task_struct
  smack: Remove redundant initialization.
  integrity: nfsd imbalance bug fix
  rootplug: Remove redundant initialization.
  smack: do not beyond ARRAY_SIZE of data
  integrity: move ima_counts_get
  integrity: path_check update
  IMA: Add __init notation to ima functions
  IMA: Minimal IMA policy and boot param for TCB IMA policy
  selinux: remove obsolete read buffer limit from sel_read_bool
  ...

301 files changed:
Documentation/DocBook/Makefile
Documentation/DocBook/tracepoint.tmpl [new file with mode: 0644]
Documentation/RCU/trace.txt
Documentation/kernel-parameters.txt
Documentation/trace/events.txt [new file with mode: 0644]
Documentation/trace/ftrace.txt
Documentation/trace/power.txt [new file with mode: 0644]
MAINTAINERS
arch/arm/plat-mxc/include/mach/imx-uart.h
arch/frv/Kconfig
arch/frv/include/asm/bitops.h
arch/frv/include/asm/elf.h
arch/frv/include/asm/pci.h
arch/frv/include/asm/ptrace.h
arch/frv/include/asm/syscall.h [new file with mode: 0644]
arch/frv/include/asm/thread_info.h
arch/frv/kernel/entry.S
arch/frv/kernel/ptrace.c
arch/frv/kernel/signal.c
arch/frv/kernel/uaccess.c
arch/frv/mb93090-mb00/pci-dma-nommu.c
arch/frv/mb93090-mb00/pci-dma.c
arch/mn10300/Kconfig
arch/mn10300/include/asm/elf.h
arch/mn10300/include/asm/processor.h
arch/mn10300/include/asm/ptrace.h
arch/mn10300/kernel/entry.S
arch/mn10300/kernel/ptrace.c
arch/mn10300/kernel/signal.c
arch/mn10300/mm/tlb-mn10300.S
arch/x86/Kconfig.debug
arch/x86/ia32/ia32entry.S
arch/x86/include/asm/ds.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/ptrace.h
arch/x86/include/asm/termios.h
arch/x86/include/asm/tlbflush.h
arch/x86/include/asm/unistd_32.h
arch/x86/include/asm/unistd_64.h
arch/x86/kernel/Makefile
arch/x86/kernel/ds.c
arch/x86/kernel/ds_selftest.c [new file with mode: 0644]
arch/x86/kernel/ds_selftest.h [new file with mode: 0644]
arch/x86/kernel/entry_64.S
arch/x86/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/ptrace.c
arch/x86/kernel/stacktrace.c
arch/x86/kernel/syscall_table_32.S
arch/x86/mm/kmmio.c
arch/x86/mm/mmio-mod.c
arch/x86/oprofile/nmi_int.c
block/blk-core.c
block/blk-sysfs.c
block/compat_ioctl.c
block/elevator.c
drivers/bluetooth/hci_ldisc.c
drivers/char/Kconfig
drivers/char/Makefile
drivers/char/bfin_jtag_comm.c [new file with mode: 0644]
drivers/char/cyclades.c
drivers/char/epca.c
drivers/char/ip2/i2lib.c
drivers/char/ip2/ip2main.c
drivers/char/isicom.c
drivers/char/istallion.c
drivers/char/moxa.c
drivers/char/mxser.c
drivers/char/n_hdlc.c
drivers/char/n_tty.c
drivers/char/pcmcia/synclink_cs.c
drivers/char/pty.c
drivers/char/rocket.c
drivers/char/selection.c
drivers/char/stallion.c
drivers/char/synclink.c
drivers/char/synclink_gt.c
drivers/char/synclinkmp.c
drivers/char/tty_audit.c
drivers/char/tty_io.c
drivers/char/tty_ioctl.c
drivers/char/tty_ldisc.c
drivers/char/tty_port.c
drivers/ide/alim15x3.c
drivers/ide/ide-atapi.c
drivers/ide/ide-cd.c
drivers/ide/ide-cd.h
drivers/ide/ide-disk.c
drivers/ide/ide-dma.c
drivers/ide/ide-floppy.c
drivers/ide/ide-io.c
drivers/ide/ide-ioctls.c
drivers/ide/ide-park.c
drivers/ide/ide-pm.c
drivers/ide/ide-tape.c
drivers/ide/ide-taskfile.c
drivers/md/dm.c
drivers/parport/parport_pc.c
drivers/scsi/sg.c
drivers/serial/8250.c
drivers/serial/8250_pci.c
drivers/serial/Kconfig
drivers/serial/Makefile
drivers/serial/bfin_5xx.c
drivers/serial/bfin_sport_uart.c
drivers/serial/icom.c
drivers/serial/imx.c
drivers/serial/jsm/jsm.h
drivers/serial/jsm/jsm_tty.c
drivers/serial/timbuart.c [new file with mode: 0644]
drivers/serial/timbuart.h [new file with mode: 0644]
drivers/usb/class/cdc-acm.c
drivers/usb/class/cdc-acm.h
drivers/usb/serial/belkin_sa.c
drivers/usb/serial/ch341.c
drivers/usb/serial/console.c
drivers/usb/serial/cp210x.c
drivers/usb/serial/cyberjack.c
drivers/usb/serial/cypress_m8.c
drivers/usb/serial/digi_acceleport.c
drivers/usb/serial/empeg.c
drivers/usb/serial/ftdi_sio.c
drivers/usb/serial/garmin_gps.c
drivers/usb/serial/generic.c
drivers/usb/serial/io_edgeport.c
drivers/usb/serial/io_ti.c
drivers/usb/serial/ipaq.c
drivers/usb/serial/ipw.c
drivers/usb/serial/ir-usb.c
drivers/usb/serial/iuu_phoenix.c
drivers/usb/serial/keyspan.c
drivers/usb/serial/keyspan.h
drivers/usb/serial/keyspan_pda.c
drivers/usb/serial/kl5kusb105.c
drivers/usb/serial/kobil_sct.c
drivers/usb/serial/mct_u232.c
drivers/usb/serial/mos7720.c
drivers/usb/serial/mos7840.c
drivers/usb/serial/navman.c
drivers/usb/serial/omninet.c
drivers/usb/serial/opticon.c
drivers/usb/serial/option.c
drivers/usb/serial/oti6858.c
drivers/usb/serial/pl2303.c
drivers/usb/serial/sierra.c
drivers/usb/serial/spcp8x5.c
drivers/usb/serial/symbolserial.c
drivers/usb/serial/ti_usb_3410_5052.c
drivers/usb/serial/usb-serial.c
drivers/usb/serial/visor.c
drivers/usb/serial/whiteheat.c
fs/bio.c
fs/buffer.c
fs/devpts/inode.c
fs/ext2/super.c
fs/ext3/super.c
fs/ext4/Makefile
fs/ext4/balloc.c
fs/ext4/block_validity.c [new file with mode: 0644]
fs/ext4/dir.c
fs/ext4/ext4.h
fs/ext4/ext4_i.h [deleted file]
fs/ext4/ext4_sb.h [deleted file]
fs/ext4/extents.c
fs/ext4/group.h [deleted file]
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/mballoc.c
fs/ext4/mballoc.h
fs/ext4/namei.c
fs/ext4/namei.h [deleted file]
fs/ext4/resize.c
fs/ext4/super.c
fs/ioctl.c
fs/jbd2/journal.c
fs/mpage.c
include/asm-generic/vmlinux.lds.h
include/linux/blktrace_api.h
include/linux/compat.h
include/linux/cyclades.h
include/linux/ftrace.h
include/linux/ftrace_event.h [new file with mode: 0644]
include/linux/ide.h
include/linux/init_task.h
include/linux/kmemtrace.h [new file with mode: 0644]
include/linux/mm.h
include/linux/mmiotrace.h
include/linux/module.h
include/linux/pci_ids.h
include/linux/ptrace.h
include/linux/rational.h [new file with mode: 0644]
include/linux/rculist.h
include/linux/rcutree.h
include/linux/ring_buffer.h
include/linux/sched.h
include/linux/serial.h
include/linux/serial_core.h
include/linux/signal.h
include/linux/slab_def.h
include/linux/slub_def.h
include/linux/trace_seq.h [new file with mode: 0644]
include/linux/tracepoint.h
include/linux/tty.h
include/linux/tty_driver.h
include/linux/usb/serial.h
include/trace/block.h [deleted file]
include/trace/define_trace.h [new file with mode: 0644]
include/trace/events/block.h [new file with mode: 0644]
include/trace/events/irq.h [new file with mode: 0644]
include/trace/events/kmem.h [new file with mode: 0644]
include/trace/events/lockdep.h [new file with mode: 0644]
include/trace/events/sched.h [moved from include/trace/sched_event_types.h with 91% similarity]
include/trace/events/skb.h [new file with mode: 0644]
include/trace/events/workqueue.h [new file with mode: 0644]
include/trace/ftrace.h [new file with mode: 0644]
include/trace/irq.h [deleted file]
include/trace/irq_event_types.h [deleted file]
include/trace/kmemtrace.h [deleted file]
include/trace/lockdep.h [deleted file]
include/trace/lockdep_event_types.h [deleted file]
include/trace/sched.h [deleted file]
include/trace/skb.h [deleted file]
include/trace/trace_event_types.h [deleted file]
include/trace/trace_events.h [deleted file]
include/trace/workqueue.h [deleted file]
init/Kconfig
init/main.c
ipc/sem.c
kernel/Makefile
kernel/compat.c
kernel/exit.c
kernel/fork.c
kernel/irq/handle.c
kernel/kthread.c
kernel/lockdep.c
kernel/module.c
kernel/ptrace.c
kernel/rcupreempt.c
kernel/rcutree.c
kernel/rcutree_trace.c
kernel/sched.c
kernel/signal.c
kernel/softirq.c
kernel/trace/Kconfig
kernel/trace/Makefile
kernel/trace/blktrace.c
kernel/trace/events.c [deleted file]
kernel/trace/ftrace.c
kernel/trace/kmemtrace.c
kernel/trace/ring_buffer.c
kernel/trace/ring_buffer_benchmark.c [new file with mode: 0644]
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_boot.c
kernel/trace/trace_branch.c
kernel/trace/trace_event_profile.c
kernel/trace/trace_event_types.h
kernel/trace/trace_events.c
kernel/trace/trace_events_filter.c
kernel/trace/trace_events_stage_1.h [deleted file]
kernel/trace/trace_events_stage_2.h [deleted file]
kernel/trace/trace_events_stage_3.h [deleted file]
kernel/trace/trace_export.c
kernel/trace/trace_functions_graph.c
kernel/trace/trace_hw_branches.c
kernel/trace/trace_mmiotrace.c
kernel/trace/trace_output.c
kernel/trace/trace_output.h
kernel/trace/trace_power.c
kernel/trace/trace_printk.c
kernel/trace/trace_sched_switch.c
kernel/trace/trace_sched_wakeup.c
kernel/trace/trace_selftest.c
kernel/trace/trace_stack.c
kernel/trace/trace_stat.c
kernel/trace/trace_stat.h
kernel/trace/trace_sysprof.c
kernel/trace/trace_workqueue.c
kernel/workqueue.c
lib/Kconfig
lib/Makefile
lib/rational.c [new file with mode: 0644]
mm/bounce.c
mm/mlock.c
mm/slab.c
mm/slob.c
mm/slub.c
mm/util.c
net/core/drop_monitor.c
net/core/net-traces.c
net/core/skbuff.c
samples/Kconfig
samples/Makefile
samples/trace_events/Makefile [new file with mode: 0644]
samples/trace_events/trace-events-sample.c [new file with mode: 0644]
samples/trace_events/trace-events-sample.h [new file with mode: 0644]
scripts/kernel-doc
scripts/recordmcount.pl
security/integrity/ima/ima_fs.c
security/smack/smackfs.c

index b1eb661..9632444 100644 (file)
@@ -13,7 +13,8 @@ DOCBOOKS := z8530book.xml mcabook.xml device-drivers.xml \
            gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
            genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
            mac80211.xml debugobjects.xml sh.xml regulator.xml \
-           alsa-driver-api.xml writing-an-alsa-driver.xml
+           alsa-driver-api.xml writing-an-alsa-driver.xml \
+           tracepoint.xml
 
 ###
 # The build process is as follows (targets):
diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl
new file mode 100644 (file)
index 0000000..b0756d0
--- /dev/null
@@ -0,0 +1,89 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+       "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Tracepoints">
+ <bookinfo>
+  <title>The Linux Kernel Tracepoint API</title>
+
+  <authorgroup>
+   <author>
+    <firstname>Jason</firstname>
+    <surname>Baron</surname>
+    <affiliation>
+     <address>
+      <email>jbaron@redhat.com</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+ <toc></toc>
+  <chapter id="intro">
+   <title>Introduction</title>
+   <para>
+     Tracepoints are static probe points that are located in strategic points
+     throughout the kernel. 'Probes' register/unregister with tracepoints
+     via a callback mechanism. The 'probes' are strictly typed functions that
+     are passed a unique set of parameters defined by each tracepoint.
+   </para>
+
+   <para>
+     From this simple callback mechanism, 'probes' can be used to profile, debug,
+     and understand kernel behavior. There are a number of tools that provide a
+     framework for using 'probes'. These tools include Systemtap, ftrace, and
+     LTTng.
+   </para>
+
+   <para>
+     Tracepoints are defined in a number of header files via various macros. Thus,
+     the purpose of this document is to provide a clear accounting of the available
+     tracepoints. The intention is to understand not only what tracepoints are
+     available but also to understand where future tracepoints might be added.
+   </para>
+
+   <para>
+     The API presented has functions of the form:
+     <function>trace_tracepointname(function parameters)</function>. These are the
+     tracepoints callbacks that are found throughout the code. Registering and
+     unregistering probes with these callback sites is covered in the
+     <filename>Documentation/trace/*</filename> directory.
+   </para>
+  </chapter>
+
+  <chapter id="irq">
+   <title>IRQ</title>
+!Iinclude/trace/events/irq.h
+  </chapter>
+
+</book>
index 0688482..02cced1 100644 (file)
@@ -192,23 +192,24 @@ rcu/rcuhier (which displays the struct rcu_node hierarchy).
 The output of "cat rcu/rcudata" looks as follows:
 
 rcu:
-  0 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=1 rp=3c2a dt=23301/73 dn=2 df=1882 of=0 ri=2126 ql=2 b=10
-  1 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=3 rp=39a6 dt=78073/1 dn=2 df=1402 of=0 ri=1875 ql=46 b=10
-  2 c=4010 g=4010 pq=1 pqc=4010 qp=0 rpfq=-5 rp=1d12 dt=16646/0 dn=2 df=3140 of=0 ri=2080 ql=0 b=10
-  3 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=2b50 dt=21159/1 dn=2 df=2230 of=0 ri=1923 ql=72 b=10
-  4 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1644 dt=5783/1 dn=2 df=3348 of=0 ri=2805 ql=7 b=10
-  5 c=4012 g=4013 pq=0 pqc=4011 qp=1 rpfq=3 rp=1aac dt=5879/1 dn=2 df=3140 of=0 ri=2066 ql=10 b=10
-  6 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=ed8 dt=5847/1 dn=2 df=3797 of=0 ri=1266 ql=10 b=10
-  7 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1fa2 dt=6199/1 dn=2 df=2795 of=0 ri=2162 ql=28 b=10
+rcu:
+  0 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=10951/1 dn=0 df=1101 of=0 ri=36 ql=0 b=10
+  1 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=16117/1 dn=0 df=1015 of=0 ri=0 ql=0 b=10
+  2 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1445/1 dn=0 df=1839 of=0 ri=0 ql=0 b=10
+  3 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=6681/1 dn=0 df=1545 of=0 ri=0 ql=0 b=10
+  4 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1003/1 dn=0 df=1992 of=0 ri=0 ql=0 b=10
+  5 c=17829 g=17830 pq=1 pqc=17829 qp=1 dt=3887/1 dn=0 df=3331 of=0 ri=4 ql=2 b=10
+  6 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=859/1 dn=0 df=3224 of=0 ri=0 ql=0 b=10
+  7 c=17829 g=17830 pq=0 pqc=17829 qp=1 dt=3761/1 dn=0 df=1818 of=0 ri=0 ql=2 b=10
 rcu_bh:
-  0 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-145 rp=21d6 dt=23301/73 dn=2 df=0 of=0 ri=0 ql=0 b=10
-  1 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-170 rp=20ce dt=78073/1 dn=2 df=26 of=0 ri=5 ql=0 b=10
-  2 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-83 rp=fbd dt=16646/0 dn=2 df=28 of=0 ri=4 ql=0 b=10
-  3 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-105 rp=178c dt=21159/1 dn=2 df=28 of=0 ri=2 ql=0 b=10
-  4 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-30 rp=b54 dt=5783/1 dn=2 df=32 of=0 ri=0 ql=0 b=10
-  5 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-29 rp=df5 dt=5879/1 dn=2 df=30 of=0 ri=3 ql=0 b=10
-  6 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-28 rp=788 dt=5847/1 dn=2 df=32 of=0 ri=0 ql=0 b=10
-  7 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-53 rp=1098 dt=6199/1 dn=2 df=30 of=0 ri=3 ql=0 b=10
+  0 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=10951/1 dn=0 df=0 of=0 ri=0 ql=0 b=10
+  1 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=16117/1 dn=0 df=13 of=0 ri=0 ql=0 b=10
+  2 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1445/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  3 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=6681/1 dn=0 df=9 of=0 ri=0 ql=0 b=10
+  4 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1003/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  5 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3887/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
 
 The first section lists the rcu_data structures for rcu, the second for
 rcu_bh.  Each section has one line per CPU, or eight for this 8-CPU system.
@@ -253,12 +254,6 @@ o  "pqc" indicates which grace period the last-observed quiescent
 o      "qp" indicates that RCU still expects a quiescent state from
        this CPU.
 
-o      "rpfq" is the number of rcu_pending() calls on this CPU required
-       to induce this CPU to invoke force_quiescent_state().
-
-o      "rp" is low-order four hex digits of the count of how many times
-       rcu_pending() has been invoked on this CPU.
-
 o      "dt" is the current value of the dyntick counter that is incremented
        when entering or leaving dynticks idle state, either by the
        scheduler or by irq.  The number after the "/" is the interrupt
@@ -305,6 +300,9 @@ o   "b" is the batch limit for this CPU.  If more than this number
        of RCU callbacks is ready to invoke, then the remainder will
        be deferred.
 
+There is also an rcu/rcudata.csv file with the same information in
+comma-separated-variable spreadsheet format.
+
 
 The output of "cat rcu/rcugp" looks as follows:
 
@@ -411,3 +409,63 @@ o  Each element of the form "1/1 0:127 ^0" represents one struct
                For example, the first entry at the lowest level shows
                "^0", indicating that it corresponds to bit zero in
                the first entry at the middle level.
+
+
+The output of "cat rcu/rcu_pending" looks as follows:
+
+rcu:
+  0 np=255892 qsp=53936 cbr=0 cng=14417 gpc=10033 gps=24320 nf=6445 nn=146741
+  1 np=261224 qsp=54638 cbr=0 cng=25723 gpc=16310 gps=2849 nf=5912 nn=155792
+  2 np=237496 qsp=49664 cbr=0 cng=2762 gpc=45478 gps=1762 nf=1201 nn=136629
+  3 np=236249 qsp=48766 cbr=0 cng=286 gpc=48049 gps=1218 nf=207 nn=137723
+  4 np=221310 qsp=46850 cbr=0 cng=26 gpc=43161 gps=4634 nf=3529 nn=123110
+  5 np=237332 qsp=48449 cbr=0 cng=54 gpc=47920 gps=3252 nf=201 nn=137456
+  6 np=219995 qsp=46718 cbr=0 cng=50 gpc=42098 gps=6093 nf=4202 nn=120834
+  7 np=249893 qsp=49390 cbr=0 cng=72 gpc=38400 gps=17102 nf=41 nn=144888
+rcu_bh:
+  0 np=146741 qsp=1419 cbr=0 cng=6 gpc=0 gps=0 nf=2 nn=145314
+  1 np=155792 qsp=12597 cbr=0 cng=0 gpc=4 gps=8 nf=3 nn=143180
+  2 np=136629 qsp=18680 cbr=0 cng=0 gpc=7 gps=6 nf=0 nn=117936
+  3 np=137723 qsp=2843 cbr=0 cng=0 gpc=10 gps=7 nf=0 nn=134863
+  4 np=123110 qsp=12433 cbr=0 cng=0 gpc=4 gps=2 nf=0 nn=110671
+  5 np=137456 qsp=4210 cbr=0 cng=0 gpc=6 gps=5 nf=0 nn=133235
+  6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921
+  7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542
+
+As always, this is once again split into "rcu" and "rcu_bh" portions.
+The fields are as follows:
+
+o      "np" is the number of times that __rcu_pending() has been invoked
+       for the corresponding flavor of RCU.
+
+o      "qsp" is the number of times that the RCU was waiting for a
+       quiescent state from this CPU.
+
+o      "cbr" is the number of times that this CPU had RCU callbacks
+       that had passed through a grace period, and were thus ready
+       to be invoked.
+
+o      "cng" is the number of times that this CPU needed another
+       grace period while RCU was idle.
+
+o      "gpc" is the number of times that an old grace period had
+       completed, but this CPU was not yet aware of it.
+
+o      "gps" is the number of times that a new grace period had started,
+       but this CPU was not yet aware of it.
+
+o      "nf" is the number of times that this CPU suspected that the
+       current grace period had run for too long, and thus needed to
+       be forced.
+
+       Please note that "forcing" consists of sending resched IPIs
+       to holdout CPUs.  If that CPU really still is in an old RCU
+       read-side critical section, then we really do have to wait for it.
+       The assumption behing "forcing" is that the CPU is not still in
+       an old RCU read-side critical section, but has not yet responded
+       for some other reason.
+
+o      "nn" is the number of times that this CPU needed nothing.  Alert
+       readers will note that the rcu "nn" number for a given CPU very
+       closely matches the rcu_bh "np" number for that same CPU.  This
+       is due to short-circuit evaluation in rcu_pending().
index a5253f6..72d3bf0 100644 (file)
@@ -56,7 +56,6 @@ parameter is applicable:
        ISAPNP  ISA PnP code is enabled.
        ISDN    Appropriate ISDN support is enabled.
        JOY     Appropriate joystick support is enabled.
-       KMEMTRACE kmemtrace is enabled.
        LIBATA  Libata driver is enabled
        LP      Printer support is enabled.
        LOOP    Loopback device support is enabled.
@@ -754,12 +753,25 @@ and is between 256 and 4096 characters. It is defined in the file
                        ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
 
        ftrace=[tracer]
-                       [ftrace] will set and start the specified tracer
+                       [FTRACE] will set and start the specified tracer
                        as early as possible in order to facilitate early
                        boot debugging.
 
        ftrace_dump_on_oops
-                       [ftrace] will dump the trace buffers on oops.
+                       [FTRACE] will dump the trace buffers on oops.
+
+       ftrace_filter=[function-list]
+                       [FTRACE] Limit the functions traced by the function
+                       tracer at boot up. function-list is a comma separated
+                       list of functions. This list can be changed at run
+                       time by the set_ftrace_filter file in the debugfs
+                       tracing directory. 
+
+       ftrace_notrace=[function-list]
+                       [FTRACE] Do not trace the functions specified in
+                       function-list. This list can be changed at run time
+                       by the set_ftrace_notrace file in the debugfs
+                       tracing directory.
 
        gamecon.map[2|3]=
                        [HW,JOY] Multisystem joystick and NES/SNES/PSX pad
@@ -1062,15 +1074,6 @@ and is between 256 and 4096 characters. It is defined in the file
                        use the HighMem zone if it exists, and the Normal
                        zone if it does not.
 
-       kmemtrace.enable=       [KNL,KMEMTRACE] Format: { yes | no }
-                               Controls whether kmemtrace is enabled
-                               at boot-time.
-
-       kmemtrace.subbufs=n     [KNL,KMEMTRACE] Overrides the number of
-                       subbufs kmemtrace's relay channel has. Set this
-                       higher than default (KMEMTRACE_N_SUBBUFS in code) if
-                       you experience buffer overruns.
-
        kgdboc=         [HW] kgdb over consoles.
                        Requires a tty driver that supports console polling.
                        (only serial suported for now)
@@ -1671,6 +1674,14 @@ and is between 256 and 4096 characters. It is defined in the file
        oprofile.timer= [HW]
                        Use timer interrupt instead of performance counters
 
+       oprofile.cpu_type=      Force an oprofile cpu type
+                       This might be useful if you have an older oprofile
+                       userland or if you want common events.
+                       Format: { archperfmon }
+                       archperfmon: [X86] Force use of architectural
+                               perfmon on Intel CPUs instead of the
+                               CPU specific event set.
+
        osst=           [HW,SCSI] SCSI Tape Driver
                        Format: <buffer_size>,<write_threshold>
                        See also Documentation/scsi/st.txt.
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
new file mode 100644 (file)
index 0000000..f157d75
--- /dev/null
@@ -0,0 +1,90 @@
+                            Event Tracing
+
+               Documentation written by Theodore Ts'o
+                       Updated by Li Zefan
+
+1. Introduction
+===============
+
+Tracepoints (see Documentation/trace/tracepoints.txt) can be used
+without creating custom kernel modules to register probe functions
+using the event tracing infrastructure.
+
+Not all tracepoints can be traced using the event tracing system;
+the kernel developer must provide code snippets which define how the
+tracing information is saved into the tracing buffer, and how the
+tracing information should be printed.
+
+2. Using Event Tracing
+======================
+
+2.1 Via the 'set_event' interface
+---------------------------------
+
+The events which are available for tracing can be found in the file
+/debug/tracing/available_events.
+
+To enable a particular event, such as 'sched_wakeup', simply echo it
+to /debug/tracing/set_event. For example:
+
+       # echo sched_wakeup >> /debug/tracing/set_event
+
+[ Note: '>>' is necessary, otherwise it will firstly disable
+  all the events. ]
+
+To disable an event, echo the event name to the set_event file prefixed
+with an exclamation point:
+
+       # echo '!sched_wakeup' >> /debug/tracing/set_event
+
+To disable all events, echo an empty line to the set_event file:
+
+       # echo > /debug/tracing/set_event
+
+To enable all events, echo '*:*' or '*:' to the set_event file:
+
+       # echo *:* > /debug/tracing/set_event
+
+The events are organized into subsystems, such as ext4, irq, sched,
+etc., and a full event name looks like this: <subsystem>:<event>.  The
+subsystem name is optional, but it is displayed in the available_events
+file.  All of the events in a subsystem can be specified via the syntax
+"<subsystem>:*"; for example, to enable all irq events, you can use the
+command:
+
+       # echo 'irq:*' > /debug/tracing/set_event
+
+2.2 Via the 'enable' toggle
+---------------------------
+
+The events available are also listed in /debug/tracing/events/ hierarchy
+of directories.
+
+To enable event 'sched_wakeup':
+
+       # echo 1 > /debug/tracing/events/sched/sched_wakeup/enable
+
+To disable it:
+
+       # echo 0 > /debug/tracing/events/sched/sched_wakeup/enable
+
+To enable all events in sched subsystem:
+
+       # echo 1 > /debug/tracing/events/sched/enable
+
+To eanble all events:
+
+       # echo 1 > /debug/tracing/events/enable
+
+When reading one of these enable files, there are four results:
+
+ 0 - all events this file affects are disabled
+ 1 - all events this file affects are enabled
+ X - there is a mixture of events enabled and disabled
+ ? - this file does not affect any event
+
+3. Defining an event-enabled tracepoint
+=======================================
+
+See The example provided in samples/trace_events
+
index e362f50..2a82d86 100644 (file)
@@ -179,7 +179,7 @@ Here is the list of current tracers that may be configured.
 
        Function call tracer to trace all kernel functions.
 
-  "function_graph_tracer"
+  "function_graph"
 
        Similar to the function tracer except that the
        function tracer probes the functions on their entry
diff --git a/Documentation/trace/power.txt b/Documentation/trace/power.txt
new file mode 100644 (file)
index 0000000..cd805e1
--- /dev/null
@@ -0,0 +1,17 @@
+The power tracer collects detailed information about C-state and P-state
+transitions, instead of just looking at the high-level "average"
+information.
+
+There is a helper script found in scrips/tracing/power.pl in the kernel
+sources which can be used to parse this information and create a
+Scalable Vector Graphics (SVG) picture from the trace data.
+
+To use this tracer:
+
+       echo 0 > /sys/kernel/debug/tracing/tracing_enabled
+       echo power > /sys/kernel/debug/tracing/current_tracer
+       echo 1 > /sys/kernel/debug/tracing/tracing_enabled
+       sleep 1
+       echo 0 > /sys/kernel/debug/tracing/tracing_enabled
+       cat /sys/kernel/debug/tracing/trace | \
+               perl scripts/tracing/power.pl > out.sv
index cf4abdd..84285b5 100644 (file)
@@ -71,7 +71,7 @@ P: Person
 M: Mail patches to
 L: Mailing list that is relevant to this area
 W: Web-page with status/info
-T: SCM tree type and location.  Type is one of: git, hg, quilt.
+T: SCM tree type and location.  Type is one of: git, hg, quilt, stgit.
 S: Status, one of the following:
 
        Supported:      Someone is actually paid to look after this.
@@ -159,7 +159,8 @@ F:  drivers/net/r8169.c
 8250/16?50 (AND CLONE UARTS) SERIAL DRIVER
 L:     linux-serial@vger.kernel.org
 W:     http://serial.sourceforge.net
-S:     Orphan
+M:     alan@lxorguk.ukuu.org.uk
+S:     Odd Fixes
 F:     drivers/serial/8250*
 F:     include/linux/serial_8250.h
 
@@ -5629,6 +5630,7 @@ P:        Alan Cox
 M:     alan@lxorguk.ukuu.org.uk
 L:     linux-kernel@vger.kernel.org
 S:     Maintained
+T:     stgit http://zeniv.linux.org.uk/~alan/ttydev/
 
 TULIP NETWORK DRIVERS
 P:     Grant Grundler
index 599217b..f9bd17d 100644 (file)
 #define ASMARM_ARCH_UART_H
 
 #define IMXUART_HAVE_RTSCTS (1<<0)
+#define IMXUART_IRDA        (1<<1)
 
 struct imxuart_platform_data {
        int (*init)(struct platform_device *pdev);
        int (*exit)(struct platform_device *pdev);
        unsigned int flags;
+       void (*irda_enable)(int enable);
+       unsigned int irda_inv_rx:1;
+       unsigned int irda_inv_tx:1;
+       unsigned short transceiver_delay;
 };
 
 #endif
index 9d1552a..8a5bd7a 100644 (file)
@@ -6,6 +6,7 @@ config FRV
        bool
        default y
        select HAVE_IDE
+       select HAVE_ARCH_TRACEHOOK
 
 config ZONE_DMA
        bool
index 287f6f6..50ae91b 100644 (file)
@@ -112,7 +112,7 @@ extern unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsig
 #define atomic_clear_mask(mask, v)     atomic_test_and_ANDNOT_mask((mask), (v))
 #define atomic_set_mask(mask, v)       atomic_test_and_OR_mask((mask), (v))
 
-static inline int test_and_clear_bit(int nr, volatile void *addr)
+static inline int test_and_clear_bit(unsigned long nr, volatile void *addr)
 {
        volatile unsigned long *ptr = addr;
        unsigned long mask = 1UL << (nr & 31);
@@ -120,7 +120,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr)
        return (atomic_test_and_ANDNOT_mask(mask, ptr) & mask) != 0;
 }
 
-static inline int test_and_set_bit(int nr, volatile void *addr)
+static inline int test_and_set_bit(unsigned long nr, volatile void *addr)
 {
        volatile unsigned long *ptr = addr;
        unsigned long mask = 1UL << (nr & 31);
@@ -128,7 +128,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr)
        return (atomic_test_and_OR_mask(mask, ptr) & mask) != 0;
 }
 
-static inline int test_and_change_bit(int nr, volatile void *addr)
+static inline int test_and_change_bit(unsigned long nr, volatile void *addr)
 {
        volatile unsigned long *ptr = addr;
        unsigned long mask = 1UL << (nr & 31);
@@ -136,22 +136,22 @@ static inline int test_and_change_bit(int nr, volatile void *addr)
        return (atomic_test_and_XOR_mask(mask, ptr) & mask) != 0;
 }
 
-static inline void clear_bit(int nr, volatile void *addr)
+static inline void clear_bit(unsigned long nr, volatile void *addr)
 {
        test_and_clear_bit(nr, addr);
 }
 
-static inline void set_bit(int nr, volatile void *addr)
+static inline void set_bit(unsigned long nr, volatile void *addr)
 {
        test_and_set_bit(nr, addr);
 }
 
-static inline void change_bit(int nr, volatile void * addr)
+static inline void change_bit(unsigned long nr, volatile void *addr)
 {
        test_and_change_bit(nr, addr);
 }
 
-static inline void __clear_bit(int nr, volatile void * addr)
+static inline void __clear_bit(unsigned long nr, volatile void *addr)
 {
        volatile unsigned long *a = addr;
        int mask;
@@ -161,7 +161,7 @@ static inline void __clear_bit(int nr, volatile void * addr)
        *a &= ~mask;
 }
 
-static inline void __set_bit(int nr, volatile void * addr)
+static inline void __set_bit(unsigned long nr, volatile void *addr)
 {
        volatile unsigned long *a = addr;
        int mask;
@@ -171,7 +171,7 @@ static inline void __set_bit(int nr, volatile void * addr)
        *a |= mask;
 }
 
-static inline void __change_bit(int nr, volatile void *addr)
+static inline void __change_bit(unsigned long nr, volatile void *addr)
 {
        volatile unsigned long *a = addr;
        int mask;
@@ -181,7 +181,7 @@ static inline void __change_bit(int nr, volatile void *addr)
        *a ^= mask;
 }
 
-static inline int __test_and_clear_bit(int nr, volatile void * addr)
+static inline int __test_and_clear_bit(unsigned long nr, volatile void *addr)
 {
        volatile unsigned long *a = addr;
        int mask, retval;
@@ -193,7 +193,7 @@ static inline int __test_and_clear_bit(int nr, volatile void * addr)
        return retval;
 }
 
-static inline int __test_and_set_bit(int nr, volatile void * addr)
+static inline int __test_and_set_bit(unsigned long nr, volatile void *addr)
 {
        volatile unsigned long *a = addr;
        int mask, retval;
@@ -205,7 +205,7 @@ static inline int __test_and_set_bit(int nr, volatile void * addr)
        return retval;
 }
 
-static inline int __test_and_change_bit(int nr, volatile void * addr)
+static inline int __test_and_change_bit(unsigned long nr, volatile void *addr)
 {
        volatile unsigned long *a = addr;
        int mask, retval;
@@ -220,12 +220,13 @@ static inline int __test_and_change_bit(int nr, volatile void * addr)
 /*
  * This routine doesn't need to be atomic.
  */
-static inline int __constant_test_bit(int nr, const volatile void * addr)
+static inline int
+__constant_test_bit(unsigned long nr, const volatile void *addr)
 {
        return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
 }
 
-static inline int __test_bit(int nr, const volatile void * addr)
+static inline int __test_bit(unsigned long nr, const volatile void *addr)
 {
        int     * a = (int *) addr;
        int     mask;
index 7279ec0..7bbf6e4 100644 (file)
@@ -116,6 +116,7 @@ do {                                                                                        \
 } while(0)
 
 #define USE_ELF_CORE_DUMP
+#define CORE_DUMP_USE_REGSET
 #define ELF_FDPIC_CORE_EFLAGS  EF_FRV_FDPIC
 #define ELF_EXEC_PAGESIZE      16384
 
index 585d9b4..cc685e6 100644 (file)
@@ -87,8 +87,7 @@ static inline void pci_dma_sync_single(struct pci_dev *hwdev,
                                       dma_addr_t dma_handle,
                                       size_t size, int direction)
 {
-       if (direction == PCI_DMA_NONE)
-                BUG();
+       BUG_ON(direction == PCI_DMA_NONE);
 
        frv_cache_wback_inv((unsigned long)bus_to_virt(dma_handle),
                            (unsigned long)bus_to_virt(dma_handle) + size);
@@ -105,9 +104,7 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
                                   int nelems, int direction)
 {
        int i;
-
-       if (direction == PCI_DMA_NONE)
-                BUG();
+       BUG_ON(direction == PCI_DMA_NONE);
 
        for (i = 0; i < nelems; i++)
                frv_cache_wback_inv(sg_dma_address(&sg[i]),
index cf69340..a54b535 100644 (file)
@@ -65,6 +65,8 @@
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
 
+struct task_struct;
+
 /*
  * we dedicate GR28 to keeping a pointer to the current exception frame
  * - gr28 is destroyed on entry to the kernel from userspace
@@ -73,11 +75,18 @@ register struct pt_regs *__frame asm("gr28");
 
 #define user_mode(regs)                        (!((regs)->psr & PSR_S))
 #define instruction_pointer(regs)      ((regs)->pc)
+#define user_stack_pointer(regs)       ((regs)->sp)
 
 extern unsigned long user_stack(const struct pt_regs *);
 extern void show_regs(struct pt_regs *);
 #define profile_pc(regs) ((regs)->pc)
-#endif
+
+#define task_pt_regs(task) ((task)->thread.frame0)
+
+#define arch_has_single_step() (1)
+extern void user_enable_single_step(struct task_struct *);
+extern void user_disable_single_step(struct task_struct *);
 
 #endif /* !__ASSEMBLY__ */
+#endif /* __KERNEL__ */
 #endif /* _ASM_PTRACE_H */
diff --git a/arch/frv/include/asm/syscall.h b/arch/frv/include/asm/syscall.h
new file mode 100644 (file)
index 0000000..70689eb
--- /dev/null
@@ -0,0 +1,123 @@
+/* syscall parameter access functions
+ *
+ * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_SYSCALL_H
+#define _ASM_SYSCALL_H
+
+#include <linux/err.h>
+#include <asm/ptrace.h>
+
+/*
+ * Get the system call number or -1
+ */
+static inline long syscall_get_nr(struct task_struct *task,
+                                 struct pt_regs *regs)
+{
+       return regs->syscallno;
+}
+
+/*
+ * Restore the clobbered GR8 register
+ * (1st syscall arg was overwritten with syscall return or error)
+ */
+static inline void syscall_rollback(struct task_struct *task,
+                                   struct pt_regs *regs)
+{
+       regs->gr8 = regs->orig_gr8;
+}
+
+/*
+ * See if the syscall return value is an error, returning it if it is and 0 if
+ * not
+ */
+static inline long syscall_get_error(struct task_struct *task,
+                                    struct pt_regs *regs)
+{
+       return IS_ERR_VALUE(regs->gr8) ? regs->gr8 : 0;
+}
+
+/*
+ * Get the syscall return value
+ */
+static inline long syscall_get_return_value(struct task_struct *task,
+                                           struct pt_regs *regs)
+{
+       return regs->gr8;
+}
+
+/*
+ * Set the syscall return value
+ */
+static inline void syscall_set_return_value(struct task_struct *task,
+                                           struct pt_regs *regs,
+                                           int error, long val)
+{
+       if (error)
+               regs->gr8 = -error;
+       else
+               regs->gr8 = val;
+}
+
+/*
+ * Retrieve the system call arguments
+ */
+static inline void syscall_get_arguments(struct task_struct *task,
+                                        struct pt_regs *regs,
+                                        unsigned int i, unsigned int n,
+                                        unsigned long *args)
+{
+       /*
+        * Do this simply for now. If we need to start supporting
+        * fetching arguments from arbitrary indices, this will need some
+        * extra logic. Presently there are no in-tree users that depend
+        * on this behaviour.
+        */
+       BUG_ON(i);
+
+       /* Argument pattern is: GR8, GR9, GR10, GR11, GR12, GR13 */
+       switch (n) {
+       case 6: args[5] = regs->gr13;
+       case 5: args[4] = regs->gr12;
+       case 4: args[3] = regs->gr11;
+       case 3: args[2] = regs->gr10;
+       case 2: args[1] = regs->gr9;
+       case 1: args[0] = regs->gr8;
+               break;
+       default:
+               BUG();
+       }
+}
+
+/*
+ * Alter the system call arguments
+ */
+static inline void syscall_set_arguments(struct task_struct *task,
+                                        struct pt_regs *regs,
+                                        unsigned int i, unsigned int n,
+                                        const unsigned long *args)
+{
+       /* Same note as above applies */
+       BUG_ON(i);
+
+       switch (n) {
+       case 6: regs->gr13 = args[5];
+       case 5: regs->gr12 = args[4];
+       case 4: regs->gr11 = args[3];
+       case 3: regs->gr10 = args[2];
+       case 2: regs->gr9  = args[1];
+       case 1: regs->gr8  = args[0];
+               break;
+       default:
+               BUG();
+       }
+}
+
+#endif /* _ASM_SYSCALL_H */
index bb53ab7..e8a5ed7 100644 (file)
@@ -109,20 +109,20 @@ register struct thread_info *__current_thread_info asm("gr15");
  * - other flags in MSW
  */
 #define TIF_SYSCALL_TRACE      0       /* syscall trace active */
-#define TIF_SIGPENDING         1       /* signal pending */
-#define TIF_NEED_RESCHED       2       /* rescheduling necessary */
-#define TIF_SINGLESTEP         3       /* restore singlestep on return to user mode */
-#define TIF_IRET               4       /* return with iret */
+#define TIF_NOTIFY_RESUME      1       /* callback before returning to user */
+#define TIF_SIGPENDING         2       /* signal pending */
+#define TIF_NEED_RESCHED       3       /* rescheduling necessary */
+#define TIF_SINGLESTEP         4       /* restore singlestep on return to user mode */
 #define TIF_RESTORE_SIGMASK    5       /* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG     16      /* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE             17      /* OOM killer killed process */
 #define TIF_FREEZE             18      /* freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE     (1 << TIF_SYSCALL_TRACE)
+#define _TIF_NOTIFY_RESUME     (1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING                (1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED      (1 << TIF_NEED_RESCHED)
 #define _TIF_SINGLESTEP                (1 << TIF_SINGLESTEP)
-#define _TIF_IRET              (1 << TIF_IRET)
 #define _TIF_RESTORE_SIGMASK   (1 << TIF_RESTORE_SIGMASK)
 #define _TIF_POLLING_NRFLAG    (1 << TIF_POLLING_NRFLAG)
 #define _TIF_FREEZE            (1 << TIF_FREEZE)
index 1da523b..356e0e3 100644 (file)
@@ -886,7 +886,6 @@ system_call:
        bnc             icc0,#0,__syscall_badsys
 
        ldi             @(gr15,#TI_FLAGS),gr4
-       ori             gr4,#_TIF_SYSCALL_TRACE,gr4
        andicc          gr4,#_TIF_SYSCALL_TRACE,gr0,icc0
        bne             icc0,#0,__syscall_trace_entry
 
@@ -1150,11 +1149,10 @@ __entry_work_notifysig:
        # perform syscall entry tracing
 __syscall_trace_entry:
        LEDS            0x6320
-       setlos.p        #0,gr8
-       call            do_syscall_trace
+       call            syscall_trace_entry
 
-       ldi             @(gr28,#REG_SYSCALLNO),gr7
-       lddi            @(gr28,#REG_GR(8)) ,gr8
+       lddi.p          @(gr28,#REG_GR(8)) ,gr8
+       ori             gr8,#0,gr7              ; syscall_trace_entry() returned new syscallno
        lddi            @(gr28,#REG_GR(10)),gr10
        lddi.p          @(gr28,#REG_GR(12)),gr12
 
@@ -1169,11 +1167,10 @@ __syscall_exit_work:
        beq             icc0,#1,__entry_work_pending
 
        movsg           psr,gr23
-       andi            gr23,#~PSR_PIL,gr23     ; could let do_syscall_trace() call schedule()
+       andi            gr23,#~PSR_PIL,gr23     ; could let syscall_trace_exit() call schedule()
        movgs           gr23,psr
 
-       setlos.p        #1,gr8
-       call            do_syscall_trace
+       call            syscall_trace_exit
        bra             __entry_resume_userspace
 
 __syscall_badsys:
index 5e7d401..60eeed3 100644 (file)
@@ -19,6 +19,9 @@
 #include <linux/user.h>
 #include <linux/security.h>
 #include <linux/signal.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/tracehook.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
  */
 
 /*
+ * retrieve the contents of FRV userspace general registers
+ */
+static int genregs_get(struct task_struct *target,
+                      const struct user_regset *regset,
+                      unsigned int pos, unsigned int count,
+                      void *kbuf, void __user *ubuf)
+{
+       const struct user_int_regs *iregs = &target->thread.user->i;
+       int ret;
+
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 iregs, 0, sizeof(*iregs));
+       if (ret < 0)
+               return ret;
+
+       return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+                                       sizeof(*iregs), -1);
+}
+
+/*
+ * update the contents of the FRV userspace general registers
+ */
+static int genregs_set(struct task_struct *target,
+                      const struct user_regset *regset,
+                      unsigned int pos, unsigned int count,
+                      const void *kbuf, const void __user *ubuf)
+{
+       struct user_int_regs *iregs = &target->thread.user->i;
+       unsigned int offs_gr0, offs_gr1;
+       int ret;
+
+       /* not allowed to set PSR or __status */
+       if (pos < offsetof(struct user_int_regs, psr) + sizeof(long) &&
+           pos + count > offsetof(struct user_int_regs, psr))
+               return -EIO;
+
+       if (pos < offsetof(struct user_int_regs, __status) + sizeof(long) &&
+           pos + count > offsetof(struct user_int_regs, __status))
+               return -EIO;
+
+       /* set the control regs */
+       offs_gr0 = offsetof(struct user_int_regs, gr[0]);
+       offs_gr1 = offsetof(struct user_int_regs, gr[1]);
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                iregs, 0, offs_gr0);
+       if (ret < 0)
+               return ret;
+
+       /* skip GR0/TBR */
+       ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+                                       offs_gr0, offs_gr1);
+       if (ret < 0)
+               return ret;
+
+       /* set the general regs */
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &iregs->gr[1], offs_gr1, sizeof(*iregs));
+       if (ret < 0)
+               return ret;
+
+       return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+                                       sizeof(*iregs), -1);
+}
+
+/*
+ * retrieve the contents of FRV userspace FP/Media registers
+ */
+static int fpmregs_get(struct task_struct *target,
+                      const struct user_regset *regset,
+                      unsigned int pos, unsigned int count,
+                      void *kbuf, void __user *ubuf)
+{
+       const struct user_fpmedia_regs *fpregs = &target->thread.user->f;
+       int ret;
+
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 fpregs, 0, sizeof(*fpregs));
+       if (ret < 0)
+               return ret;
+
+       return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+                                       sizeof(*fpregs), -1);
+}
+
+/*
+ * update the contents of the FRV userspace FP/Media registers
+ */
+static int fpmregs_set(struct task_struct *target,
+                      const struct user_regset *regset,
+                      unsigned int pos, unsigned int count,
+                      const void *kbuf, const void __user *ubuf)
+{
+       struct user_fpmedia_regs *fpregs = &target->thread.user->f;
+       int ret;
+
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                fpregs, 0, sizeof(*fpregs));
+       if (ret < 0)
+               return ret;
+
+       return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+                                       sizeof(*fpregs), -1);
+}
+
+/*
+ * determine if the FP/Media registers have actually been used
+ */
+static int fpmregs_active(struct task_struct *target,
+                         const struct user_regset *regset)
+{
+       return tsk_used_math(target) ? regset->n : 0;
+}
+
+/*
+ * Define the register sets available on the FRV under Linux
+ */
+enum frv_regset {
+       REGSET_GENERAL,
+       REGSET_FPMEDIA,
+};
+
+static const struct user_regset frv_regsets[] = {
+       /*
+        * General register format is:
+        *      PSR, ISR, CCR, CCCR, LR, LCR, PC, (STATUS), SYSCALLNO, ORIG_G8
+        *      GNER0-1, IACC0, TBR, GR1-63
+        */
+       [REGSET_GENERAL] = {
+               .core_note_type = NT_PRSTATUS,
+               .n              = ELF_NGREG,
+               .size           = sizeof(long),
+               .align          = sizeof(long),
+               .get            = genregs_get,
+               .set            = genregs_set,
+       },
+       /*
+        * FPU/Media register format is:
+        *      FR0-63, FNER0-1, MSR0-1, ACC0-7, ACCG0-8, FSR
+        */
+       [REGSET_FPMEDIA] = {
+               .core_note_type = NT_PRFPREG,
+               .n              = sizeof(struct user_fpmedia_regs) / sizeof(long),
+               .size           = sizeof(long),
+               .align          = sizeof(long),
+               .get            = fpmregs_get,
+               .set            = fpmregs_set,
+               .active         = fpmregs_active,
+       },
+};
+
+static const struct user_regset_view user_frv_native_view = {
+       .name           = "frv",
+       .e_machine      = EM_FRV,
+       .regsets        = frv_regsets,
+       .n              = ARRAY_SIZE(frv_regsets),
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+       return &user_frv_native_view;
+}
+
+/*
  * Get contents of register REGNO in task TASK.
  */
 static inline long get_reg(struct task_struct *task, int regno)
@@ -69,40 +235,23 @@ static inline int put_reg(struct task_struct *task, int regno,
 }
 
 /*
- * check that an address falls within the bounds of the target process's memory
- * mappings
- */
-static inline int is_user_addr_valid(struct task_struct *child,
-                                    unsigned long start, unsigned long len)
-{
-#ifdef CONFIG_MMU
-       if (start >= PAGE_OFFSET || len > PAGE_OFFSET - start)
-               return -EIO;
-       return 0;
-#else
-       struct vm_area_struct *vma;
-
-       vma = find_vma(child->mm, start);
-       if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
-               return 0;
-
-       return -EIO;
-#endif
-}
-
-/*
  * Called by kernel/ptrace.c when detaching..
  *
  * Control h/w single stepping
  */
-void ptrace_disable(struct task_struct *child)
+void user_enable_single_step(struct task_struct *child)
+{
+       child->thread.frame0->__status |= REG__STATUS_STEP;
+}
+
+void user_disable_single_step(struct task_struct *child)
 {
        child->thread.frame0->__status &= ~REG__STATUS_STEP;
 }
 
-void ptrace_enable(struct task_struct *child)
+void ptrace_disable(struct task_struct *child)
 {
-       child->thread.frame0->__status |= REG__STATUS_STEP;
+       user_disable_single_step(child);
 }
 
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
@@ -111,15 +260,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
        int ret;
 
        switch (request) {
-               /* when I and D space are separate, these will need to be fixed. */
-       case PTRACE_PEEKTEXT: /* read word at location addr. */
-       case PTRACE_PEEKDATA:
-               ret = -EIO;
-               if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0)
-                       break;
-               ret = generic_ptrace_peekdata(child, addr, data);
-               break;
-
                /* read the word at location addr in the USER area. */
        case PTRACE_PEEKUSR: {
                tmp = 0;
@@ -163,15 +303,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                break;
        }
 
-               /* when I and D space are separate, this will have to be fixed. */
-       case PTRACE_POKETEXT: /* write the word at location addr. */
-       case PTRACE_POKEDATA:
-               ret = -EIO;
-               if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0)
-                       break;
-               ret = generic_ptrace_pokedata(child, addr, data);
-               break;
-
        case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
                ret = -EIO;
                if ((addr & 3) || addr < 0)
@@ -179,7 +310,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 
                ret = 0;
                switch (addr >> 2) {
-               case 0 ... PT__END-1:
+               case 0 ... PT__END - 1:
                        ret = put_reg(child, addr >> 2, data);
                        break;
 
@@ -189,95 +320,29 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                }
                break;
 
-       case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
-       case PTRACE_CONT: /* restart after signal. */
-               ret = -EIO;
-               if (!valid_signal(data))
-                       break;
-               if (request == PTRACE_SYSCALL)
-                       set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-               else
-                       clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-               child->exit_code = data;
-               ptrace_disable(child);
-               wake_up_process(child);
-               ret = 0;
-               break;
-
-               /* make the child exit.  Best I can do is send it a sigkill.
-                * perhaps it should be put in the status that it wants to
-                * exit.
-                */
-       case PTRACE_KILL:
-               ret = 0;
-               if (child->exit_state == EXIT_ZOMBIE)   /* already dead */
-                       break;
-               child->exit_code = SIGKILL;
-               clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-               ptrace_disable(child);
-               wake_up_process(child);
-               break;
-
-       case PTRACE_SINGLESTEP:  /* set the trap flag. */
-               ret = -EIO;
-               if (!valid_signal(data))
-                       break;
-               clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-               ptrace_enable(child);
-               child->exit_code = data;
-               wake_up_process(child);
-               ret = 0;
-               break;
-
-       case PTRACE_DETACH:     /* detach a process that was attached. */
-               ret = ptrace_detach(child, data);
-               break;
-
-       case PTRACE_GETREGS: { /* Get all integer regs from the child. */
-               int i;
-               for (i = 0; i < PT__GPEND; i++) {
-                       tmp = get_reg(child, i);
-                       if (put_user(tmp, (unsigned long *) data)) {
-                               ret = -EFAULT;
-                               break;
-                       }
-                       data += sizeof(long);
-               }
-               ret = 0;
-               break;
-       }
-
-       case PTRACE_SETREGS: { /* Set all integer regs in the child. */
-               int i;
-               for (i = 0; i < PT__GPEND; i++) {
-                       if (get_user(tmp, (unsigned long *) data)) {
-                               ret = -EFAULT;
-                               break;
-                       }
-                       put_reg(child, i, tmp);
-                       data += sizeof(long);
-               }
-               ret = 0;
-               break;
-       }
-
-       case PTRACE_GETFPREGS: { /* Get the child FP/Media state. */
-               ret = 0;
-               if (copy_to_user((void *) data,
-                                &child->thread.user->f,
-                                sizeof(child->thread.user->f)))
-                       ret = -EFAULT;
-               break;
-       }
-
-       case PTRACE_SETFPREGS: { /* Set the child FP/Media state. */
-               ret = 0;
-               if (copy_from_user(&child->thread.user->f,
-                                  (void *) data,
-                                  sizeof(child->thread.user->f)))
-                       ret = -EFAULT;
-               break;
-       }
+       case PTRACE_GETREGS:    /* Get all integer regs from the child. */
+               return copy_regset_to_user(child, &user_frv_native_view,
+                                          REGSET_GENERAL,
+                                          0, sizeof(child->thread.user->i),
+                                          (void __user *)data);
+
+       case PTRACE_SETREGS:    /* Set all integer regs in the child. */
+               return copy_regset_from_user(child, &user_frv_native_view,
+                                            REGSET_GENERAL,
+                                            0, sizeof(child->thread.user->i),
+                                            (const void __user *)data);
+
+       case PTRACE_GETFPREGS:  /* Get the child FP/Media state. */
+               return copy_regset_to_user(child, &user_frv_native_view,
+                                          REGSET_FPMEDIA,
+                                          0, sizeof(child->thread.user->f),
+                                          (void __user *)data);
+
+       case PTRACE_SETFPREGS:  /* Set the child FP/Media state. */
+               return copy_regset_from_user(child, &user_frv_native_view,
+                                            REGSET_FPMEDIA,
+                                            0, sizeof(child->thread.user->f),
+                                            (const void __user *)data);
 
        case PTRACE_GETFDPIC:
                tmp = 0;
@@ -300,414 +365,36 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                break;
 
        default:
-               ret = -EIO;
+               ret = ptrace_request(child, request, addr, data);
                break;
        }
        return ret;
 }
 
-int __nongprelbss kstrace;
-
-static const struct {
-       const char      *name;
-       unsigned        argmask;
-} __syscall_name_table[NR_syscalls] = {
-       [0]     = { "restart_syscall"                   },
-       [1]     = { "exit",             0x000001        },
-       [2]     = { "fork",             0xffffff        },
-       [3]     = { "read",             0x000141        },
-       [4]     = { "write",            0x000141        },
-       [5]     = { "open",             0x000235        },
-       [6]     = { "close",            0x000001        },
-       [7]     = { "waitpid",          0x000141        },
-       [8]     = { "creat",            0x000025        },
-       [9]     = { "link",             0x000055        },
-       [10]    = { "unlink",           0x000005        },
-       [11]    = { "execve",           0x000445        },
-       [12]    = { "chdir",            0x000005        },
-       [13]    = { "time",             0x000004        },
-       [14]    = { "mknod",            0x000325        },
-       [15]    = { "chmod",            0x000025        },
-       [16]    = { "lchown",           0x000025        },
-       [17]    = { "break" },
-       [18]    = { "oldstat",          0x000045        },
-       [19]    = { "lseek",            0x000131        },
-       [20]    = { "getpid",           0xffffff        },
-       [21]    = { "mount",            0x043555        },
-       [22]    = { "umount",           0x000005        },
-       [23]    = { "setuid",           0x000001        },
-       [24]    = { "getuid",           0xffffff        },
-       [25]    = { "stime",            0x000004        },
-       [26]    = { "ptrace",           0x004413        },
-       [27]    = { "alarm",            0x000001        },
-       [28]    = { "oldfstat",         0x000041        },
-       [29]    = { "pause",            0xffffff        },
-       [30]    = { "utime",            0x000045        },
-       [31]    = { "stty" },
-       [32]    = { "gtty" },
-       [33]    = { "access",           0x000025        },
-       [34]    = { "nice",             0x000001        },
-       [35]    = { "ftime" },
-       [36]    = { "sync",             0xffffff        },
-       [37]    = { "kill",             0x000011        },
-       [38]    = { "rename",           0x000055        },
-       [39]    = { "mkdir",            0x000025        },
-       [40]    = { "rmdir",            0x000005        },
-       [41]    = { "dup",              0x000001        },
-       [42]    = { "pipe",             0x000004        },
-       [43]    = { "times",            0x000004        },
-       [44]    = { "prof" },
-       [45]    = { "brk",              0x000004        },
-       [46]    = { "setgid",           0x000001        },
-       [47]    = { "getgid",           0xffffff        },
-       [48]    = { "signal",           0x000041        },
-       [49]    = { "geteuid",          0xffffff        },
-       [50]    = { "getegid",          0xffffff        },
-       [51]    = { "acct",             0x000005        },
-       [52]    = { "umount2",          0x000035        },
-       [53]    = { "lock" },
-       [54]    = { "ioctl",            0x000331        },
-       [55]    = { "fcntl",            0x000331        },
-       [56]    = { "mpx" },
-       [57]    = { "setpgid",          0x000011        },
-       [58]    = { "ulimit" },
-       [60]    = { "umask",            0x000002        },
-       [61]    = { "chroot",           0x000005        },
-       [62]    = { "ustat",            0x000043        },
-       [63]    = { "dup2",             0x000011        },
-       [64]    = { "getppid",          0xffffff        },
-       [65]    = { "getpgrp",          0xffffff        },
-       [66]    = { "setsid",           0xffffff        },
-       [67]    = { "sigaction" },
-       [68]    = { "sgetmask" },
-       [69]    = { "ssetmask" },
-       [70]    = { "setreuid" },
-       [71]    = { "setregid" },
-       [72]    = { "sigsuspend" },
-       [73]    = { "sigpending" },
-       [74]    = { "sethostname" },
-       [75]    = { "setrlimit" },
-       [76]    = { "getrlimit" },
-       [77]    = { "getrusage" },
-       [78]    = { "gettimeofday" },
-       [79]    = { "settimeofday" },
-       [80]    = { "getgroups" },
-       [81]    = { "setgroups" },
-       [82]    = { "select" },
-       [83]    = { "symlink" },
-       [84]    = { "oldlstat" },
-       [85]    = { "readlink" },
-       [86]    = { "uselib" },
-       [87]    = { "swapon" },
-       [88]    = { "reboot" },
-       [89]    = { "readdir" },
-       [91]    = { "munmap",           0x000034        },
-       [92]    = { "truncate" },
-       [93]    = { "ftruncate" },
-       [94]    = { "fchmod" },
-       [95]    = { "fchown" },
-       [96]    = { "getpriority" },
-       [97]    = { "setpriority" },
-       [99]    = { "statfs" },
-       [100]   = { "fstatfs" },
-       [102]   = { "socketcall" },
-       [103]   = { "syslog" },
-       [104]   = { "setitimer" },
-       [105]   = { "getitimer" },
-       [106]   = { "stat" },
-       [107]   = { "lstat" },
-       [108]   = { "fstat" },
-       [111]   = { "vhangup" },
-       [114]   = { "wait4" },
-       [115]   = { "swapoff" },
-       [116]   = { "sysinfo" },
-       [117]   = { "ipc" },
-       [118]   = { "fsync" },
-       [119]   = { "sigreturn" },
-       [120]   = { "clone" },
-       [121]   = { "setdomainname" },
-       [122]   = { "uname" },
-       [123]   = { "modify_ldt" },
-       [123]   = { "cacheflush" },
-       [124]   = { "adjtimex" },
-       [125]   = { "mprotect" },
-       [126]   = { "sigprocmask" },
-       [127]   = { "create_module" },
-       [128]   = { "init_module" },
-       [129]   = { "delete_module" },
-       [130]   = { "get_kernel_syms" },
-       [131]   = { "quotactl" },
-       [132]   = { "getpgid" },
-       [133]   = { "fchdir" },
-       [134]   = { "bdflush" },
-       [135]   = { "sysfs" },
-       [136]   = { "personality" },
-       [137]   = { "afs_syscall" },
-       [138]   = { "setfsuid" },
-       [139]   = { "setfsgid" },
-       [140]   = { "_llseek",                  0x014331        },
-       [141]   = { "getdents" },
-       [142]   = { "_newselect",               0x000141        },
-       [143]   = { "flock" },
-       [144]   = { "msync" },
-       [145]   = { "readv" },
-       [146]   = { "writev" },
-       [147]   = { "getsid",                   0x000001        },
-       [148]   = { "fdatasync",                0x000001        },
-       [149]   = { "_sysctl",                  0x000004        },
-       [150]   = { "mlock" },
-       [151]   = { "munlock" },
-       [152]   = { "mlockall" },
-       [153]   = { "munlockall" },
-       [154]   = { "sched_setparam" },
-       [155]   = { "sched_getparam" },
-       [156]   = { "sched_setscheduler" },
-       [157]   = { "sched_getscheduler" },
-       [158]   = { "sched_yield" },
-       [159]   = { "sched_get_priority_max" },
-       [160]   = { "sched_get_priority_min" },
-       [161]   = { "sched_rr_get_interval" },
-       [162]   = { "nanosleep",                0x000044        },
-       [163]   = { "mremap" },
-       [164]   = { "setresuid" },
-       [165]   = { "getresuid" },
-       [166]   = { "vm86" },
-       [167]   = { "query_module" },
-       [168]   = { "poll" },
-       [169]   = { "nfsservctl" },
-       [170]   = { "setresgid" },
-       [171]   = { "getresgid" },
-       [172]   = { "prctl",                    0x333331        },
-       [173]   = { "rt_sigreturn",             0xffffff        },
-       [174]   = { "rt_sigaction",             0x001441        },
-       [175]   = { "rt_sigprocmask",           0x001441        },
-       [176]   = { "rt_sigpending",            0x000014        },
-       [177]   = { "rt_sigtimedwait",          0x001444        },
-       [178]   = { "rt_sigqueueinfo",          0x000411        },
-       [179]   = { "rt_sigsuspend",            0x000014        },
-       [180]   = { "pread",                    0x003341        },
-       [181]   = { "pwrite",                   0x003341        },
-       [182]   = { "chown",                    0x000115        },
-       [183]   = { "getcwd" },
-       [184]   = { "capget" },
-       [185]   = { "capset" },
-       [186]   = { "sigaltstack" },
-       [187]   = { "sendfile" },
-       [188]   = { "getpmsg" },
-       [189]   = { "putpmsg" },
-       [190]   = { "vfork",                    0xffffff        },
-       [191]   = { "ugetrlimit" },
-       [192]   = { "mmap2",                    0x313314        },
-       [193]   = { "truncate64" },
-       [194]   = { "ftruncate64" },
-       [195]   = { "stat64",                   0x000045        },
-       [196]   = { "lstat64",                  0x000045        },
-       [197]   = { "fstat64",                  0x000041        },
-       [198]   = { "lchown32" },
-       [199]   = { "getuid32",                 0xffffff        },
-       [200]   = { "getgid32",                 0xffffff        },
-       [201]   = { "geteuid32",                0xffffff        },
-       [202]   = { "getegid32",                0xffffff        },
-       [203]   = { "setreuid32" },
-       [204]   = { "setregid32" },
-       [205]   = { "getgroups32" },
-       [206]   = { "setgroups32" },
-       [207]   = { "fchown32" },
-       [208]   = { "setresuid32" },
-       [209]   = { "getresuid32" },
-       [210]   = { "setresgid32" },
-       [211]   = { "getresgid32" },
-       [212]   = { "chown32" },
-       [213]   = { "setuid32" },
-       [214]   = { "setgid32" },
-       [215]   = { "setfsuid32" },
-       [216]   = { "setfsgid32" },
-       [217]   = { "pivot_root" },
-       [218]   = { "mincore" },
-       [219]   = { "madvise" },
-       [220]   = { "getdents64" },
-       [221]   = { "fcntl64" },
-       [223]   = { "security" },
-       [224]   = { "gettid" },
-       [225]   = { "readahead" },
-       [226]   = { "setxattr" },
-       [227]   = { "lsetxattr" },
-       [228]   = { "fsetxattr" },
-       [229]   = { "getxattr" },
-       [230]   = { "lgetxattr" },
-       [231]   = { "fgetxattr" },
-       [232]   = { "listxattr" },
-       [233]   = { "llistxattr" },
-       [234]   = { "flistxattr" },
-       [235]   = { "removexattr" },
-       [236]   = { "lremovexattr" },
-       [237]   = { "fremovexattr" },
-       [238]   = { "tkill" },
-       [239]   = { "sendfile64" },
-       [240]   = { "futex" },
-       [241]   = { "sched_setaffinity" },
-       [242]   = { "sched_getaffinity" },
-       [243]   = { "set_thread_area" },
-       [244]   = { "get_thread_area" },
-       [245]   = { "io_setup" },
-       [246]   = { "io_destroy" },
-       [247]   = { "io_getevents" },
-       [248]   = { "io_submit" },
-       [249]   = { "io_cancel" },
-       [250]   = { "fadvise64" },
-       [252]   = { "exit_group",               0x000001        },
-       [253]   = { "lookup_dcookie" },
-       [254]   = { "epoll_create" },
-       [255]   = { "epoll_ctl" },
-       [256]   = { "epoll_wait" },
-       [257]   = { "remap_file_pages" },
-       [258]   = { "set_tid_address" },
-       [259]   = { "timer_create" },
-       [260]   = { "timer_settime" },
-       [261]   = { "timer_gettime" },
-       [262]   = { "timer_getoverrun" },
-       [263]   = { "timer_delete" },
-       [264]   = { "clock_settime" },
-       [265]   = { "clock_gettime" },
-       [266]   = { "clock_getres" },
-       [267]   = { "clock_nanosleep" },
-       [268]   = { "statfs64" },
-       [269]   = { "fstatfs64" },
-       [270]   = { "tgkill" },
-       [271]   = { "utimes" },
-       [272]   = { "fadvise64_64" },
-       [273]   = { "vserver" },
-       [274]   = { "mbind" },
-       [275]   = { "get_mempolicy" },
-       [276]   = { "set_mempolicy" },
-       [277]   = { "mq_open" },
-       [278]   = { "mq_unlink" },
-       [279]   = { "mq_timedsend" },
-       [280]   = { "mq_timedreceive" },
-       [281]   = { "mq_notify" },
-       [282]   = { "mq_getsetattr" },
-       [283]   = { "sys_kexec_load" },
-};
-
-asmlinkage void do_syscall_trace(int leaving)
+/*
+ * handle tracing of system call entry
+ * - return the revised system call number or ULONG_MAX to cause ENOSYS
+ */
+asmlinkage unsigned long syscall_trace_entry(void)
 {
-#if 0
-       unsigned long *argp;
-       const char *name;
-       unsigned argmask;
-       char buffer[16];
-
-       if (!kstrace)
-               return;
-
-       if (!current->mm)
-               return;
-
-       if (__frame->gr7 == __NR_close)
-               return;
-
-#if 0
-       if (__frame->gr7 != __NR_mmap2 &&
-           __frame->gr7 != __NR_vfork &&
-           __frame->gr7 != __NR_execve &&
-           __frame->gr7 != __NR_exit)
-               return;
-#endif
-
-       argmask = 0;
-       name = NULL;
-       if (__frame->gr7 < NR_syscalls) {
-               name = __syscall_name_table[__frame->gr7].name;
-               argmask = __syscall_name_table[__frame->gr7].argmask;
-       }
-       if (!name) {
-               sprintf(buffer, "sys_%lx", __frame->gr7);
-               name = buffer;
-       }
-
-       if (!leaving) {
-               if (!argmask) {
-                       printk(KERN_CRIT "[%d] %s(%lx,%lx,%lx,%lx,%lx,%lx)\n",
-                              current->pid,
-                              name,
-                              __frame->gr8,
-                              __frame->gr9,
-                              __frame->gr10,
-                              __frame->gr11,
-                              __frame->gr12,
-                              __frame->gr13);
-               }
-               else if (argmask == 0xffffff) {
-                       printk(KERN_CRIT "[%d] %s()\n",
-                              current->pid,
-                              name);
-               }
-               else {
-                       printk(KERN_CRIT "[%d] %s(",
-                              current->pid,
-                              name);
-
-                       argp = &__frame->gr8;
-
-                       do {
-                               switch (argmask & 0xf) {
-                               case 1:
-                                       printk("%ld", (long) *argp);
-                                       break;
-                               case 2:
-                                       printk("%lo", *argp);
-                                       break;
-                               case 3:
-                                       printk("%lx", *argp);
-                                       break;
-                               case 4:
-                                       printk("%p", (void *) *argp);
-                                       break;
-                               case 5:
-                                       printk("\"%s\"", (char *) *argp);
-                                       break;
-                               }
-
-                               argp++;
-                               argmask >>= 4;
-                               if (argmask)
-                                       printk(",");
-
-                       } while (argmask);
-
-                       printk(")\n");
-               }
-       }
-       else {
-               if ((int)__frame->gr8 > -4096 && (int)__frame->gr8 < 4096)
-                       printk(KERN_CRIT "[%d] %s() = %ld\n", current->pid, name, __frame->gr8);
-               else
-                       printk(KERN_CRIT "[%d] %s() = %lx\n", current->pid, name, __frame->gr8);
+       __frame->__status |= REG__STATUS_SYSC_ENTRY;
+       if (tracehook_report_syscall_entry(__frame)) {
+               /* tracing decided this syscall should not happen, so
+                * We'll return a bogus call number to get an ENOSYS
+                * error, but leave the original number in
+                * __frame->syscallno
+                */
+               return ULONG_MAX;
        }
-       return;
-#endif
-
-       if (!test_thread_flag(TIF_SYSCALL_TRACE))
-               return;
-
-       if (!(current->ptrace & PT_PTRACED))
-               return;
 
-       /* we need to indicate entry or exit to strace */
-       if (leaving)
-               __frame->__status |= REG__STATUS_SYSC_EXIT;
-       else
-               __frame->__status |= REG__STATUS_SYSC_ENTRY;
-
-       ptrace_notify(SIGTRAP);
+       return __frame->syscallno;
+}
 
-       /*
-        * this isn't the same as continuing with a signal, but it will do
-        * for normal use.  strace only continues with a signal if the
-        * stopping signal is not SIGTRAP.  -brl
-        */
-       if (current->exit_code) {
-               send_sig(current->exit_code, current, 1);
-               current->exit_code = 0;
-       }
+/*
+ * handle tracing of system call exit
+ */
+asmlinkage void syscall_trace_exit(void)
+{
+       __frame->__status |= REG__STATUS_SYSC_EXIT;
+       tracehook_report_syscall_exit(__frame, 0);
 }
index 3bdb368..4a7a62c 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/unistd.h>
 #include <linux/personality.h>
 #include <linux/freezer.h>
+#include <linux/tracehook.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -516,6 +517,9 @@ static void do_signal(void)
                         * clear the TIF_RESTORE_SIGMASK flag */
                        if (test_thread_flag(TIF_RESTORE_SIGMASK))
                                clear_thread_flag(TIF_RESTORE_SIGMASK);
+
+                       tracehook_signal_handler(signr, &info, &ka, __frame,
+                                                test_thread_flag(TIF_SINGLESTEP));
                }
 
                return;
@@ -564,4 +568,10 @@ asmlinkage void do_notify_resume(__u32 thread_info_flags)
        if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
                do_signal();
 
+       /* deal with notification on about to resume userspace execution */
+       if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+               clear_thread_flag(TIF_NOTIFY_RESUME);
+               tracehook_notify_resume(__frame);
+       }
+
 } /* end do_notify_resume() */
index 9fb771a..374f88d 100644 (file)
@@ -23,8 +23,7 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
        char *p, ch;
        long err = -EFAULT;
 
-       if (count < 0)
-               BUG();
+       BUG_ON(count < 0);
 
        p = dst;
 
@@ -76,8 +75,7 @@ long strnlen_user(const char __user *src, long count)
        long err = 0;
        char ch;
 
-       if (count < 0)
-               BUG();
+       BUG_ON(count < 0);
 
 #ifndef CONFIG_MMU
        if ((unsigned long) src < memory_start)
index 52ff9ae..4e1ba0b 100644 (file)
@@ -116,8 +116,7 @@ EXPORT_SYMBOL(dma_free_coherent);
 dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
                          enum dma_data_direction direction)
 {
-       if (direction == DMA_NONE)
-                BUG();
+       BUG_ON(direction == DMA_NONE);
 
        frv_cache_wback_inv((unsigned long) ptr, (unsigned long) ptr + size);
 
@@ -151,8 +150,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
                frv_cache_wback_inv(sg_dma_address(&sg[i]),
                                    sg_dma_address(&sg[i]) + sg_dma_len(&sg[i]));
 
-       if (direction == DMA_NONE)
-                BUG();
+       BUG_ON(direction == DMA_NONE);
 
        return nents;
 }
index 3ddedeb..45954f0 100644 (file)
@@ -48,8 +48,7 @@ EXPORT_SYMBOL(dma_free_coherent);
 dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
                          enum dma_data_direction direction)
 {
-       if (direction == DMA_NONE)
-                BUG();
+       BUG_ON(direction == DMA_NONE);
 
        frv_cache_wback_inv((unsigned long) ptr, (unsigned long) ptr + size);
 
@@ -81,8 +80,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
        void *vaddr;
        int i;
 
-       if (direction == DMA_NONE)
-                BUG();
+       BUG_ON(direction == DMA_NONE);
 
        dampr2 = __get_DAMPR(2);
 
index 3559267..89faaca 100644 (file)
@@ -8,6 +8,7 @@ mainmenu "Linux Kernel Configuration"
 config MN10300
        def_bool y
        select HAVE_OPROFILE
+       select HAVE_ARCH_TRACEHOOK
 
 config AM33
        def_bool y
index bf09f8b..4910546 100644 (file)
@@ -34,7 +34,7 @@
  */
 typedef unsigned long elf_greg_t;
 
-#define ELF_NGREG (sizeof (struct pt_regs) / sizeof(elf_greg_t))
+#define ELF_NGREG ((sizeof(struct pt_regs) / sizeof(elf_greg_t)) - 1)
 typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 
 #define ELF_NFPREG 32
@@ -76,6 +76,7 @@ do {                                                                  \
 } while (0)
 
 #define USE_ELF_CORE_DUMP
+#define CORE_DUMP_USE_REGSET
 #define ELF_EXEC_PAGESIZE      4096
 
 /*
index 7323927..f7d4b0d 100644 (file)
@@ -143,13 +143,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 
 unsigned long get_wchan(struct task_struct *p);
 
-#define task_pt_regs(task)                                             \
-({                                                                     \
-       struct pt_regs *__regs__;                                       \
-       __regs__ = (struct pt_regs *) (KSTK_TOP(task_stack_page(task)) - 8); \
-       __regs__ - 1;                                                   \
-})
-
+#define task_pt_regs(task) ((task)->thread.uregs)
 #define KSTK_EIP(task) (task_pt_regs(task)->pc)
 #define KSTK_ESP(task) (task_pt_regs(task)->sp)
 
index 7b06cc6..921942e 100644 (file)
@@ -91,9 +91,17 @@ extern struct pt_regs *__frame; /* current frame pointer */
 #if defined(__KERNEL__)
 
 #if !defined(__ASSEMBLY__)
+struct task_struct;
+
 #define user_mode(regs)                        (((regs)->epsw & EPSW_nSL) == EPSW_nSL)
 #define instruction_pointer(regs)      ((regs)->pc)
+#define user_stack_pointer(regs)       ((regs)->sp)
 extern void show_regs(struct pt_regs *);
+
+#define arch_has_single_step() (1)
+extern void user_enable_single_step(struct task_struct *);
+extern void user_disable_single_step(struct task_struct *);
+
 #endif  /*  !__ASSEMBLY  */
 
 #define profile_pc(regs) ((regs)->pc)
index 3dc3e46..7408a27 100644 (file)
@@ -76,7 +76,7 @@ ENTRY(system_call)
        cmp     nr_syscalls,d0
        bcc     syscall_badsys
        btst    _TIF_SYSCALL_TRACE,(TI_flags,a2)
-       bne     syscall_trace_entry
+       bne     syscall_entry_trace
 syscall_call:
        add     d0,d0,a1
        add     a1,a1
@@ -104,11 +104,10 @@ restore_all:
 syscall_exit_work:
        btst    _TIF_SYSCALL_TRACE,d2
        beq     work_pending
-       __sti                           # could let do_syscall_trace() call
+       __sti                           # could let syscall_trace_exit() call
                                        # schedule() instead
        mov     fp,d0
-       mov     1,d1
-       call    do_syscall_trace[],0    # do_syscall_trace(regs,entryexit)
+       call    syscall_trace_exit[],0  # do_syscall_trace(regs)
        jmp     resume_userspace
 
        ALIGN
@@ -138,13 +137,11 @@ work_notifysig:
        jmp     resume_userspace
 
        # perform syscall entry tracing
-syscall_trace_entry:
+syscall_entry_trace:
        mov     -ENOSYS,d0
        mov     d0,(REG_D0,fp)
        mov     fp,d0
-       clr     d1
-       call    do_syscall_trace[],0
-       mov     (REG_ORIG_D0,fp),d0
+       call    syscall_trace_entry[],0 # returns the syscall number to actually use
        mov     (REG_D1,fp),d1
        cmp     nr_syscalls,d0
        bcs     syscall_call
index d6d6cdc..e143339 100644 (file)
@@ -17,6 +17,9 @@
 #include <linux/errno.h>
 #include <linux/ptrace.h>
 #include <linux/user.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/tracehook.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -64,12 +67,6 @@ static inline int get_stack_long(struct task_struct *task, int offset)
                ((unsigned long) task->thread.uregs + offset);
 }
 
-/*
- * this routine will put a word on the processes privileged stack.
- * the offset is how far from the base addr as stored in the TSS.
- * this routine assumes that all the privileged stacks are in our
- * data space.
- */
 static inline
 int put_stack_long(struct task_struct *task, int offset, unsigned long data)
 {
@@ -80,94 +77,233 @@ int put_stack_long(struct task_struct *task, int offset, unsigned long data)
        return 0;
 }
 
-static inline unsigned long get_fpregs(struct fpu_state_struct *buf,
-                                      struct task_struct *tsk)
+/*
+ * retrieve the contents of MN10300 userspace general registers
+ */
+static int genregs_get(struct task_struct *target,
+                      const struct user_regset *regset,
+                      unsigned int pos, unsigned int count,
+                      void *kbuf, void __user *ubuf)
 {
-       return __copy_to_user(buf, &tsk->thread.fpu_state,
-                             sizeof(struct fpu_state_struct));
+       const struct pt_regs *regs = task_pt_regs(target);
+       int ret;
+
+       /* we need to skip regs->next */
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 regs, 0, PT_ORIG_D0 * sizeof(long));
+       if (ret < 0)
+               return ret;
+
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 &regs->orig_d0, PT_ORIG_D0 * sizeof(long),
+                                 NR_PTREGS * sizeof(long));
+       if (ret < 0)
+               return ret;
+
+       return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+                                       NR_PTREGS * sizeof(long), -1);
 }
 
-static inline unsigned long set_fpregs(struct task_struct *tsk,
-                                      struct fpu_state_struct *buf)
+/*
+ * update the contents of the MN10300 userspace general registers
+ */
+static int genregs_set(struct task_struct *target,
+                      const struct user_regset *regset,
+                      unsigned int pos, unsigned int count,
+                      const void *kbuf, const void __user *ubuf)
 {
-       return __copy_from_user(&tsk->thread.fpu_state, buf,
-                               sizeof(struct fpu_state_struct));
+       struct pt_regs *regs = task_pt_regs(target);
+       unsigned long tmp;
+       int ret;
+
+       /* we need to skip regs->next */
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                regs, 0, PT_ORIG_D0 * sizeof(long));
+       if (ret < 0)
+               return ret;
+
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &regs->orig_d0, PT_ORIG_D0 * sizeof(long),
+                                PT_EPSW * sizeof(long));
+       if (ret < 0)
+               return ret;
+
+       /* we need to mask off changes to EPSW */
+       tmp = regs->epsw;
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &tmp, PT_EPSW * sizeof(long),
+                                PT_PC * sizeof(long));
+       tmp &= EPSW_FLAG_V | EPSW_FLAG_C | EPSW_FLAG_N | EPSW_FLAG_Z;
+       tmp |= regs->epsw & ~(EPSW_FLAG_V | EPSW_FLAG_C | EPSW_FLAG_N |
+                             EPSW_FLAG_Z);
+       regs->epsw = tmp;
+
+       if (ret < 0)
+               return ret;
+
+       /* and finally load the PC */
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &regs->pc, PT_PC * sizeof(long),
+                                NR_PTREGS * sizeof(long));
+
+       if (ret < 0)
+               return ret;
+
+       return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+                                        NR_PTREGS * sizeof(long), -1);
 }
 
-static inline void fpsave_init(struct task_struct *task)
+/*
+ * retrieve the contents of MN10300 userspace FPU registers
+ */
+static int fpuregs_get(struct task_struct *target,
+                      const struct user_regset *regset,
+                      unsigned int pos, unsigned int count,
+                      void *kbuf, void __user *ubuf)
 {
-       memset(&task->thread.fpu_state, 0, sizeof(struct fpu_state_struct));
+       const struct fpu_state_struct *fpregs = &target->thread.fpu_state;
+       int ret;
+
+       unlazy_fpu(target);
+
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 fpregs, 0, sizeof(*fpregs));
+       if (ret < 0)
+               return ret;
+
+       return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+                                       sizeof(*fpregs), -1);
 }
 
 /*
- * make sure the single step bit is not set
+ * update the contents of the MN10300 userspace FPU registers
  */
-void ptrace_disable(struct task_struct *child)
+static int fpuregs_set(struct task_struct *target,
+                      const struct user_regset *regset,
+                      unsigned int pos, unsigned int count,
+                      const void *kbuf, const void __user *ubuf)
+{
+       struct fpu_state_struct fpu_state = target->thread.fpu_state;
+       int ret;
+
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &fpu_state, 0, sizeof(fpu_state));
+       if (ret < 0)
+               return ret;
+
+       fpu_kill_state(target);
+       target->thread.fpu_state = fpu_state;
+       set_using_fpu(target);
+
+       return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+                                        sizeof(fpu_state), -1);
+}
+
+/*
+ * determine if the FPU registers have actually been used
+ */
+static int fpuregs_active(struct task_struct *target,
+                         const struct user_regset *regset)
+{
+       return is_using_fpu(target) ? regset->n : 0;
+}
+
+/*
+ * Define the register sets available on the MN10300 under Linux
+ */
+enum mn10300_regset {
+       REGSET_GENERAL,
+       REGSET_FPU,
+};
+
+static const struct user_regset mn10300_regsets[] = {
+       /*
+        * General register format is:
+        *      A3, A2, D3, D2, MCVF, MCRL, MCRH, MDRQ
+        *      E1, E0, E7...E2, SP, LAR, LIR, MDR
+        *      A1, A0, D1, D0, ORIG_D0, EPSW, PC
+        */
+       [REGSET_GENERAL] = {
+               .core_note_type = NT_PRSTATUS,
+               .n              = ELF_NGREG,
+               .size           = sizeof(long),
+               .align          = sizeof(long),
+               .get            = genregs_get,
+               .set            = genregs_set,
+       },
+       /*
+        * FPU register format is:
+        *      FS0-31, FPCR
+        */
+       [REGSET_FPU] = {
+               .core_note_type = NT_PRFPREG,
+               .n              = sizeof(struct fpu_state_struct) / sizeof(long),
+               .size           = sizeof(long),
+               .align          = sizeof(long),
+               .get            = fpuregs_get,
+               .set            = fpuregs_set,
+               .active         = fpuregs_active,
+       },
+};
+
+static const struct user_regset_view user_mn10300_native_view = {
+       .name           = "mn10300",
+       .e_machine      = EM_MN10300,
+       .regsets        = mn10300_regsets,
+       .n              = ARRAY_SIZE(mn10300_regsets),
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+       return &user_mn10300_native_view;
+}
+
+/*
+ * set the single-step bit
+ */
+void user_enable_single_step(struct task_struct *child)
 {
 #ifndef CONFIG_MN10300_USING_JTAG
        struct user *dummy = NULL;
        long tmp;
 
        tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw);
-       tmp &= ~EPSW_T;
+       tmp |= EPSW_T;
        put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp);
 #endif
 }
 
 /*
- * set the single step bit
+ * make sure the single-step bit is not set
  */
-void ptrace_enable(struct task_struct *child)
+void user_disable_single_step(struct task_struct *child)
 {
 #ifndef CONFIG_MN10300_USING_JTAG
        struct user *dummy = NULL;
        long tmp;
 
        tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw);
-       tmp |= EPSW_T;
+       tmp &= ~EPSW_T;
        put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp);
 #endif
 }
 
+void ptrace_disable(struct task_struct *child)
+{
+       user_disable_single_step(child);
+}
+
 /*
  * handle the arch-specific side of process tracing
  */
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
-       struct fpu_state_struct fpu_state;
-       int i, ret;
+       unsigned long tmp;
+       int ret;
 
        switch (request) {
-       /* read the word at location addr. */
-       case PTRACE_PEEKTEXT: {
-               unsigned long tmp;
-               int copied;
-
-               copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
-               ret = -EIO;
-               if (copied != sizeof(tmp))
-                       break;
-               ret = put_user(tmp, (unsigned long *) data);
-               break;
-       }
-
-       /* read the word at location addr. */
-       case PTRACE_PEEKDATA: {
-               unsigned long tmp;
-               int copied;
-
-               copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
-               ret = -EIO;
-               if (copied != sizeof(tmp))
-                       break;
-               ret = put_user(tmp, (unsigned long *) data);
-               break;
-       }
-
        /* read the word at location addr in the USER area. */
-       case PTRACE_PEEKUSR: {
-               unsigned long tmp;
-
+       case PTRACE_PEEKUSR:
                ret = -EIO;
                if ((addr & 3) || addr < 0 ||
                    addr > sizeof(struct user) - 3)
@@ -179,17 +315,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                                             ptrace_regid_to_frame[addr]);
                ret = put_user(tmp, (unsigned long *) data);
                break;
-       }
-
-       /* write the word at location addr. */
-       case PTRACE_POKETEXT:
-       case PTRACE_POKEDATA:
-               if (access_process_vm(child, addr, &data, sizeof(data), 1) ==
-                   sizeof(data))
-                       ret = 0;
-               else
-                       ret = -EIO;
-               break;
 
                /* write the word at location addr in the USER area */
        case PTRACE_POKEUSR:
@@ -204,132 +329,32 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                                             data);
                break;
 
-               /* continue and stop at next (return from) syscall */
-       case PTRACE_SYSCALL:
-               /* restart after signal. */
-       case PTRACE_CONT:
-               ret = -EIO;
-               if ((unsigned long) data > _NSIG)
-                       break;
-               if (request == PTRACE_SYSCALL)
-                       set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-               else
-                       clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-               child->exit_code = data;
-               ptrace_disable(child);
-               wake_up_process(child);
-               ret = 0;
-               break;
-
-               /*
-                * make the child exit
-                * - the best I can do is send it a sigkill
-                * - perhaps it should be put in the status that it wants to
-                *   exit
-                */
-       case PTRACE_KILL:
-               ret = 0;
-               if (child->exit_state == EXIT_ZOMBIE)   /* already dead */
-                       break;
-               child->exit_code = SIGKILL;
-               clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-               ptrace_disable(child);
-               wake_up_process(child);
-               break;
-
-       case PTRACE_SINGLESTEP: /* set the trap flag. */
-#ifndef CONFIG_MN10300_USING_JTAG
-               ret = -EIO;
-               if ((unsigned long) data > _NSIG)
-                       break;
-               clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-               ptrace_enable(child);
-               child->exit_code = data;
-               wake_up_process(child);
-               ret = 0;
-#else
-               ret = -EINVAL;
-#endif
-               break;
-
-       case PTRACE_DETACH:     /* detach a process that was attached. */
-               ret = ptrace_detach(child, data);
-               break;
-
-               /* Get all gp regs from the child. */
-       case PTRACE_GETREGS: {
-               unsigned long tmp;
-
-               if (!access_ok(VERIFY_WRITE, (unsigned *) data, NR_PTREGS << 2)) {
-                       ret = -EIO;
-                       break;
-               }
-
-               for (i = 0; i < NR_PTREGS << 2; i += 4) {
-                       tmp = get_stack_long(child, ptrace_regid_to_frame[i]);
-                       __put_user(tmp, (unsigned long *) data);
-                       data += sizeof(tmp);
-               }
-               ret = 0;
-               break;
-       }
-
-       case PTRACE_SETREGS: { /* Set all gp regs in the child. */
-               unsigned long tmp;
-
-               if (!access_ok(VERIFY_READ, (unsigned long *)data,
-                              sizeof(struct pt_regs))) {
-                       ret = -EIO;
-                       break;
-               }
-
-               for (i = 0; i < NR_PTREGS << 2; i += 4) {
-                       __get_user(tmp, (unsigned long *) data);
-                       put_stack_long(child, ptrace_regid_to_frame[i], tmp);
-                       data += sizeof(tmp);
-               }
-               ret = 0;
-               break;
-       }
-
-       case PTRACE_GETFPREGS: { /* Get the child FPU state. */
-               if (is_using_fpu(child)) {
-                       unlazy_fpu(child);
-                       fpu_state = child->thread.fpu_state;
-               } else {
-                       memset(&fpu_state, 0, sizeof(fpu_state));
-               }
-
-               ret = -EIO;
-               if (copy_to_user((void *) data, &fpu_state,
-                                sizeof(fpu_state)) == 0)
-                       ret = 0;
-               break;
-       }
-
-       case PTRACE_SETFPREGS: { /* Set the child FPU state. */
-               ret = -EFAULT;
-               if (copy_from_user(&fpu_state, (const void *) data,
-                                  sizeof(fpu_state)) == 0) {
-                       fpu_kill_state(child);
-                       child->thread.fpu_state = fpu_state;
-                       set_using_fpu(child);
-                       ret = 0;
-               }
-               break;
-       }
-
-       case PTRACE_SETOPTIONS: {
-               if (data & PTRACE_O_TRACESYSGOOD)
-                       child->ptrace |= PT_TRACESYSGOOD;
-               else
-                       child->ptrace &= ~PT_TRACESYSGOOD;
-               ret = 0;
-               break;
-       }
+       case PTRACE_GETREGS:    /* Get all integer regs from the child. */
+               return copy_regset_to_user(child, &user_mn10300_native_view,
+                                          REGSET_GENERAL,
+                                          0, NR_PTREGS * sizeof(long),
+                                          (void __user *)data);
+
+       case PTRACE_SETREGS:    /* Set all integer regs in the child. */
+               return copy_regset_from_user(child, &user_mn10300_native_view,
+                                            REGSET_GENERAL,
+                                            0, NR_PTREGS * sizeof(long),
+                                            (const void __user *)data);
+
+       case PTRACE_GETFPREGS:  /* Get the child FPU state. */
+               return copy_regset_to_user(child, &user_mn10300_native_view,
+                                          REGSET_FPU,
+                                          0, sizeof(struct fpu_state_struct),
+                                          (void __user *)data);
+
+       case PTRACE_SETFPREGS:  /* Set the child FPU state. */
+               return copy_regset_from_user(child, &user_mn10300_native_view,
+                                            REGSET_FPU,
+                                            0, sizeof(struct fpu_state_struct),
+                                            (const void __user *)data);
 
        default:
-               ret = -EIO;
+               ret = ptrace_request(child, request, addr, data);
                break;
        }
 
@@ -337,43 +362,26 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 }
 
 /*
- * notification of system call entry/exit
- * - triggered by current->work.syscall_trace
+ * handle tracing of system call entry
+ * - return the revised system call number or ULONG_MAX to cause ENOSYS
  */
-asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
+asmlinkage unsigned long syscall_trace_entry(struct pt_regs *regs)
 {
-#if 0
-       /* just in case... */
-       printk(KERN_DEBUG "[%d] syscall_%lu(%lx,%lx,%lx,%lx) = %lx\n",
-              current->pid,
-              regs->orig_d0,
-              regs->a0,
-              regs->d1,
-              regs->a3,
-              regs->a2,
-              regs->d0);
-       return;
-#endif
-
-       if (!test_thread_flag(TIF_SYSCALL_TRACE) &&
-           !test_thread_flag(TIF_SINGLESTEP))
-               return;
-       if (!(current->ptrace & PT_PTRACED))
-               return;
+       if (tracehook_report_syscall_entry(regs))
+               /* tracing decided this syscall should not happen, so
+                * We'll return a bogus call number to get an ENOSYS
+                * error, but leave the original number in
+                * regs->orig_d0
+                */
+               return ULONG_MAX;
 
-       /* the 0x80 provides a way for the tracing parent to distinguish
-          between a syscall stop and SIGTRAP delivery */
-       ptrace_notify(SIGTRAP |
-                     ((current->ptrace & PT_TRACESYSGOOD) &&
-                      !test_thread_flag(TIF_SINGLESTEP) ? 0x80 : 0));
+       return regs->orig_d0;
+}
 
-       /*
-        * this isn't the same as continuing with a signal, but it will do
-        * for normal use.  strace only continues with a signal if the
-        * stopping signal is not SIGTRAP.  -brl
-        */
-       if (current->exit_code) {
-               send_sig(current->exit_code, current, 1);
-               current->exit_code = 0;
-       }
+/*
+ * handle tracing of system call exit
+ */
+asmlinkage void syscall_trace_exit(struct pt_regs *regs)
+{
+       tracehook_report_syscall_exit(regs, 0);
 }
index 841ca99..9f7572a 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/tty.h>
 #include <linux/personality.h>
 #include <linux/suspend.h>
+#include <linux/tracehook.h>
 #include <asm/cacheflush.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
@@ -511,6 +512,9 @@ static void do_signal(struct pt_regs *regs)
                         * clear the TIF_RESTORE_SIGMASK flag */
                        if (test_thread_flag(TIF_RESTORE_SIGMASK))
                                clear_thread_flag(TIF_RESTORE_SIGMASK);
+
+                       tracehook_signal_handler(signr, &info, &ka, regs,
+                                                test_thread_flag(TIF_SINGLESTEP));
                }
 
                return;
@@ -561,4 +565,9 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags)
        /* deal with pending signal delivery */
        if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
                do_signal(regs);
+
+       if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+               clear_thread_flag(TIF_NOTIFY_RESUME);
+               tracehook_notify_resume(__frame);
+       }
 }
index 7892080..7095147 100644 (file)
@@ -165,24 +165,6 @@ ENTRY(itlb_aerror)
 ENTRY(dtlb_aerror)
        and     ~EPSW_NMID,epsw
        add     -4,sp
-       mov     d1,(sp)
-
-       movhu   (MMUFCR_DFC),d1                 # is it the initial valid write
-                                               # to this page?
-       and     MMUFCR_xFC_INITWR,d1
-       beq     dtlb_pagefault                  # jump if not
-
-       mov     (DPTEL),d1                      # set the dirty bit
-                                               # (don't replace with BSET!)
-       or      _PAGE_DIRTY,d1
-       mov     d1,(DPTEL)
-       mov     (sp),d1
-       add     4,sp
-       rti
-
-       ALIGN
-dtlb_pagefault:
-       mov     (sp),d1
        SAVE_ALL
        add     -4,sp                           # need to pass three params
 
index 33fac6b..d105f29 100644 (file)
@@ -174,6 +174,15 @@ config IOMMU_LEAK
          Add a simple leak tracer to the IOMMU code. This is useful when you
          are debugging a buggy device driver that leaks IOMMU mappings.
 
+config X86_DS_SELFTEST
+    bool "DS selftest"
+    default y
+    depends on DEBUG_KERNEL
+    depends on X86_DS
+       ---help---
+         Perform Debug Store selftests at boot time.
+         If in doubt, say "N".
+
 config HAVE_MMIOTRACE_SUPPORT
        def_bool y
 
index a505202..dcef387 100644 (file)
@@ -830,4 +830,5 @@ ia32_sys_call_table:
        .quad sys_inotify_init1
        .quad compat_sys_preadv
        .quad compat_sys_pwritev
+       .quad compat_sys_rt_tgsigqueueinfo      /* 335 */
 ia32_syscall_end:
index a8f672b..70dac19 100644 (file)
@@ -15,8 +15,8 @@
  * - buffer allocation (memory accounting)
  *
  *
- * Copyright (C) 2007-2008 Intel Corporation.
- * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
+ * Copyright (C) 2007-2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
  */
 
 #ifndef _ASM_X86_DS_H
@@ -83,8 +83,10 @@ enum ds_feature {
  * The interrupt threshold is independent from the overflow callback
  * to allow users to use their own overflow interrupt handling mechanism.
  *
- * task: the task to request recording for;
- *       NULL for per-cpu recording on the current cpu
+ * The function might sleep.
+ *
+ * task: the task to request recording for
+ * cpu:  the cpu to request recording for
  * base: the base pointer for the (non-pageable) buffer;
  * size: the size of the provided buffer in bytes
  * ovfl: pointer to a function to be called on buffer overflow;
@@ -93,19 +95,28 @@ enum ds_feature {
  *     -1 if no interrupt threshold is requested.
  * flags: a bit-mask of the above flags
  */
-extern struct bts_tracer *ds_request_bts(struct task_struct *task,
-                                        void *base, size_t size,
-                                        bts_ovfl_callback_t ovfl,
-                                        size_t th, unsigned int flags);
-extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
-                                          void *base, size_t size,
-                                          pebs_ovfl_callback_t ovfl,
-                                          size_t th, unsigned int flags);
+extern struct bts_tracer *ds_request_bts_task(struct task_struct *task,
+                                             void *base, size_t size,
+                                             bts_ovfl_callback_t ovfl,
+                                             size_t th, unsigned int flags);
+extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
+                                            bts_ovfl_callback_t ovfl,
+                                            size_t th, unsigned int flags);
+extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
+                                               void *base, size_t size,
+                                               pebs_ovfl_callback_t ovfl,
+                                               size_t th, unsigned int flags);
+extern struct pebs_tracer *ds_request_pebs_cpu(int cpu,
+                                              void *base, size_t size,
+                                              pebs_ovfl_callback_t ovfl,
+                                              size_t th, unsigned int flags);
 
 /*
  * Release BTS or PEBS resources
  * Suspend and resume BTS or PEBS tracing
  *
+ * Must be called with irq's enabled.
+ *
  * tracer: the tracer handle returned from ds_request_~()
  */
 extern void ds_release_bts(struct bts_tracer *tracer);
@@ -115,6 +126,28 @@ extern void ds_release_pebs(struct pebs_tracer *tracer);
 extern void ds_suspend_pebs(struct pebs_tracer *tracer);
 extern void ds_resume_pebs(struct pebs_tracer *tracer);
 
+/*
+ * Release BTS or PEBS resources
+ * Suspend and resume BTS or PEBS tracing
+ *
+ * Cpu tracers must call this on the traced cpu.
+ * Task tracers must call ds_release_~_noirq() for themselves.
+ *
+ * May be called with irq's disabled.
+ *
+ * Returns 0 if successful;
+ * -EPERM if the cpu tracer does not trace the current cpu.
+ * -EPERM if the task tracer does not trace itself.
+ *
+ * tracer: the tracer handle returned from ds_request_~()
+ */
+extern int ds_release_bts_noirq(struct bts_tracer *tracer);
+extern int ds_suspend_bts_noirq(struct bts_tracer *tracer);
+extern int ds_resume_bts_noirq(struct bts_tracer *tracer);
+extern int ds_release_pebs_noirq(struct pebs_tracer *tracer);
+extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer);
+extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer);
+
 
 /*
  * The raw DS buffer state as it is used for BTS and PEBS recording.
@@ -170,9 +203,9 @@ struct bts_struct {
                } lbr;
                /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */
                struct {
-                       __u64 jiffies;
+                       __u64 clock;
                        pid_t pid;
-               } timestamp;
+               } event;
        } variant;
 };
 
@@ -201,8 +234,12 @@ struct bts_trace {
 struct pebs_trace {
        struct ds_trace ds;
 
-       /* the PEBS reset value */
-       unsigned long long reset_value;
+       /* the number of valid counters in the below array */
+       unsigned int counters;
+
+#define MAX_PEBS_COUNTERS 4
+       /* the counter reset value */
+       unsigned long long counter_reset[MAX_PEBS_COUNTERS];
 };
 
 
@@ -237,9 +274,11 @@ extern int ds_reset_pebs(struct pebs_tracer *tracer);
  * Returns 0 on success; -Eerrno on error
  *
  * tracer: the tracer handle returned from ds_request_pebs()
+ * counter: the index of the counter
  * value: the new counter reset value
  */
-extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
+extern int ds_set_pebs_reset(struct pebs_tracer *tracer,
+                            unsigned int counter, u64 value);
 
 /*
  * Initialization
@@ -252,21 +291,12 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
  */
 extern void ds_switch_to(struct task_struct *prev, struct task_struct *next);
 
-/*
- * Task clone/init and cleanup work
- */
-extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father);
-extern void ds_exit_thread(struct task_struct *tsk);
-
 #else /* CONFIG_X86_DS */
 
 struct cpuinfo_x86;
 static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
 static inline void ds_switch_to(struct task_struct *prev,
                                struct task_struct *next) {}
-static inline void ds_copy_thread(struct task_struct *tsk,
-                                 struct task_struct *father) {}
-static inline void ds_exit_thread(struct task_struct *tsk) {}
 
 #endif /* CONFIG_X86_DS */
 #endif /* _ASM_X86_DS_H */
index 87ede2f..c776826 100644 (file)
@@ -462,14 +462,8 @@ struct thread_struct {
        unsigned                io_bitmap_max;
 /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
        unsigned long   debugctlmsr;
-#ifdef CONFIG_X86_DS
-/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
+       /* Debug Store context; see asm/ds.h */
        struct ds_context       *ds_ctx;
-#endif /* CONFIG_X86_DS */
-#ifdef CONFIG_X86_PTRACE_BTS
-/* the signal to send on a bts buffer overflow */
-       unsigned int    bts_ovfl_signal;
-#endif /* CONFIG_X86_PTRACE_BTS */
 };
 
 static inline unsigned long native_get_debugreg(int regno)
@@ -797,6 +791,21 @@ static inline unsigned long get_debugctlmsr(void)
     return debugctlmsr;
 }
 
+static inline unsigned long get_debugctlmsr_on_cpu(int cpu)
+{
+       u64 debugctlmsr = 0;
+       u32 val1, val2;
+
+#ifndef CONFIG_X86_DEBUGCTLMSR
+       if (boot_cpu_data.x86 < 6)
+               return 0;
+#endif
+       rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2);
+       debugctlmsr = val1 | ((u64)val2 << 32);
+
+       return debugctlmsr;
+}
+
 static inline void update_debugctlmsr(unsigned long debugctlmsr)
 {
 #ifndef CONFIG_X86_DEBUGCTLMSR
@@ -806,6 +815,18 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 }
 
+static inline void update_debugctlmsr_on_cpu(int cpu,
+                                            unsigned long debugctlmsr)
+{
+#ifndef CONFIG_X86_DEBUGCTLMSR
+       if (boot_cpu_data.x86 < 6)
+               return;
+#endif
+       wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR,
+                    (u32)((u64)debugctlmsr),
+                    (u32)((u64)debugctlmsr >> 32));
+}
+
 /*
  * from system description table in BIOS. Mostly for MCA use, but
  * others may find it useful:
index 624f133..0f0d908 100644 (file)
@@ -236,12 +236,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
 extern int do_set_thread_area(struct task_struct *p, int idx,
                              struct user_desc __user *info, int can_allocate);
 
-extern void x86_ptrace_untrace(struct task_struct *);
-extern void x86_ptrace_fork(struct task_struct *child,
-                           unsigned long clone_flags);
+#ifdef CONFIG_X86_PTRACE_BTS
+extern void ptrace_bts_untrace(struct task_struct *tsk);
 
-#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk)
-#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags)
+#define arch_ptrace_untrace(tsk)       ptrace_bts_untrace(tsk)
+#endif /* CONFIG_X86_PTRACE_BTS */
 
 #endif /* __KERNEL__ */
 
index f729563..c4ee805 100644 (file)
@@ -67,6 +67,7 @@ static inline int user_termio_to_kernel_termios(struct ktermios *termios,
        SET_LOW_TERMIOS_BITS(termios, termio, c_oflag);
        SET_LOW_TERMIOS_BITS(termios, termio, c_cflag);
        SET_LOW_TERMIOS_BITS(termios, termio, c_lflag);
+       get_user(termios->c_line, &termio->c_line);
        return copy_from_user(termios->c_cc, termio->c_cc, NCC);
 }
 
index 16a5c84..a5ecc9c 100644 (file)
@@ -17,7 +17,7 @@
 
 static inline void __native_flush_tlb(void)
 {
-       write_cr3(read_cr3());
+       native_write_cr3(native_read_cr3());
 }
 
 static inline void __native_flush_tlb_global(void)
@@ -32,11 +32,11 @@ static inline void __native_flush_tlb_global(void)
         */
        raw_local_irq_save(flags);
 
-       cr4 = read_cr4();
+       cr4 = native_read_cr4();
        /* clear PGE */
-       write_cr4(cr4 & ~X86_CR4_PGE);
+       native_write_cr4(cr4 & ~X86_CR4_PGE);
        /* write old PGE again and flush TLBs */
-       write_cr4(cr4);
+       native_write_cr4(cr4);
 
        raw_local_irq_restore(flags);
 }
index 6e72d74..708dae6 100644 (file)
 #define __NR_inotify_init1     332
 #define __NR_preadv            333
 #define __NR_pwritev           334
+#define __NR_rt_tgsigqueueinfo 335
 
 #ifdef __KERNEL__
 
index f818294..4e2b054 100644 (file)
@@ -657,6 +657,8 @@ __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
 __SYSCALL(__NR_preadv, sys_preadv)
 #define __NR_pwritev                           296
 __SYSCALL(__NR_pwritev, sys_pwritev)
+#define __NR_rt_tgsigqueueinfo                 297
+__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
 
 
 #ifndef __NO_STUBS
index 235f592..4f78bd6 100644 (file)
@@ -44,6 +44,7 @@ obj-y                         += process.o
 obj-y                          += i387.o xsave.o
 obj-y                          += ptrace.o
 obj-$(CONFIG_X86_DS)           += ds.o
+obj-$(CONFIG_X86_DS_SELFTEST)          += ds_selftest.o
 obj-$(CONFIG_X86_32)           += tls.o
 obj-$(CONFIG_IA32_EMULATION)   += tls.o
 obj-y                          += step.o
index 87b67e3..48bfe13 100644 (file)
  * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
  */
 
-
-#include <asm/ds.h>
-
-#include <linux/errno.h>
+#include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/slab.h>
+#include <linux/errno.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/mm.h>
-#include <linux/kernel.h>
+#include <linux/trace_clock.h>
+
+#include <asm/ds.h>
 
+#include "ds_selftest.h"
 
 /*
- * The configuration for a particular DS hardware implementation.
+ * The configuration for a particular DS hardware implementation:
  */
 struct ds_configuration {
-       /* the name of the configuration */
-       const char *name;
-       /* the size of one pointer-typed field in the DS structure and
-          in the BTS and PEBS buffers in bytes;
-          this covers the first 8 DS fields related to buffer management. */
-       unsigned char  sizeof_field;
-       /* the size of a BTS/PEBS record in bytes */
-       unsigned char  sizeof_rec[2];
-       /* a series of bit-masks to control various features indexed
-        * by enum ds_feature */
-       unsigned long ctl[dsf_ctl_max];
+       /* The name of the configuration: */
+       const char              *name;
+
+       /* The size of pointer-typed fields in DS, BTS, and PEBS: */
+       unsigned char           sizeof_ptr_field;
+
+       /* The size of a BTS/PEBS record in bytes: */
+       unsigned char           sizeof_rec[2];
+
+       /* The number of pebs counter reset values in the DS structure. */
+       unsigned char           nr_counter_reset;
+
+       /* Control bit-masks indexed by enum ds_feature: */
+       unsigned long           ctl[dsf_ctl_max];
 };
-static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
+static struct ds_configuration ds_cfg __read_mostly;
+
+
+/* Maximal size of a DS configuration: */
+#define MAX_SIZEOF_DS          0x80
 
-#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
+/* Maximal size of a BTS record: */
+#define MAX_SIZEOF_BTS         (3 * 8)
 
-#define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */
-#define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */
-#define DS_ALIGNMENT (1 << 3)  /* BTS and PEBS buffer alignment */
+/* BTS and PEBS buffer alignment: */
+#define DS_ALIGNMENT           (1 << 3)
 
-#define BTS_CONTROL \
- (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
-  ds_cfg.ctl[dsf_bts_overflow])
+/* Number of buffer pointers in DS: */
+#define NUM_DS_PTR_FIELDS      8
 
+/* Size of a pebs reset value in DS: */
+#define PEBS_RESET_FIELD_SIZE  8
+
+/* Mask of control bits in the DS MSR register: */
+#define BTS_CONTROL                              \
+       ( ds_cfg.ctl[dsf_bts]                   | \
+         ds_cfg.ctl[dsf_bts_kernel]            | \
+         ds_cfg.ctl[dsf_bts_user]              | \
+         ds_cfg.ctl[dsf_bts_overflow] )
 
 /*
  * A BTS or PEBS tracer.
@@ -66,29 +82,36 @@ static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
  * to identify tracers.
  */
 struct ds_tracer {
-       /* the DS context (partially) owned by this tracer */
-       struct ds_context *context;
-       /* the buffer provided on ds_request() and its size in bytes */
-       void *buffer;
-       size_t size;
+       /* The DS context (partially) owned by this tracer. */
+       struct ds_context       *context;
+       /* The buffer provided on ds_request() and its size in bytes. */
+       void                    *buffer;
+       size_t                  size;
 };
 
 struct bts_tracer {
-       /* the common DS part */
-       struct ds_tracer ds;
-       /* the trace including the DS configuration */
-       struct bts_trace trace;
-       /* buffer overflow notification function */
-       bts_ovfl_callback_t ovfl;
+       /* The common DS part: */
+       struct ds_tracer        ds;
+
+       /* The trace including the DS configuration: */
+       struct bts_trace        trace;
+
+       /* Buffer overflow notification function: */
+       bts_ovfl_callback_t     ovfl;
+
+       /* Active flags affecting trace collection. */
+       unsigned int            flags;
 };
 
 struct pebs_tracer {
-       /* the common DS part */
-       struct ds_tracer ds;
-       /* the trace including the DS configuration */
-       struct pebs_trace trace;
-       /* buffer overflow notification function */
-       pebs_ovfl_callback_t ovfl;
+       /* The common DS part: */
+       struct ds_tracer        ds;
+
+       /* The trace including the DS configuration: */
+       struct pebs_trace       trace;
+
+       /* Buffer overflow notification function: */
+       pebs_ovfl_callback_t    ovfl;
 };
 
 /*
@@ -97,6 +120,7 @@ struct pebs_tracer {
  *
  * The DS configuration consists of the following fields; different
  * architetures vary in the size of those fields.
+ *
  * - double-word aligned base linear address of the BTS buffer
  * - write pointer into the BTS buffer
  * - end linear address of the BTS buffer (one byte beyond the end of
@@ -135,21 +159,22 @@ enum ds_field {
 };
 
 enum ds_qualifier {
-       ds_bts  = 0,
+       ds_bts = 0,
        ds_pebs
 };
 
-static inline unsigned long ds_get(const unsigned char *base,
-                                  enum ds_qualifier qual, enum ds_field field)
+static inline unsigned long
+ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field)
 {
-       base += (ds_cfg.sizeof_field * (field + (4 * qual)));
+       base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
        return *(unsigned long *)base;
 }
 
-static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
-                         enum ds_field field, unsigned long value)
+static inline void
+ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field,
+       unsigned long value)
 {
-       base += (ds_cfg.sizeof_field * (field + (4 * qual)));
+       base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
        (*(unsigned long *)base) = value;
 }
 
@@ -159,7 +184,6 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
  */
 static DEFINE_SPINLOCK(ds_lock);
 
-
 /*
  * We either support (system-wide) per-cpu or per-thread allocation.
  * We distinguish the two based on the task_struct pointer, where a
@@ -178,12 +202,28 @@ static DEFINE_SPINLOCK(ds_lock);
  */
 static atomic_t tracers = ATOMIC_INIT(0);
 
-static inline void get_tracer(struct task_struct *task)
+static inline int get_tracer(struct task_struct *task)
 {
-       if (task)
+       int error;
+
+       spin_lock_irq(&ds_lock);
+
+       if (task) {
+               error = -EPERM;
+               if (atomic_read(&tracers) < 0)
+                       goto out;
                atomic_inc(&tracers);
-       else
+       } else {
+               error = -EPERM;
+               if (atomic_read(&tracers) > 0)
+                       goto out;
                atomic_dec(&tracers);
+       }
+
+       error = 0;
+out:
+       spin_unlock_irq(&ds_lock);
+       return error;
 }
 
 static inline void put_tracer(struct task_struct *task)
@@ -194,14 +234,6 @@ static inline void put_tracer(struct task_struct *task)
                atomic_inc(&tracers);
 }
 
-static inline int check_tracer(struct task_struct *task)
-{
-       return task ?
-               (atomic_read(&tracers) >= 0) :
-               (atomic_read(&tracers) <= 0);
-}
-
-
 /*
  * The DS context is either attached to a thread or to a cpu:
  * - in the former case, the thread_struct contains a pointer to the
@@ -213,61 +245,58 @@ static inline int check_tracer(struct task_struct *task)
  * deallocated when the last user puts the context.
  */
 struct ds_context {
-       /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
-       unsigned char ds[MAX_SIZEOF_DS];
-       /* the owner of the BTS and PEBS configuration, respectively */
-       struct bts_tracer *bts_master;
-       struct pebs_tracer *pebs_master;
-       /* use count */
-       unsigned long count;
-       /* a pointer to the context location inside the thread_struct
-        * or the per_cpu context array */
-       struct ds_context **this;
-       /* a pointer to the task owning this context, or NULL, if the
-        * context is owned by a cpu */
-       struct task_struct *task;
-};
+       /* The DS configuration; goes into MSR_IA32_DS_AREA: */
+       unsigned char           ds[MAX_SIZEOF_DS];
+
+       /* The owner of the BTS and PEBS configuration, respectively: */
+       struct bts_tracer       *bts_master;
+       struct pebs_tracer      *pebs_master;
 
-static DEFINE_PER_CPU(struct ds_context *, system_context_array);
+       /* Use count: */
+       unsigned long           count;
 
-#define system_context per_cpu(system_context_array, smp_processor_id())
+       /* Pointer to the context pointer field: */
+       struct ds_context       **this;
+
+       /* The traced task; NULL for cpu tracing: */
+       struct task_struct      *task;
+
+       /* The traced cpu; only valid if task is NULL: */
+       int                     cpu;
+};
 
+static DEFINE_PER_CPU(struct ds_context *, cpu_context);
 
-static inline struct ds_context *ds_get_context(struct task_struct *task)
+
+static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
 {
        struct ds_context **p_context =
-               (task ? &task->thread.ds_ctx : &system_context);
+               (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu));
        struct ds_context *context = NULL;
        struct ds_context *new_context = NULL;
-       unsigned long irq;
 
        /* Chances are small that we already have a context. */
        new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
        if (!new_context)
                return NULL;
 
-       spin_lock_irqsave(&ds_lock, irq);
+       spin_lock_irq(&ds_lock);
 
        context = *p_context;
-       if (!context) {
+       if (likely(!context)) {
                context = new_context;
 
                context->this = p_context;
                context->task = task;
+               context->cpu = cpu;
                context->count = 0;
 
-               if (task)
-                       set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
-
-               if (!task || (task == current))
-                       wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds);
-
                *p_context = context;
        }
 
        context->count++;
 
-       spin_unlock_irqrestore(&ds_lock, irq);
+       spin_unlock_irq(&ds_lock);
 
        if (context != new_context)
                kfree(new_context);
@@ -275,8 +304,9 @@ static inline struct ds_context *ds_get_context(struct task_struct *task)
        return context;
 }
 
-static inline void ds_put_context(struct ds_context *context)
+static void ds_put_context(struct ds_context *context)
 {
+       struct task_struct *task;
        unsigned long irq;
 
        if (!context)
@@ -291,17 +321,55 @@ static inline void ds_put_context(struct ds_context *context)
 
        *(context->this) = NULL;
 
-       if (context->task)
-               clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
+       task = context->task;
+
+       if (task)
+               clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);
 
-       if (!context->task || (context->task == current))
-               wrmsrl(MSR_IA32_DS_AREA, 0);
+       /*
+        * We leave the (now dangling) pointer to the DS configuration in
+        * the DS_AREA msr. This is as good or as bad as replacing it with
+        * NULL - the hardware would crash if we enabled tracing.
+        *
+        * This saves us some problems with having to write an msr on a
+        * different cpu while preventing others from doing the same for the
+        * next context for that same cpu.
+        */
 
        spin_unlock_irqrestore(&ds_lock, irq);
 
+       /* The context might still be in use for context switching. */
+       if (task && (task != current))
+               wait_task_context_switch(task);
+
        kfree(context);
 }
 
+static void ds_install_ds_area(struct ds_context *context)
+{
+       unsigned long ds;
+
+       ds = (unsigned long)context->ds;
+
+       /*
+        * There is a race between the bts master and the pebs master.
+        *
+        * The thread/cpu access is synchronized via get/put_cpu() for
+        * task tracing and via wrmsr_on_cpu for cpu tracing.
+        *
+        * If bts and pebs are collected for the same task or same cpu,
+        * the same confiuration is written twice.
+        */
+       if (context->task) {
+               get_cpu();
+               if (context->task == current)
+                       wrmsrl(MSR_IA32_DS_AREA, ds);
+               set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
+               put_cpu();
+       } else
+               wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA,
+                            (u32)((u64)ds), (u32)((u64)ds >> 32));
+}
 
 /*
  * Call the tracer's callback on a buffer overflow.
@@ -332,9 +400,9 @@ static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
  * The remainder of any partially written record is zeroed out.
  *
  * context: the DS context
- * qual: the buffer type
- * record: the data to write
- * size: the size of the data
+ * qual:    the buffer type
+ * record:  the data to write
+ * size:    the size of the data
  */
 static int ds_write(struct ds_context *context, enum ds_qualifier qual,
                    const void *record, size_t size)
@@ -349,14 +417,14 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
                unsigned long write_size, adj_write_size;
 
                /*
-                * write as much as possible without producing an
+                * Write as much as possible without producing an
                 * overflow interrupt.
                 *
-                * interrupt_threshold must either be
+                * Interrupt_threshold must either be
                 * - bigger than absolute_maximum or
                 * - point to a record between buffer_base and absolute_maximum
                 *
-                * index points to a valid record.
+                * Index points to a valid record.
                 */
                base   = ds_get(context->ds, qual, ds_buffer_base);
                index  = ds_get(context->ds, qual, ds_index);
@@ -365,8 +433,10 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
 
                write_end = min(end, int_th);
 
-               /* if we are already beyond the interrupt threshold,
-                * we fill the entire buffer */
+               /*
+                * If we are already beyond the interrupt threshold,
+                * we fill the entire buffer.
+                */
                if (write_end <= index)
                        write_end = end;
 
@@ -383,7 +453,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
                adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
                adj_write_size *= ds_cfg.sizeof_rec[qual];
 
-               /* zero out trailing bytes */
+               /* Zero out trailing bytes. */
                memset((char *)index + write_size, 0,
                       adj_write_size - write_size);
                index += adj_write_size;
@@ -410,7 +480,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
  * Later architectures use 64bit pointers throughout, whereas earlier
  * architectures use 32bit pointers in 32bit mode.
  *
- * We compute the base address for the first 8 fields based on:
+ * We compute the base address for the fields based on:
  * - the field size stored in the DS configuration
  * - the relative field position
  *
@@ -431,23 +501,23 @@ enum bts_field {
        bts_to,
        bts_flags,
 
-       bts_qual = bts_from,
-       bts_jiffies = bts_to,
-       bts_pid = bts_flags,
+       bts_qual                = bts_from,
+       bts_clock               = bts_to,
+       bts_pid                 = bts_flags,
 
-       bts_qual_mask = (bts_qual_max - 1),
-       bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
+       bts_qual_mask           = (bts_qual_max - 1),
+       bts_escape              = ((unsigned long)-1 & ~bts_qual_mask)
 };
 
 static inline unsigned long bts_get(const char *base, enum bts_field field)
 {
-       base += (ds_cfg.sizeof_field * field);
+       base += (ds_cfg.sizeof_ptr_field * field);
        return *(unsigned long *)base;
 }
 
 static inline void bts_set(char *base, enum bts_field field, unsigned long val)
 {
-       base += (ds_cfg.sizeof_field * field);;
+       base += (ds_cfg.sizeof_ptr_field * field);;
        (*(unsigned long *)base) = val;
 }
 
@@ -463,8 +533,8 @@ static inline void bts_set(char *base, enum bts_field field, unsigned long val)
  *
  * return: bytes read/written on success; -Eerrno, otherwise
  */
-static int bts_read(struct bts_tracer *tracer, const void *at,
-                   struct bts_struct *out)
+static int
+bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out)
 {
        if (!tracer)
                return -EINVAL;
@@ -478,8 +548,8 @@ static int bts_read(struct bts_tracer *tracer, const void *at,
        memset(out, 0, sizeof(*out));
        if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
                out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
-               out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
-               out->variant.timestamp.pid = bts_get(at, bts_pid);
+               out->variant.event.clock = bts_get(at, bts_clock);
+               out->variant.event.pid = bts_get(at, bts_pid);
        } else {
                out->qualifier = bts_branch;
                out->variant.lbr.from = bts_get(at, bts_from);
@@ -516,8 +586,8 @@ static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
        case bts_task_arrives:
        case bts_task_departs:
                bts_set(raw, bts_qual, (bts_escape | in->qualifier));
-               bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
-               bts_set(raw, bts_pid, in->variant.timestamp.pid);
+               bts_set(raw, bts_clock, in->variant.event.clock);
+               bts_set(raw, bts_pid, in->variant.event.pid);
                break;
        default:
                return -EINVAL;
@@ -555,7 +625,8 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
                             unsigned int flags) {
        unsigned long buffer, adj;
 
-       /* adjust the buffer address and size to meet alignment
+       /*
+        * Adjust the buffer address and size to meet alignment
         * constraints:
         * - buffer is double-word aligned
         * - size is multiple of record size
@@ -577,9 +648,11 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
        trace->begin = (void *)buffer;
        trace->top = trace->begin;
        trace->end = (void *)(buffer + size);
-       /* The value for 'no threshold' is -1, which will set the
+       /*
+        * The value for 'no threshold' is -1, which will set the
         * threshold outside of the buffer, just like we want it.
         */
+       ith *= ds_cfg.sizeof_rec[qual];
        trace->ith = (void *)(buffer + size - ith);
 
        trace->flags = flags;
@@ -588,18 +661,27 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
 
 static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
                      enum ds_qualifier qual, struct task_struct *task,
-                     void *base, size_t size, size_t th, unsigned int flags)
+                     int cpu, void *base, size_t size, size_t th)
 {
        struct ds_context *context;
        int error;
+       size_t req_size;
+
+       error = -EOPNOTSUPP;
+       if (!ds_cfg.sizeof_rec[qual])
+               goto out;
 
        error = -EINVAL;
        if (!base)
                goto out;
 
-       /* we require some space to do alignment adjustments below */
+       req_size = ds_cfg.sizeof_rec[qual];
+       /* We might need space for alignment adjustments. */
+       if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT))
+               req_size += DS_ALIGNMENT;
+
        error = -EINVAL;
-       if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
+       if (size < req_size)
                goto out;
 
        if (th != (size_t)-1) {
@@ -614,182 +696,318 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
        tracer->size = size;
 
        error = -ENOMEM;
-       context = ds_get_context(task);
+       context = ds_get_context(task, cpu);
        if (!context)
                goto out;
        tracer->context = context;
 
-       ds_init_ds_trace(trace, qual, base, size, th, flags);
+       /*
+        * Defer any tracer-specific initialization work for the context until
+        * context ownership has been clarified.
+        */
 
        error = 0;
  out:
        return error;
 }
 
-struct bts_tracer *ds_request_bts(struct task_struct *task,
-                                 void *base, size_t size,
-                                 bts_ovfl_callback_t ovfl, size_t th,
-                                 unsigned int flags)
+static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu,
+                                        void *base, size_t size,
+                                        bts_ovfl_callback_t ovfl, size_t th,
+                                        unsigned int flags)
 {
        struct bts_tracer *tracer;
-       unsigned long irq;
        int error;
 
+       /* Buffer overflow notification is not yet implemented. */
        error = -EOPNOTSUPP;
-       if (!ds_cfg.ctl[dsf_bts])
+       if (ovfl)
                goto out;
 
-       /* buffer overflow notification is not yet implemented */
-       error = -EOPNOTSUPP;
-       if (ovfl)
+       error = get_tracer(task);
+       if (error < 0)
                goto out;
 
        error = -ENOMEM;
        tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
        if (!tracer)
-               goto out;
+               goto out_put_tracer;
        tracer->ovfl = ovfl;
 
+       /* Do some more error checking and acquire a tracing context. */
        error = ds_request(&tracer->ds, &tracer->trace.ds,
-                          ds_bts, task, base, size, th, flags);
+                          ds_bts, task, cpu, base, size, th);
        if (error < 0)
                goto out_tracer;
 
-
-       spin_lock_irqsave(&ds_lock, irq);
-
-       error = -EPERM;
-       if (!check_tracer(task))
-               goto out_unlock;
-       get_tracer(task);
+       /* Claim the bts part of the tracing context we acquired above. */
+       spin_lock_irq(&ds_lock);
 
        error = -EPERM;
        if (tracer->ds.context->bts_master)
-               goto out_put_tracer;
+               goto out_unlock;
        tracer->ds.context->bts_master = tracer;
 
-       spin_unlock_irqrestore(&ds_lock, irq);
+       spin_unlock_irq(&ds_lock);
 
+       /*
+        * Now that we own the bts part of the context, let's complete the
+        * initialization for that part.
+        */
+       ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags);
+       ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+       ds_install_ds_area(tracer->ds.context);
 
        tracer->trace.read  = bts_read;
        tracer->trace.write = bts_write;
 
-       ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+       /* Start tracing. */
        ds_resume_bts(tracer);
 
        return tracer;
 
- out_put_tracer:
-       put_tracer(task);
  out_unlock:
-       spin_unlock_irqrestore(&ds_lock, irq);
+       spin_unlock_irq(&ds_lock);
        ds_put_context(tracer->ds.context);
  out_tracer:
        kfree(tracer);
+ out_put_tracer:
+       put_tracer(task);
  out:
        return ERR_PTR(error);
 }
 
-struct pebs_tracer *ds_request_pebs(struct task_struct *task,
-                                   void *base, size_t size,
-                                   pebs_ovfl_callback_t ovfl, size_t th,
-                                   unsigned int flags)
+struct bts_tracer *ds_request_bts_task(struct task_struct *task,
+                                      void *base, size_t size,
+                                      bts_ovfl_callback_t ovfl,
+                                      size_t th, unsigned int flags)
+{
+       return ds_request_bts(task, 0, base, size, ovfl, th, flags);
+}
+
+struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
+                                     bts_ovfl_callback_t ovfl,
+                                     size_t th, unsigned int flags)
+{
+       return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags);
+}
+
+static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu,
+                                          void *base, size_t size,
+                                          pebs_ovfl_callback_t ovfl, size_t th,
+                                          unsigned int flags)
 {
        struct pebs_tracer *tracer;
-       unsigned long irq;
        int error;
 
-       /* buffer overflow notification is not yet implemented */
+       /* Buffer overflow notification is not yet implemented. */
        error = -EOPNOTSUPP;
        if (ovfl)
                goto out;
 
+       error = get_tracer(task);
+       if (error < 0)
+               goto out;
+
        error = -ENOMEM;
        tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
        if (!tracer)
-               goto out;
+               goto out_put_tracer;
        tracer->ovfl = ovfl;
 
+       /* Do some more error checking and acquire a tracing context. */
        error = ds_request(&tracer->ds, &tracer->trace.ds,
-                          ds_pebs, task, base, size, th, flags);
+                          ds_pebs, task, cpu, base, size, th);
        if (error < 0)
                goto out_tracer;
 
-       spin_lock_irqsave(&ds_lock, irq);
-
-       error = -EPERM;
-       if (!check_tracer(task))
-               goto out_unlock;
-       get_tracer(task);
+       /* Claim the pebs part of the tracing context we acquired above. */
+       spin_lock_irq(&ds_lock);
 
        error = -EPERM;
        if (tracer->ds.context->pebs_master)
-               goto out_put_tracer;
+               goto out_unlock;
        tracer->ds.context->pebs_master = tracer;
 
-       spin_unlock_irqrestore(&ds_lock, irq);
+       spin_unlock_irq(&ds_lock);
 
+       /*
+        * Now that we own the pebs part of the context, let's complete the
+        * initialization for that part.
+        */
+       ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags);
        ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
+       ds_install_ds_area(tracer->ds.context);
+
+       /* Start tracing. */
        ds_resume_pebs(tracer);
 
        return tracer;
 
- out_put_tracer:
-       put_tracer(task);
  out_unlock:
-       spin_unlock_irqrestore(&ds_lock, irq);
+       spin_unlock_irq(&ds_lock);
        ds_put_context(tracer->ds.context);
  out_tracer:
        kfree(tracer);
+ out_put_tracer:
+       put_tracer(task);
  out:
        return ERR_PTR(error);
 }
 
-void ds_release_bts(struct bts_tracer *tracer)
+struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
+                                        void *base, size_t size,
+                                        pebs_ovfl_callback_t ovfl,
+                                        size_t th, unsigned int flags)
 {
-       if (!tracer)
-               return;
+       return ds_request_pebs(task, 0, base, size, ovfl, th, flags);
+}
 
-       ds_suspend_bts(tracer);
+struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size,
+                                       pebs_ovfl_callback_t ovfl,
+                                       size_t th, unsigned int flags)
+{
+       return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags);
+}
+
+static void ds_free_bts(struct bts_tracer *tracer)
+{
+       struct task_struct *task;
+
+       task = tracer->ds.context->task;
 
        WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
        tracer->ds.context->bts_master = NULL;
 
-       put_tracer(tracer->ds.context->task);
+       /* Make sure tracing stopped and the tracer is not in use. */
+       if (task && (task != current))
+               wait_task_context_switch(task);
+
        ds_put_context(tracer->ds.context);
+       put_tracer(task);
 
        kfree(tracer);
 }
 
+void ds_release_bts(struct bts_tracer *tracer)
+{
+       might_sleep();
+
+       if (!tracer)
+               return;
+
+       ds_suspend_bts(tracer);
+       ds_free_bts(tracer);
+}
+
+int ds_release_bts_noirq(struct bts_tracer *tracer)
+{
+       struct task_struct *task;
+       unsigned long irq;
+       int error;
+
+       if (!tracer)
+               return 0;
+
+       task = tracer->ds.context->task;
+
+       local_irq_save(irq);
+
+       error = -EPERM;
+       if (!task &&
+           (tracer->ds.context->cpu != smp_processor_id()))
+               goto out;
+
+       error = -EPERM;
+       if (task && (task != current))
+               goto out;
+
+       ds_suspend_bts_noirq(tracer);
+       ds_free_bts(tracer);
+
+       error = 0;
+ out:
+       local_irq_restore(irq);
+       return error;
+}
+
+static void update_task_debugctlmsr(struct task_struct *task,
+                                   unsigned long debugctlmsr)
+{
+       task->thread.debugctlmsr = debugctlmsr;
+
+       get_cpu();
+       if (task == current)
+               update_debugctlmsr(debugctlmsr);
+       put_cpu();
+}
+
 void ds_suspend_bts(struct bts_tracer *tracer)
 {
        struct task_struct *task;
+       unsigned long debugctlmsr;
+       int cpu;
 
        if (!tracer)
                return;
 
+       tracer->flags = 0;
+
        task = tracer->ds.context->task;
+       cpu  = tracer->ds.context->cpu;
 
-       if (!task || (task == current))
-               update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
+       WARN_ON(!task && irqs_disabled());
 
-       if (task) {
-               task->thread.debugctlmsr &= ~BTS_CONTROL;
+       debugctlmsr = (task ?
+                      task->thread.debugctlmsr :
+                      get_debugctlmsr_on_cpu(cpu));
+       debugctlmsr &= ~BTS_CONTROL;
 
-               if (!task->thread.debugctlmsr)
-                       clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
-       }
+       if (task)
+               update_task_debugctlmsr(task, debugctlmsr);
+       else
+               update_debugctlmsr_on_cpu(cpu, debugctlmsr);
 }
 
-void ds_resume_bts(struct bts_tracer *tracer)
+int ds_suspend_bts_noirq(struct bts_tracer *tracer)
 {
        struct task_struct *task;
-       unsigned long control;
+       unsigned long debugctlmsr, irq;
+       int cpu, error = 0;
 
        if (!tracer)
-               return;
+               return 0;
+
+       tracer->flags = 0;
 
        task = tracer->ds.context->task;
+       cpu  = tracer->ds.context->cpu;
+
+       local_irq_save(irq);
+
+       error = -EPERM;
+       if (!task && (cpu != smp_processor_id()))
+               goto out;
+
+       debugctlmsr = (task ?
+                      task->thread.debugctlmsr :
+                      get_debugctlmsr());
+       debugctlmsr &= ~BTS_CONTROL;
+
+       if (task)
+               update_task_debugctlmsr(task, debugctlmsr);
+       else
+               update_debugctlmsr(debugctlmsr);
+
+       error = 0;
+ out:
+       local_irq_restore(irq);
+       return error;
+}
+
+static unsigned long ds_bts_control(struct bts_tracer *tracer)
+{
+       unsigned long control;
 
        control = ds_cfg.ctl[dsf_bts];
        if (!(tracer->trace.ds.flags & BTS_KERNEL))
@@ -797,41 +1015,149 @@ void ds_resume_bts(struct bts_tracer *tracer)
        if (!(tracer->trace.ds.flags & BTS_USER))
                control |= ds_cfg.ctl[dsf_bts_user];
 
-       if (task) {
-               task->thread.debugctlmsr |= control;
-               set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
-       }
-
-       if (!task || (task == current))
-               update_debugctlmsr(get_debugctlmsr() | control);
+       return control;
 }
 
-void ds_release_pebs(struct pebs_tracer *tracer)
+void ds_resume_bts(struct bts_tracer *tracer)
 {
+       struct task_struct *task;
+       unsigned long debugctlmsr;
+       int cpu;
+
        if (!tracer)
                return;
 
-       ds_suspend_pebs(tracer);
+       tracer->flags = tracer->trace.ds.flags;
+
+       task = tracer->ds.context->task;
+       cpu  = tracer->ds.context->cpu;
+
+       WARN_ON(!task && irqs_disabled());
+
+       debugctlmsr = (task ?
+                      task->thread.debugctlmsr :
+                      get_debugctlmsr_on_cpu(cpu));
+       debugctlmsr |= ds_bts_control(tracer);
+
+       if (task)
+               update_task_debugctlmsr(task, debugctlmsr);
+       else
+               update_debugctlmsr_on_cpu(cpu, debugctlmsr);
+}
+
+int ds_resume_bts_noirq(struct bts_tracer *tracer)
+{
+       struct task_struct *task;
+       unsigned long debugctlmsr, irq;
+       int cpu, error = 0;
+
+       if (!tracer)
+               return 0;
+
+       tracer->flags = tracer->trace.ds.flags;
+
+       task = tracer->ds.context->task;
+       cpu  = tracer->ds.context->cpu;
+
+       local_irq_save(irq);
+
+       error = -EPERM;
+       if (!task && (cpu != smp_processor_id()))
+               goto out;
+
+       debugctlmsr = (task ?
+                      task->thread.debugctlmsr :
+                      get_debugctlmsr());
+       debugctlmsr |= ds_bts_control(tracer);
+
+       if (task)
+               update_task_debugctlmsr(task, debugctlmsr);
+       else
+               update_debugctlmsr(debugctlmsr);
+
+       error = 0;
+ out:
+       local_irq_restore(irq);
+       return error;
+}
+
+static void ds_free_pebs(struct pebs_tracer *tracer)
+{
+       struct task_struct *task;
+
+       task = tracer->ds.context->task;
 
        WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
        tracer->ds.context->pebs_master = NULL;
 
-       put_tracer(tracer->ds.context->task);
        ds_put_context(tracer->ds.context);
+       put_tracer(task);
 
        kfree(tracer);
 }
 
+void ds_release_pebs(struct pebs_tracer *tracer)
+{
+       might_sleep();
+
+       if (!tracer)
+               return;
+
+       ds_suspend_pebs(tracer);
+       ds_free_pebs(tracer);
+}
+
+int ds_release_pebs_noirq(struct pebs_tracer *tracer)
+{
+       struct task_struct *task;
+       unsigned long irq;
+       int error;
+
+       if (!tracer)
+               return 0;
+
+       task = tracer->ds.context->task;
+
+       local_irq_save(irq);
+
+       error = -EPERM;
+       if (!task &&
+           (tracer->ds.context->cpu != smp_processor_id()))
+               goto out;
+
+       error = -EPERM;
+       if (task && (task != current))
+               goto out;
+
+       ds_suspend_pebs_noirq(tracer);
+       ds_free_pebs(tracer);
+
+       error = 0;
+ out:
+       local_irq_restore(irq);
+       return error;
+}
+
 void ds_suspend_pebs(struct pebs_tracer *tracer)
 {
 
 }
 
+int ds_suspend_pebs_noirq(struct pebs_tracer *tracer)
+{
+       return 0;
+}
+
 void ds_resume_pebs(struct pebs_tracer *tracer)
 {
 
 }
 
+int ds_resume_pebs_noirq(struct pebs_tracer *tracer)
+{
+       return 0;
+}
+
 const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
 {
        if (!tracer)
@@ -847,8 +1173,12 @@ const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
                return NULL;
 
        ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
-       tracer->trace.reset_value =
-               *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
+
+       tracer->trace.counters = ds_cfg.nr_counter_reset;
+       memcpy(tracer->trace.counter_reset,
+              tracer->ds.context->ds +
+              (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field),
+              ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE);
 
        return &tracer->trace;
 }
@@ -873,18 +1203,24 @@ int ds_reset_pebs(struct pebs_tracer *tracer)
 
        tracer->trace.ds.top = tracer->trace.ds.begin;
 
-       ds_set(tracer->ds.context->ds, ds_bts, ds_index,
+       ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
               (unsigned long)tracer->trace.ds.top);
 
        return 0;
 }
 
-int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
+int ds_set_pebs_reset(struct pebs_tracer *tracer,
+                     unsigned int counter, u64 value)
 {
        if (!tracer)
                return -EINVAL;
 
-       *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
+       if (ds_cfg.nr_counter_reset < counter)
+               return -EINVAL;
+
+       *(u64 *)(tracer->ds.context->ds +
+                (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) +
+                (counter * PEBS_RESET_FIELD_SIZE)) = value;
 
        return 0;
 }
@@ -894,73 +1230,117 @@ static const struct ds_configuration ds_cfg_netburst = {
        .ctl[dsf_bts]           = (1 << 2) | (1 << 3),
        .ctl[dsf_bts_kernel]    = (1 << 5),
        .ctl[dsf_bts_user]      = (1 << 6),
-
-       .sizeof_field           = sizeof(long),
-       .sizeof_rec[ds_bts]     = sizeof(long) * 3,
-#ifdef __i386__
-       .sizeof_rec[ds_pebs]    = sizeof(long) * 10,
-#else
-       .sizeof_rec[ds_pebs]    = sizeof(long) * 18,
-#endif
+       .nr_counter_reset       = 1,
 };
 static const struct ds_configuration ds_cfg_pentium_m = {
        .name = "Pentium M",
        .ctl[dsf_bts]           = (1 << 6) | (1 << 7),
-
-       .sizeof_field           = sizeof(long),
-       .sizeof_rec[ds_bts]     = sizeof(long) * 3,
-#ifdef __i386__
-       .sizeof_rec[ds_pebs]    = sizeof(long) * 10,
-#else
-       .sizeof_rec[ds_pebs]    = sizeof(long) * 18,
-#endif
+       .nr_counter_reset       = 1,
 };
 static const struct ds_configuration ds_cfg_core2_atom = {
        .name = "Core 2/Atom",
        .ctl[dsf_bts]           = (1 << 6) | (1 << 7),
        .ctl[dsf_bts_kernel]    = (1 << 9),
        .ctl[dsf_bts_user]      = (1 << 10),
-
-       .sizeof_field           = 8,
-       .sizeof_rec[ds_bts]     = 8 * 3,
-       .sizeof_rec[ds_pebs]    = 8 * 18,
+       .nr_counter_reset       = 1,
+};
+static const struct ds_configuration ds_cfg_core_i7 = {
+       .name = "Core i7",
+       .ctl[dsf_bts]           = (1 << 6) | (1 << 7),
+       .ctl[dsf_bts_kernel]    = (1 << 9),
+       .ctl[dsf_bts_user]      = (1 << 10),
+       .nr_counter_reset       = 4,
 };
 
 static void
-ds_configure(const struct ds_configuration *cfg)
+ds_configure(const struct ds_configuration *cfg,
+            struct cpuinfo_x86 *cpu)
 {
+       unsigned long nr_pebs_fields = 0;
+
+       printk(KERN_INFO "[ds] using %s configuration\n", cfg->name);
+
+#ifdef __i386__
+       nr_pebs_fields = 10;
+#else
+       nr_pebs_fields = 18;
+#endif
+
+       /*
+        * Starting with version 2, architectural performance
+        * monitoring supports a format specifier.
+        */
+       if ((cpuid_eax(0xa) & 0xff) > 1) {
+               unsigned long perf_capabilities, format;
+
+               rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
+
+               format = (perf_capabilities >> 8) & 0xf;
+
+               switch (format) {
+               case 0:
+                       nr_pebs_fields = 18;
+                       break;
+               case 1:
+                       nr_pebs_fields = 22;
+                       break;
+               default:
+                       printk(KERN_INFO
+                              "[ds] unknown PEBS format: %lu\n", format);
+                       nr_pebs_fields = 0;
+                       break;
+               }
+       }
+
        memset(&ds_cfg, 0, sizeof(ds_cfg));
        ds_cfg = *cfg;
 
-       printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name);
+       ds_cfg.sizeof_ptr_field =
+               (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4);
+
+       ds_cfg.sizeof_rec[ds_bts]  = ds_cfg.sizeof_ptr_field * 3;
+       ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields;
 
-       if (!cpu_has_bts) {
-               ds_cfg.ctl[dsf_bts] = 0;
+       if (!cpu_has(cpu, X86_FEATURE_BTS)) {
+               ds_cfg.sizeof_rec[ds_bts] = 0;
                printk(KERN_INFO "[ds] bts not available\n");
        }
-       if (!cpu_has_pebs)
+       if (!cpu_has(cpu, X86_FEATURE_PEBS)) {
+               ds_cfg.sizeof_rec[ds_pebs] = 0;
                printk(KERN_INFO "[ds] pebs not available\n");
+       }
+
+       printk(KERN_INFO "[ds] sizes: address: %u bit, ",
+              8 * ds_cfg.sizeof_ptr_field);
+       printk("bts/pebs record: %u/%u bytes\n",
+              ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);
 
-       WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field));
+       WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset);
 }
 
 void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 {
+       /* Only configure the first cpu. Others are identical. */
+       if (ds_cfg.name)
+               return;
+
        switch (c->x86) {
        case 0x6:
                switch (c->x86_model) {
                case 0x9:
                case 0xd: /* Pentium M */
-                       ds_configure(&ds_cfg_pentium_m);
+                       ds_configure(&ds_cfg_pentium_m, c);
                        break;
                case 0xf:
                case 0x17: /* Core2 */
                case 0x1c: /* Atom */
-                       ds_configure(&ds_cfg_core2_atom);
+                       ds_configure(&ds_cfg_core2_atom, c);
+                       break;
+               case 0x1a: /* Core i7 */
+                       ds_configure(&ds_cfg_core_i7, c);
                        break;
-               case 0x1a: /* i7 */
                default:
-                       /* sorry, don't know about them */
+                       /* Sorry, don't know about them. */
                        break;
                }
                break;
@@ -969,64 +1349,89 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
                case 0x0:
                case 0x1:
                case 0x2: /* Netburst */
-                       ds_configure(&ds_cfg_netburst);
+                       ds_configure(&ds_cfg_netburst, c);
                        break;
                default:
-                       /* sorry, don't know about them */
+                       /* Sorry, don't know about them. */
                        break;
                }
                break;
        default:
-               /* sorry, don't know about them */
+               /* Sorry, don't know about them. */
                break;
        }
 }
 
+static inline void ds_take_timestamp(struct ds_context *context,
+                                    enum bts_qualifier qualifier,
+                                    struct task_struct *task)
+{
+       struct bts_tracer *tracer = context->bts_master;
+       struct bts_struct ts;
+
+       /* Prevent compilers from reading the tracer pointer twice. */
+       barrier();
+
+       if (!tracer || !(tracer->flags & BTS_TIMESTAMPS))
+               return;
+
+       memset(&ts, 0, sizeof(ts));
+       ts.qualifier            = qualifier;
+       ts.variant.event.clock  = trace_clock_global();
+       ts.variant.event.pid    = task->pid;
+
+       bts_write(tracer, &ts);
+}
+
 /*
  * Change the DS configuration from tracing prev to tracing next.
  */
 void ds_switch_to(struct task_struct *prev, struct task_struct *next)
 {
-       struct ds_context *prev_ctx = prev->thread.ds_ctx;
-       struct ds_context *next_ctx = next->thread.ds_ctx;
+       struct ds_context *prev_ctx     = prev->thread.ds_ctx;
+       struct ds_context *next_ctx     = next->thread.ds_ctx;
+       unsigned long debugctlmsr       = next->thread.debugctlmsr;
+
+       /* Make sure all data is read before we start. */
+       barrier();
 
        if (prev_ctx) {
                update_debugctlmsr(0);
 
-               if (prev_ctx->bts_master &&
-                   (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
-                       struct bts_struct ts = {
-                               .qualifier = bts_task_departs,
-                               .variant.timestamp.jiffies = jiffies_64,
-                               .variant.timestamp.pid = prev->pid
-                       };
-                       bts_write(prev_ctx->bts_master, &ts);
-               }
+               ds_take_timestamp(prev_ctx, bts_task_departs, prev);
        }
 
        if (next_ctx) {
-               if (next_ctx->bts_master &&
-                   (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
-                       struct bts_struct ts = {
-                               .qualifier = bts_task_arrives,
-                               .variant.timestamp.jiffies = jiffies_64,
-                               .variant.timestamp.pid = next->pid
-                       };
-                       bts_write(next_ctx->bts_master, &ts);
-               }
+               ds_take_timestamp(next_ctx, bts_task_arrives, next);
 
                wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
        }
 
-       update_debugctlmsr(next->thread.debugctlmsr);
+       update_debugctlmsr(debugctlmsr);
 }
 
-void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
+static __init int ds_selftest(void)
 {
-       clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR);
-       tsk->thread.ds_ctx = NULL;
-}
+       if (ds_cfg.sizeof_rec[ds_bts]) {
+               int error;
 
-void ds_exit_thread(struct task_struct *tsk)
-{
+               error = ds_selftest_bts();
+               if (error) {
+                       WARN(1, "[ds] selftest failed. disabling bts.\n");
+                       ds_cfg.sizeof_rec[ds_bts] = 0;
+               }
+       }
+
+       if (ds_cfg.sizeof_rec[ds_pebs]) {
+               int error;
+
+               error = ds_selftest_pebs();
+               if (error) {
+                       WARN(1, "[ds] selftest failed. disabling pebs.\n");
+                       ds_cfg.sizeof_rec[ds_pebs] = 0;
+               }
+       }
+
+       return 0;
 }
+device_initcall(ds_selftest);
diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c
new file mode 100644 (file)
index 0000000..6bc7c19
--- /dev/null
@@ -0,0 +1,408 @@
+/*
+ * Debug Store support - selftest
+ *
+ *
+ * Copyright (C) 2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2009
+ */
+
+#include "ds_selftest.h"
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/smp.h>
+#include <linux/cpu.h>
+
+#include <asm/ds.h>
+
+
+#define BUFFER_SIZE            521     /* Intentionally chose an odd size. */
+#define SMALL_BUFFER_SIZE      24      /* A single bts entry. */
+
+struct ds_selftest_bts_conf {
+       struct bts_tracer *tracer;
+       int error;
+       int (*suspend)(struct bts_tracer *);
+       int (*resume)(struct bts_tracer *);
+};
+
+static int ds_selftest_bts_consistency(const struct bts_trace *trace)
+{
+       int error = 0;
+
+       if (!trace) {
+               printk(KERN_CONT "failed to access trace...");
+               /* Bail out. Other tests are pointless. */
+               return -1;
+       }
+
+       if (!trace->read) {
+               printk(KERN_CONT "bts read not available...");
+               error = -1;
+       }
+
+       /* Do some sanity checks on the trace configuration. */
+       if (!trace->ds.n) {
+               printk(KERN_CONT "empty bts buffer...");
+               error = -1;
+       }
+       if (!trace->ds.size) {
+               printk(KERN_CONT "bad bts trace setup...");
+               error = -1;
+       }
+       if (trace->ds.end !=
+           (char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) {
+               printk(KERN_CONT "bad bts buffer setup...");
+               error = -1;
+       }
+       /*
+        * We allow top in [begin; end], since its not clear when the
+        * overflow adjustment happens: after the increment or before the
+        * write.
+        */
+       if ((trace->ds.top < trace->ds.begin) ||
+           (trace->ds.end < trace->ds.top)) {
+               printk(KERN_CONT "bts top out of bounds...");
+               error = -1;
+       }
+
+       return error;
+}
+
+static int ds_selftest_bts_read(struct bts_tracer *tracer,
+                               const struct bts_trace *trace,
+                               const void *from, const void *to)
+{
+       const unsigned char *at;
+
+       /*
+        * Check a few things which do not belong to this test.
+        * They should be covered by other tests.
+        */
+       if (!trace)
+               return -1;
+
+       if (!trace->read)
+               return -1;
+
+       if (to < from)
+               return -1;
+
+       if (from < trace->ds.begin)
+               return -1;
+
+       if (trace->ds.end < to)
+               return -1;
+
+       if (!trace->ds.size)
+               return -1;
+
+       /* Now to the test itself. */
+       for (at = from; (void *)at < to; at += trace->ds.size) {
+               struct bts_struct bts;
+               unsigned long index;
+               int error;
+
+               if (((void *)at - trace->ds.begin) % trace->ds.size) {
+                       printk(KERN_CONT
+                              "read from non-integer index...");
+                       return -1;
+               }
+               index = ((void *)at - trace->ds.begin) / trace->ds.size;
+
+               memset(&bts, 0, sizeof(bts));
+               error = trace->read(tracer, at, &bts);
+               if (error < 0) {
+                       printk(KERN_CONT
+                              "error reading bts trace at [%lu] (0x%p)...",
+                              index, at);
+                       return error;
+               }
+
+               switch (bts.qualifier) {
+               case BTS_BRANCH:
+                       break;
+               default:
+                       printk(KERN_CONT
+                              "unexpected bts entry %llu at [%lu] (0x%p)...",
+                              bts.qualifier, index, at);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+static void ds_selftest_bts_cpu(void *arg)
+{
+       struct ds_selftest_bts_conf *conf = arg;
+       const struct bts_trace *trace;
+       void *top;
+
+       if (IS_ERR(conf->tracer)) {
+               conf->error = PTR_ERR(conf->tracer);
+               conf->tracer = NULL;
+
+               printk(KERN_CONT
+                      "initialization failed (err: %d)...", conf->error);
+               return;
+       }
+
+       /* We should meanwhile have enough trace. */
+       conf->error = conf->suspend(conf->tracer);
+       if (conf->error < 0)
+               return;
+
+       /* Let's see if we can access the trace. */
+       trace = ds_read_bts(conf->tracer);
+
+       conf->error = ds_selftest_bts_consistency(trace);
+       if (conf->error < 0)
+               return;
+
+       /* If everything went well, we should have a few trace entries. */
+       if (trace->ds.top == trace->ds.begin) {
+               /*
+                * It is possible but highly unlikely that we got a
+                * buffer overflow and end up at exactly the same
+                * position we started from.
+                * Let's issue a warning, but continue.
+                */
+               printk(KERN_CONT "no trace/overflow...");
+       }
+
+       /* Let's try to read the trace we collected. */
+       conf->error =
+               ds_selftest_bts_read(conf->tracer, trace,
+                                    trace->ds.begin, trace->ds.top);
+       if (conf->error < 0)
+               return;
+
+       /*
+        * Let's read the trace again.
+        * Since we suspended tracing, we should get the same result.
+        */
+       top = trace->ds.top;
+
+       trace = ds_read_bts(conf->tracer);
+       conf->error = ds_selftest_bts_consistency(trace);
+       if (conf->error < 0)
+               return;
+
+       if (top != trace->ds.top) {
+               printk(KERN_CONT "suspend not working...");
+               conf->error = -1;
+               return;
+       }
+
+       /* Let's collect some more trace - see if resume is working. */
+       conf->error = conf->resume(conf->tracer);
+       if (conf->error < 0)
+               return;
+
+       conf->error = conf->suspend(conf->tracer);
+       if (conf->error < 0)
+               return;
+
+       trace = ds_read_bts(conf->tracer);
+
+       conf->error = ds_selftest_bts_consistency(trace);
+       if (conf->error < 0)
+               return;
+
+       if (trace->ds.top == top) {
+               /*
+                * It is possible but highly unlikely that we got a
+                * buffer overflow and end up at exactly the same
+                * position we started from.
+                * Let's issue a warning and check the full trace.
+                */
+               printk(KERN_CONT
+                      "no resume progress/overflow...");
+
+               conf->error =
+                       ds_selftest_bts_read(conf->tracer, trace,
+                                            trace->ds.begin, trace->ds.end);
+       } else if (trace->ds.top < top) {
+               /*
+                * We had a buffer overflow - the entire buffer should
+                * contain trace records.
+                */
+               conf->error =
+                       ds_selftest_bts_read(conf->tracer, trace,
+                                            trace->ds.begin, trace->ds.end);
+       } else {
+               /*
+                * It is quite likely that the buffer did not overflow.
+                * Let's just check the delta trace.
+                */
+               conf->error =
+                       ds_selftest_bts_read(conf->tracer, trace, top,
+                                            trace->ds.top);
+       }
+       if (conf->error < 0)
+               return;
+
+       conf->error = 0;
+}
+
+static int ds_suspend_bts_wrap(struct bts_tracer *tracer)
+{
+       ds_suspend_bts(tracer);
+       return 0;
+}
+
+static int ds_resume_bts_wrap(struct bts_tracer *tracer)
+{
+       ds_resume_bts(tracer);
+       return 0;
+}
+
+static void ds_release_bts_noirq_wrap(void *tracer)
+{
+       (void)ds_release_bts_noirq(tracer);
+}
+
+static int ds_selftest_bts_bad_release_noirq(int cpu,
+                                            struct bts_tracer *tracer)
+{
+       int error = -EPERM;
+
+       /* Try to release the tracer on the wrong cpu. */
+       get_cpu();
+       if (cpu != smp_processor_id()) {
+               error = ds_release_bts_noirq(tracer);
+               if (error != -EPERM)
+                       printk(KERN_CONT "release on wrong cpu...");
+       }
+       put_cpu();
+
+       return error ? 0 : -1;
+}
+
+static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer)
+{
+       struct bts_tracer *tracer;
+       int error;
+
+       /* Try to request cpu tracing while task tracing is active. */
+       tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL,
+                                   (size_t)-1, BTS_KERNEL);
+       error = PTR_ERR(tracer);
+       if (!IS_ERR(tracer)) {
+               ds_release_bts(tracer);
+               error = 0;
+       }
+
+       if (error != -EPERM)
+               printk(KERN_CONT "cpu/task tracing overlap...");
+
+       return error ? 0 : -1;
+}
+
+static int ds_selftest_bts_bad_request_task(void *buffer)
+{
+       struct bts_tracer *tracer;
+       int error;
+
+       /* Try to request cpu tracing while task tracing is active. */
+       tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL,
+                                   (size_t)-1, BTS_KERNEL);
+       error = PTR_ERR(tracer);
+       if (!IS_ERR(tracer)) {
+               error = 0;
+               ds_release_bts(tracer);
+       }
+
+       if (error != -EPERM)
+               printk(KERN_CONT "task/cpu tracing overlap...");
+
+       return error ? 0 : -1;
+}
+
+int ds_selftest_bts(void)
+{
+       struct ds_selftest_bts_conf conf;
+       unsigned char buffer[BUFFER_SIZE], *small_buffer;
+       unsigned long irq;
+       int cpu;
+
+       printk(KERN_INFO "[ds] bts selftest...");
+       conf.error = 0;
+
+       small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8;
+
+       get_online_cpus();
+       for_each_online_cpu(cpu) {
+               conf.suspend = ds_suspend_bts_wrap;
+               conf.resume = ds_resume_bts_wrap;
+               conf.tracer =
+                       ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
+                                          NULL, (size_t)-1, BTS_KERNEL);
+               ds_selftest_bts_cpu(&conf);
+               if (conf.error >= 0)
+                       conf.error = ds_selftest_bts_bad_request_task(buffer);
+               ds_release_bts(conf.tracer);
+               if (conf.error < 0)
+                       goto out;
+
+               conf.suspend = ds_suspend_bts_noirq;
+               conf.resume = ds_resume_bts_noirq;
+               conf.tracer =
+                       ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
+                                          NULL, (size_t)-1, BTS_KERNEL);
+               smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1);
+               if (conf.error >= 0) {
+                       conf.error =
+                               ds_selftest_bts_bad_release_noirq(cpu,
+                                                                 conf.tracer);
+                       /* We must not release the tracer twice. */
+                       if (conf.error < 0)
+                               conf.tracer = NULL;
+               }
+               if (conf.error >= 0)
+                       conf.error = ds_selftest_bts_bad_request_task(buffer);
+               smp_call_function_single(cpu, ds_release_bts_noirq_wrap,
+                                        conf.tracer, 1);
+               if (conf.error < 0)
+                       goto out;
+       }
+
+       conf.suspend = ds_suspend_bts_wrap;
+       conf.resume = ds_resume_bts_wrap;
+       conf.tracer =
+               ds_request_bts_task(current, buffer, BUFFER_SIZE,
+                                   NULL, (size_t)-1, BTS_KERNEL);
+       ds_selftest_bts_cpu(&conf);
+       if (conf.error >= 0)
+               conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
+       ds_release_bts(conf.tracer);
+       if (conf.error < 0)
+               goto out;
+
+       conf.suspend = ds_suspend_bts_noirq;
+       conf.resume = ds_resume_bts_noirq;
+       conf.tracer =
+               ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE,
+                                  NULL, (size_t)-1, BTS_KERNEL);
+       local_irq_save(irq);
+       ds_selftest_bts_cpu(&conf);
+       if (conf.error >= 0)
+               conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
+       ds_release_bts_noirq(conf.tracer);
+       local_irq_restore(irq);
+       if (conf.error < 0)
+               goto out;
+
+       conf.error = 0;
+ out:
+       put_online_cpus();
+       printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed"));
+
+       return conf.error;
+}
+
+int ds_selftest_pebs(void)
+{
+       return 0;
+}
diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h
new file mode 100644 (file)
index 0000000..2ba8745
--- /dev/null
@@ -0,0 +1,15 @@
+/*
+ * Debug Store support - selftest
+ *
+ *
+ * Copyright (C) 2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2009
+ */
+
+#ifdef CONFIG_X86_DS_SELFTEST
+extern int ds_selftest_bts(void);
+extern int ds_selftest_pebs(void);
+#else
+static inline int ds_selftest_bts(void) { return 0; }
+static inline int ds_selftest_pebs(void) { return 0; }
+#endif
index bb01ce0..1c17d7c 100644 (file)
@@ -147,27 +147,14 @@ END(ftrace_graph_caller)
 GLOBAL(return_to_handler)
        subq  $80, %rsp
 
+       /* Save the return values */
        movq %rax, (%rsp)
-       movq %rcx, 8(%rsp)
-       movq %rdx, 16(%rsp)
-       movq %rsi, 24(%rsp)
-       movq %rdi, 32(%rsp)
-       movq %r8, 40(%rsp)
-       movq %r9, 48(%rsp)
-       movq %r10, 56(%rsp)
-       movq %r11, 64(%rsp)
+       movq %rdx, 8(%rsp)
 
        call ftrace_return_to_handler
 
        movq %rax, 72(%rsp)
-       movq 64(%rsp), %r11
-       movq 56(%rsp), %r10
-       movq 48(%rsp), %r9
-       movq 40(%rsp), %r8
-       movq 32(%rsp), %rdi
-       movq 24(%rsp), %rsi
-       movq 16(%rsp), %rdx
-       movq 8(%rsp), %rcx
+       movq 8(%rsp), %rdx
        movq (%rsp), %rax
        addq $72, %rsp
        retq
index e22d63b..3bb2be1 100644 (file)
@@ -16,6 +16,7 @@
 #include <asm/idle.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
+#include <asm/ds.h>
 
 unsigned long idle_halt;
 EXPORT_SYMBOL(idle_halt);
@@ -47,6 +48,8 @@ void free_thread_xstate(struct task_struct *tsk)
                kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
                tsk->thread.xstate = NULL;
        }
+
+       WARN(tsk->thread.ds_ctx, "leaking DS context\n");
 }
 
 void free_thread_info(struct thread_info *ti)
@@ -85,8 +88,6 @@ void exit_thread(void)
                put_cpu();
                kfree(bp);
        }
-
-       ds_exit_thread(current);
 }
 
 void flush_thread(void)
index c60924b..59f4524 100644 (file)
@@ -287,7 +287,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
                p->thread.io_bitmap_max = 0;
        }
 
-       ds_copy_thread(p, current);
+       clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
+       p->thread.ds_ctx = NULL;
 
        clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
        p->thread.debugctlmsr = 0;
index 45f010f..ebefb54 100644 (file)
@@ -332,7 +332,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
                        goto out;
        }
 
-       ds_copy_thread(p, me);
+       clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
+       p->thread.ds_ctx = NULL;
 
        clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
        p->thread.debugctlmsr = 0;
index 23b7c8f..09ecbde 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/audit.h>
 #include <linux/seccomp.h>
 #include <linux/signal.h>
+#include <linux/workqueue.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -578,17 +579,130 @@ static int ioperm_get(struct task_struct *target,
 }
 
 #ifdef CONFIG_X86_PTRACE_BTS
+/*
+ * A branch trace store context.
+ *
+ * Contexts may only be installed by ptrace_bts_config() and only for
+ * ptraced tasks.
+ *
+ * Contexts are destroyed when the tracee is detached from the tracer.
+ * The actual destruction work requires interrupts enabled, so the
+ * work is deferred and will be scheduled during __ptrace_unlink().
+ *
+ * Contexts hold an additional task_struct reference on the traced
+ * task, as well as a reference on the tracer's mm.
+ *
+ * Ptrace already holds a task_struct for the duration of ptrace operations,
+ * but since destruction is deferred, it may be executed after both
+ * tracer and tracee exited.
+ */
+struct bts_context {
+       /* The branch trace handle. */
+       struct bts_tracer       *tracer;
+
+       /* The buffer used to store the branch trace and its size. */
+       void                    *buffer;
+       unsigned int            size;
+
+       /* The mm that paid for the above buffer. */
+       struct mm_struct        *mm;
+
+       /* The task this context belongs to. */
+       struct task_struct      *task;
+
+       /* The signal to send on a bts buffer overflow. */
+       unsigned int            bts_ovfl_signal;
+
+       /* The work struct to destroy a context. */
+       struct work_struct      work;
+};
+
+static int alloc_bts_buffer(struct bts_context *context, unsigned int size)
+{
+       void *buffer = NULL;
+       int err = -ENOMEM;
+
+       err = account_locked_memory(current->mm, current->signal->rlim, size);
+       if (err < 0)
+               return err;
+
+       buffer = kzalloc(size, GFP_KERNEL);
+       if (!buffer)
+               goto out_refund;
+
+       context->buffer = buffer;
+       context->size = size;
+       context->mm = get_task_mm(current);
+
+       return 0;
+
+ out_refund:
+       refund_locked_memory(current->mm, size);
+       return err;
+}
+
+static inline void free_bts_buffer(struct bts_context *context)
+{
+       if (!context->buffer)
+               return;
+
+       kfree(context->buffer);
+       context->buffer = NULL;
+
+       refund_locked_memory(context->mm, context->size);
+       context->size = 0;
+
+       mmput(context->mm);
+       context->mm = NULL;
+}
+
+static void free_bts_context_work(struct work_struct *w)
+{
+       struct bts_context *context;
+
+       context = container_of(w, struct bts_context, work);
+
+       ds_release_bts(context->tracer);
+       put_task_struct(context->task);
+       free_bts_buffer(context);
+       kfree(context);
+}
+
+static inline void free_bts_context(struct bts_context *context)
+{
+       INIT_WORK(&context->work, free_bts_context_work);
+       schedule_work(&context->work);
+}
+
+static inline struct bts_context *alloc_bts_context(struct task_struct *task)
+{
+       struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL);
+       if (context) {
+               context->task = task;
+               task->bts = context;
+
+               get_task_struct(task);
+       }
+
+       return context;
+}
+
 static int ptrace_bts_read_record(struct task_struct *child, size_t index,
                                  struct bts_struct __user *out)
 {
+       struct bts_context *context;
        const struct bts_trace *trace;
        struct bts_struct bts;
        const unsigned char *at;
        int error;
 
-       trace = ds_read_bts(child->bts);
+       context = child->bts;
+       if (!context)
+               return -ESRCH;
+
+       trace = ds_read_bts(context->tracer);
        if (!trace)
-               return -EPERM;
+               return -ESRCH;
 
        at = trace->ds.top - ((index + 1) * trace->ds.size);
        if ((void *)at < trace->ds.begin)
@@ -597,7 +711,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
        if (!trace->read)
                return -EOPNOTSUPP;
 
-       error = trace->read(child->bts, at, &bts);
+       error = trace->read(context->tracer, at, &bts);
        if (error < 0)
                return error;
 
@@ -611,13 +725,18 @@ static int ptrace_bts_drain(struct task_struct *child,
                            long size,
                            struct bts_struct __user *out)
 {
+       struct bts_context *context;
        const struct bts_trace *trace;
        const unsigned char *at;
        int error, drained = 0;
 
-       trace = ds_read_bts(child->bts);
+       context = child->bts;
+       if (!context)
+               return -ESRCH;
+
+       trace = ds_read_bts(context->tracer);
        if (!trace)
-               return -EPERM;
+               return -ESRCH;
 
        if (!trace->read)
                return -EOPNOTSUPP;
@@ -628,9 +747,8 @@ static int ptrace_bts_drain(struct task_struct *child,
        for (at = trace->ds.begin; (void *)at < trace->ds.top;
             out++, drained++, at += trace->ds.size) {
                struct bts_struct bts;
-               int error;
 
-               error = trace->read(child->bts, at, &bts);
+               error = trace->read(context->tracer, at, &bts);
                if (error < 0)
                        return error;
 
@@ -640,35 +758,18 @@ static int ptrace_bts_drain(struct task_struct *child,
 
        memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
 
-       error = ds_reset_bts(child->bts);
+       error = ds_reset_bts(context->tracer);
        if (error < 0)
                return error;
 
        return drained;
 }
 
-static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size)
-{
-       child->bts_buffer = alloc_locked_buffer(size);
-       if (!child->bts_buffer)
-               return -ENOMEM;
-
-       child->bts_size = size;
-
-       return 0;
-}
-
-static void ptrace_bts_free_buffer(struct task_struct *child)
-{
-       free_locked_buffer(child->bts_buffer, child->bts_size);
-       child->bts_buffer = NULL;
-       child->bts_size = 0;
-}
-
 static int ptrace_bts_config(struct task_struct *child,
                             long cfg_size,
                             const struct ptrace_bts_config __user *ucfg)
 {
+       struct bts_context *context;
        struct ptrace_bts_config cfg;
        unsigned int flags = 0;
 
@@ -678,28 +779,33 @@ static int ptrace_bts_config(struct task_struct *child,
        if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
                return -EFAULT;
 
-       if (child->bts) {
-               ds_release_bts(child->bts);
-               child->bts = NULL;
-       }
+       context = child->bts;
+       if (!context)
+               context = alloc_bts_context(child);
+       if (!context)
+               return -ENOMEM;
 
        if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
                if (!cfg.signal)
                        return -EINVAL;
 
-               child->thread.bts_ovfl_signal = cfg.signal;
                return -EOPNOTSUPP;
+               context->bts_ovfl_signal = cfg.signal;
        }
 
-       if ((cfg.flags & PTRACE_BTS_O_ALLOC) &&
-           (cfg.size != child->bts_size)) {
-               int error;
+       ds_release_bts(context->tracer);
+       context->tracer = NULL;
 
-               ptrace_bts_free_buffer(child);
+       if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
+               int err;
 
-               error = ptrace_bts_allocate_buffer(child, cfg.size);
-               if (error < 0)
-                       return error;
+               free_bts_buffer(context);
+               if (!cfg.size)
+                       return 0;
+
+               err = alloc_bts_buffer(context, cfg.size);
+               if (err < 0)
+                       return err;
        }
 
        if (cfg.flags & PTRACE_BTS_O_TRACE)
@@ -708,15 +814,14 @@ static int ptrace_bts_config(struct task_struct *child,
        if (cfg.flags & PTRACE_BTS_O_SCHED)
                flags |= BTS_TIMESTAMPS;
 
-       child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size,
-                                   /* ovfl = */ NULL, /* th = */ (size_t)-1,
-                                   flags);
-       if (IS_ERR(child->bts)) {
-               int error = PTR_ERR(child->bts);
-
-               ptrace_bts_free_buffer(child);
-               child->bts = NULL;
+       context->tracer =
+               ds_request_bts_task(child, context->buffer, context->size,
+                                   NULL, (size_t)-1, flags);
+       if (unlikely(IS_ERR(context->tracer))) {
+               int error = PTR_ERR(context->tracer);
 
+               free_bts_buffer(context);
+               context->tracer = NULL;
                return error;
        }
 
@@ -727,20 +832,25 @@ static int ptrace_bts_status(struct task_struct *child,
                             long cfg_size,
                             struct ptrace_bts_config __user *ucfg)
 {
+       struct bts_context *context;
        const struct bts_trace *trace;
        struct ptrace_bts_config cfg;
 
+       context = child->bts;
+       if (!context)
+               return -ESRCH;
+
        if (cfg_size < sizeof(cfg))
                return -EIO;
 
-       trace = ds_read_bts(child->bts);
+       trace = ds_read_bts(context->tracer);
        if (!trace)
-               return -EPERM;
+               return -ESRCH;
 
        memset(&cfg, 0, sizeof(cfg));
-       cfg.size = trace->ds.end - trace->ds.begin;
-       cfg.signal = child->thread.bts_ovfl_signal;
-       cfg.bts_size = sizeof(struct bts_struct);
+       cfg.size        = trace->ds.end - trace->ds.begin;
+       cfg.signal      = context->bts_ovfl_signal;
+       cfg.bts_size    = sizeof(struct bts_struct);
 
        if (cfg.signal)
                cfg.flags |= PTRACE_BTS_O_SIGNAL;
@@ -759,80 +869,51 @@ static int ptrace_bts_status(struct task_struct *child,
 
 static int ptrace_bts_clear(struct task_struct *child)
 {
+       struct bts_context *context;
        const struct bts_trace *trace;
 
-       trace = ds_read_bts(child->bts);
+       context = child->bts;
+       if (!context)
+               return -ESRCH;
+
+       trace = ds_read_bts(context->tracer);
        if (!trace)
-               return -EPERM;
+               return -ESRCH;
 
        memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
 
-       return ds_reset_bts(child->bts);
+       return ds_reset_bts(context->tracer);
 }
 
 static int ptrace_bts_size(struct task_struct *child)
 {
+       struct bts_context *context;
        const struct bts_trace *trace;
 
-       trace = ds_read_bts(child->bts);
+       context = child->bts;
+       if (!context)
+               return -ESRCH;
+
+       trace = ds_read_bts(context->tracer);
        if (!trace)
-               return -EPERM;
+               return -ESRCH;
 
        return (trace->ds.top - trace->ds.begin) / trace->ds.size;
 }
 
-static void ptrace_bts_fork(struct task_struct *tsk)
-{
-       tsk->bts = NULL;
-       tsk->bts_buffer = NULL;
-       tsk->bts_size = 0;
-       tsk->thread.bts_ovfl_signal = 0;
-}
-
-static void ptrace_bts_untrace(struct task_struct *child)
+/*
+ * Called from __ptrace_unlink() after the child has been moved back
+ * to its original parent.
+ */
+void ptrace_bts_untrace(struct task_struct *child)
 {
        if (unlikely(child->bts)) {
-               ds_release_bts(child->bts);
+               free_bts_context(child->bts);
                child->bts = NULL;
-
-               /* We cannot update total_vm and locked_vm since
-                  child's mm is already gone. But we can reclaim the
-                  memory. */
-               kfree(child->bts_buffer);
-               child->bts_buffer = NULL;
-               child->bts_size = 0;
        }
 }
-
-static void ptrace_bts_detach(struct task_struct *child)
-{
-       /*
-        * Ptrace_detach() races with ptrace_untrace() in case
-        * the child dies and is reaped by another thread.
-        *
-        * We only do the memory accounting at this point and
-        * leave the buffer deallocation and the bts tracer
-        * release to ptrace_bts_untrace() which will be called
-        * later on with tasklist_lock held.
-        */
-       release_locked_buffer(child->bts_buffer, child->bts_size);
-}
-#else
-static inline void ptrace_bts_fork(struct task_struct *tsk) {}
-static inline void ptrace_bts_detach(struct task_struct *child) {}
-static inline void ptrace_bts_untrace(struct task_struct *child) {}
 #endif /* CONFIG_X86_PTRACE_BTS */
 
-void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags)
-{
-       ptrace_bts_fork(child);
-}
-
-void x86_ptrace_untrace(struct task_struct *child)
-{
-       ptrace_bts_untrace(child);
-}
-
 /*
  * Called by kernel/ptrace.c when detaching..
  *
@@ -844,7 +925,6 @@ void ptrace_disable(struct task_struct *child)
 #ifdef TIF_SYSCALL_EMU
        clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 #endif
-       ptrace_bts_detach(child);
 }
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
index f7bddc2..4aaf7e4 100644 (file)
@@ -20,7 +20,7 @@ save_stack_warning_symbol(void *data, char *msg, unsigned long symbol)
 
 static int save_stack_stack(void *data, char *name)
 {
-       return -1;
+       return 0;
 }
 
 static void save_stack_address(void *data, unsigned long addr, int reliable)
index ff5c873..734f92c 100644 (file)
@@ -334,3 +334,4 @@ ENTRY(sys_call_table)
        .long sys_inotify_init1
        .long sys_preadv
        .long sys_pwritev
+       .long sys_rt_tgsigqueueinfo     /* 335 */
index 50dc802..16ccbd7 100644 (file)
@@ -32,7 +32,7 @@ struct kmmio_fault_page {
        struct list_head list;
        struct kmmio_fault_page *release_next;
        unsigned long page; /* location of the fault page */
-       bool old_presence; /* page presence prior to arming */
+       pteval_t old_presence; /* page presence prior to arming */
        bool armed;
 
        /*
@@ -97,60 +97,62 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
 static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
 {
        struct list_head *head;
-       struct kmmio_fault_page *p;
+       struct kmmio_fault_page *f;
 
        page &= PAGE_MASK;
        head = kmmio_page_list(page);
-       list_for_each_entry_rcu(p, head, list) {
-               if (p->page == page)
-                       return p;
+       list_for_each_entry_rcu(f, head, list) {
+               if (f->page == page)
+                       return f;
        }
        return NULL;
 }
 
-static void set_pmd_presence(pmd_t *pmd, bool present, bool *old)
+static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
 {
        pmdval_t v = pmd_val(*pmd);
-       *old = !!(v & _PAGE_PRESENT);
-       v &= ~_PAGE_PRESENT;
-       if (present)
-               v |= _PAGE_PRESENT;
+       if (clear) {
+               *old = v & _PAGE_PRESENT;
+               v &= ~_PAGE_PRESENT;
+       } else  /* presume this has been called with clear==true previously */
+               v |= *old;
        set_pmd(pmd, __pmd(v));
 }
 
-static void set_pte_presence(pte_t *pte, bool present, bool *old)
+static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
 {
        pteval_t v = pte_val(*pte);
-       *old = !!(v & _PAGE_PRESENT);
-       v &= ~_PAGE_PRESENT;
-       if (present)
-               v |= _PAGE_PRESENT;
+       if (clear) {
+               *old = v & _PAGE_PRESENT;
+               v &= ~_PAGE_PRESENT;
+       } else  /* presume this has been called with clear==true previously */
+               v |= *old;
        set_pte_atomic(pte, __pte(v));
 }
 
-static int set_page_presence(unsigned long addr, bool present, bool *old)
+static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
 {
        unsigned int level;
-       pte_t *pte = lookup_address(addr, &level);
+       pte_t *pte = lookup_address(f->page, &level);
 
        if (!pte) {
-               pr_err("kmmio: no pte for page 0x%08lx\n", addr);
+               pr_err("kmmio: no pte for page 0x%08lx\n", f->page);
                return -1;
        }
 
        switch (level) {
        case PG_LEVEL_2M:
-               set_pmd_presence((pmd_t *)pte, present, old);
+               clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence);
                break;
        case PG_LEVEL_4K:
-               set_pte_presence(pte, present, old);
+               clear_pte_presence(pte, clear, &f->old_presence);
                break;
        default:
                pr_err("kmmio: unexpected page level 0x%x.\n", level);
                return -1;
        }
 
-       __flush_tlb_one(addr);
+       __flush_tlb_one(f->page);
        return 0;
 }
 
@@ -171,9 +173,9 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
        WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
        if (f->armed) {
                pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
-                                       f->page, f->count, f->old_presence);
+                                       f->page, f->count, !!f->old_presence);
        }
-       ret = set_page_presence(f->page, false, &f->old_presence);
+       ret = clear_page_presence(f, true);
        WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
        f->armed = true;
        return ret;
@@ -182,8 +184,7 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
 /** Restore the given page to saved presence state. */
 static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
 {
-       bool tmp;
-       int ret = set_page_presence(f->page, f->old_presence, &tmp);
+       int ret = clear_page_presence(f, false);
        WARN_ONCE(ret < 0,
                        KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
        f->armed = false;
@@ -310,7 +311,12 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
        struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
 
        if (!ctx->active) {
-               pr_debug("kmmio: spurious debug trap on CPU %d.\n",
+               /*
+                * debug traps without an active context are due to either
+                * something external causing them (f.e. using a debugger while
+                * mmio tracing enabled), or erroneous behaviour
+                */
+               pr_warning("kmmio: unexpected debug trap on CPU %d.\n",
                                                        smp_processor_id());
                goto out;
        }
@@ -439,12 +445,12 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
                                                head,
                                                struct kmmio_delayed_release,
                                                rcu);
-       struct kmmio_fault_page *p = dr->release_list;
-       while (p) {
-               struct kmmio_fault_page *next = p->release_next;
-               BUG_ON(p->count);
-               kfree(p);
-               p = next;
+       struct kmmio_fault_page *f = dr->release_list;
+       while (f) {
+               struct kmmio_fault_page *next = f->release_next;
+               BUG_ON(f->count);
+               kfree(f);
+               f = next;
        }
        kfree(dr);
 }
@@ -453,19 +459,19 @@ static void remove_kmmio_fault_pages(struct rcu_head *head)
 {
        struct kmmio_delayed_release *dr =
                container_of(head, struct kmmio_delayed_release, rcu);
-       struct kmmio_fault_page *p = dr->release_list;
+       struct kmmio_fault_page *f = dr->release_list;
        struct kmmio_fault_page **prevp = &dr->release_list;
        unsigned long flags;
 
        spin_lock_irqsave(&kmmio_lock, flags);
-       while (p) {
-               if (!p->count) {
-                       list_del_rcu(&p->list);
-                       prevp = &p->release_next;
+       while (f) {
+               if (!f->count) {
+                       list_del_rcu(&f->list);
+                       prevp = &f->release_next;
                } else {
-                       *prevp = p->release_next;
+                       *prevp = f->release_next;
                }
-               p = p->release_next;
+               f = f->release_next;
        }
        spin_unlock_irqrestore(&kmmio_lock, flags);
 
@@ -528,8 +534,8 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
 }
 EXPORT_SYMBOL(unregister_kmmio_probe);
 
-static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
-                                                               void *args)
+static int
+kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
 {
        struct die_args *arg = args;
 
@@ -544,11 +550,23 @@ static struct notifier_block nb_die = {
        .notifier_call = kmmio_die_notifier
 };
 
-static int __init init_kmmio(void)
+int kmmio_init(void)
 {
        int i;
+
        for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
                INIT_LIST_HEAD(&kmmio_page_table[i]);
+
        return register_die_notifier(&nb_die);
 }
-fs_initcall(init_kmmio); /* should be before device_initcall() */
+
+void kmmio_cleanup(void)
+{
+       int i;
+
+       unregister_die_notifier(&nb_die);
+       for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) {
+               WARN_ONCE(!list_empty(&kmmio_page_table[i]),
+                       KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n");
+       }
+}
index c9342ed..132772a 100644 (file)
@@ -451,6 +451,7 @@ void enable_mmiotrace(void)
 
        if (nommiotrace)
                pr_info(NAME "MMIO tracing disabled.\n");
+       kmmio_init();
        enter_uniprocessor();
        spin_lock_irq(&trace_lock);
        atomic_inc(&mmiotrace_enabled);
@@ -473,6 +474,7 @@ void disable_mmiotrace(void)
 
        clear_trace_list(); /* guarantees: no more kmmio callbacks */
        leave_uniprocessor();
+       kmmio_cleanup();
        pr_info(NAME "disabled.\n");
 out:
        mutex_unlock(&mmiotrace_mutex);
index 202864a..3b285e6 100644 (file)
@@ -356,14 +356,11 @@ static void exit_sysfs(void)
 #define exit_sysfs() do { } while (0)
 #endif /* CONFIG_PM */
 
-static int p4force;
-module_param(p4force, int, 0);
-
 static int __init p4_init(char **cpu_type)
 {
        __u8 cpu_model = boot_cpu_data.x86_model;
 
-       if (!p4force && (cpu_model > 6 || cpu_model == 5))
+       if (cpu_model > 6 || cpu_model == 5)
                return 0;
 
 #ifndef CONFIG_SMP
@@ -389,10 +386,25 @@ static int __init p4_init(char **cpu_type)
        return 0;
 }
 
+static int force_arch_perfmon;
+static int force_cpu_type(const char *str, struct kernel_param *kp)
+{
+       if (!strcmp(str, "archperfmon")) {
+               force_arch_perfmon = 1;
+               printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
+       }
+
+       return 0;
+}
+module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
+
 static int __init ppro_init(char **cpu_type)
 {
        __u8 cpu_model = boot_cpu_data.x86_model;
 
+       if (force_arch_perfmon && cpu_has_arch_perfmon)
+               return 0;
+
        switch (cpu_model) {
        case 0 ... 2:
                *cpu_type = "i386/ppro";
@@ -414,6 +426,13 @@ static int __init ppro_init(char **cpu_type)
        case 15: case 23:
                *cpu_type = "i386/core_2";
                break;
+       case 26:
+               arch_perfmon_setup_counters();
+               *cpu_type = "i386/core_i7";
+               break;
+       case 28:
+               *cpu_type = "i386/atom";
+               break;
        default:
                /* Unknown */
                return 0;
index c89883b..648f15c 100644 (file)
 #include <linux/task_io_accounting_ops.h>
 #include <linux/blktrace_api.h>
 #include <linux/fault-inject.h>
-#include <trace/block.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/block.h>
 
 #include "blk.h"
 
-DEFINE_TRACE(block_plug);
-DEFINE_TRACE(block_unplug_io);
-DEFINE_TRACE(block_unplug_timer);
-DEFINE_TRACE(block_getrq);
-DEFINE_TRACE(block_sleeprq);
-DEFINE_TRACE(block_rq_requeue);
-DEFINE_TRACE(block_bio_backmerge);
-DEFINE_TRACE(block_bio_frontmerge);
-DEFINE_TRACE(block_bio_queue);
-DEFINE_TRACE(block_rq_complete);
-DEFINE_TRACE(block_remap);     /* Also used in drivers/md/dm.c */
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 
 static int __make_request(struct request_queue *q, struct bio *bio);
 
@@ -1277,7 +1269,7 @@ static inline void blk_partition_remap(struct bio *bio)
                bio->bi_bdev = bdev->bd_contains;
 
                trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
-                                   bdev->bd_dev, bio->bi_sector,
+                                   bdev->bd_dev,
                                    bio->bi_sector - p->start_sect);
        }
 }
@@ -1446,8 +1438,7 @@ static inline void __generic_make_request(struct bio *bio)
                        goto end_io;
 
                if (old_sector != -1)
-                       trace_block_remap(q, bio, old_dev, bio->bi_sector,
-                                           old_sector);
+                       trace_block_remap(q, bio, old_dev, old_sector);
 
                trace_block_bio_queue(q, bio);
 
@@ -1741,10 +1732,14 @@ static int __end_that_request_first(struct request *req, int error,
        trace_block_rq_complete(req->q, req);
 
        /*
-        * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
-        * sense key with us all the way through
+        * For fs requests, rq is just carrier of independent bio's
+        * and each partial completion should be handled separately.
+        * Reset per-request error on each partial completion.
+        *
+        * TODO: tj: This is too subtle.  It would be better to let
+        * low level drivers do what they see fit.
         */
-       if (!blk_pc_request(req))
+       if (blk_fs_request(req))
                req->errors = 0;
 
        if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) {
index 3ff9bba..26f9ec2 100644 (file)
@@ -383,16 +383,21 @@ struct kobj_type blk_queue_ktype = {
 int blk_register_queue(struct gendisk *disk)
 {
        int ret;
+       struct device *dev = disk_to_dev(disk);
 
        struct request_queue *q = disk->queue;
 
        if (WARN_ON(!q))
                return -ENXIO;
 
+       ret = blk_trace_init_sysfs(dev);
+       if (ret)
+               return ret;
+
        if (!q->request_fn)
                return 0;
 
-       ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj),
+       ret = kobject_add(&q->kobj, kobject_get(&dev->kobj),
                          "%s", "queue");
        if (ret < 0)
                return ret;
index f87615d..f8c218c 100644 (file)
@@ -568,7 +568,7 @@ static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg)
        memcpy(&buts.name, &cbuts.name, 32);
 
        mutex_lock(&bdev->bd_mutex);
-       ret = do_blk_trace_setup(q, b, bdev->bd_dev, &buts);
+       ret = do_blk_trace_setup(q, b, bdev->bd_dev, bdev, &buts);
        mutex_unlock(&bdev->bd_mutex);
        if (ret)
                return ret;
index 7073a90..e220f0c 100644 (file)
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
 #include <linux/hash.h>
 #include <linux/uaccess.h>
 
+#include <trace/events/block.h>
+
 #include "blk.h"
 
 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
 
-DEFINE_TRACE(block_rq_abort);
-
 /*
  * Merge hash stuff.
  */
@@ -55,9 +54,6 @@ static const int elv_hash_shift = 6;
 #define rq_hash_key(rq)                ((rq)->sector + (rq)->nr_sectors)
 #define ELV_ON_HASH(rq)                (!hlist_unhashed(&(rq)->hash))
 
-DEFINE_TRACE(block_rq_insert);
-DEFINE_TRACE(block_rq_issue);
-
 /*
  * Query io scheduler to see if the current process issuing bio may be
  * merged with rq.
index af761dc..4895f0e 100644 (file)
@@ -277,8 +277,8 @@ static int hci_uart_tty_open(struct tty_struct *tty)
        /* FIXME: why is this needed. Note don't use ldisc_ref here as the
           open path is before the ldisc is referencable */
 
-       if (tty->ldisc.ops->flush_buffer)
-               tty->ldisc.ops->flush_buffer(tty);
+       if (tty->ldisc->ops->flush_buffer)
+               tty->ldisc->ops->flush_buffer(tty);
        tty_driver_flush_buffer(tty);
 
        return 0;
@@ -463,7 +463,6 @@ static int hci_uart_tty_ioctl(struct tty_struct *tty, struct file * file,
                                clear_bit(HCI_UART_PROTO_SET, &hu->flags);
                                return err;
                        }
-                       tty->low_latency = 1;
                } else
                        return -EBUSY;
                break;
index 735bbe2..02ecfd5 100644 (file)
@@ -97,6 +97,19 @@ config DEVKMEM
          kind of kernel debugging operations.
          When in doubt, say "N".
 
+config BFIN_JTAG_COMM
+       tristate "Blackfin JTAG Communication"
+       depends on BLACKFIN
+       help
+         Add support for emulating a TTY device over the Blackfin JTAG.
+
+         To compile this driver as a module, choose M here: the
+         module will be called bfin_jtag_comm.
+
+config BFIN_JTAG_COMM_CONSOLE
+       bool "Console on Blackfin JTAG"
+       depends on BFIN_JTAG_COMM=y
+
 config SERIAL_NONSTANDARD
        bool "Non-standard serial port support"
        depends on HAS_IOMEM
index 9caf5b5..189efcf 100644 (file)
@@ -13,6 +13,7 @@ obj-$(CONFIG_LEGACY_PTYS)     += pty.o
 obj-$(CONFIG_UNIX98_PTYS)      += pty.o
 obj-y                          += misc.o
 obj-$(CONFIG_VT)               += vt_ioctl.o vc_screen.o selection.o keyboard.o
+obj-$(CONFIG_BFIN_JTAG_COMM)   += bfin_jtag_comm.o
 obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o
 obj-$(CONFIG_HW_CONSOLE)       += vt.o defkeymap.o
 obj-$(CONFIG_AUDIT)            += tty_audit.o
diff --git a/drivers/char/bfin_jtag_comm.c b/drivers/char/bfin_jtag_comm.c
new file mode 100644 (file)
index 0000000..44c113d
--- /dev/null
@@ -0,0 +1,365 @@
+/*
+ * TTY over Blackfin JTAG Communication
+ *
+ * Copyright 2008-2009 Analog Devices Inc.
+ *
+ * Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/circ_buf.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/tty_flip.h>
+#include <asm/atomic.h>
+
+/* See the Debug/Emulation chapter in the HRM */
+#define EMUDOF   0x00000001    /* EMUDAT_OUT full & valid */
+#define EMUDIF   0x00000002    /* EMUDAT_IN full & valid */
+#define EMUDOOVF 0x00000004    /* EMUDAT_OUT overflow */
+#define EMUDIOVF 0x00000008    /* EMUDAT_IN overflow */
+
+#define DRV_NAME "bfin-jtag-comm"
+#define DEV_NAME "ttyBFJC"
+
+#define pr_init(fmt, args...) ({ static const __initdata char __fmt[] = fmt; printk(__fmt, ## args); })
+#define debug(fmt, args...) pr_debug(DRV_NAME ": " fmt, ## args)
+
+static inline uint32_t bfin_write_emudat(uint32_t emudat)
+{
+       __asm__ __volatile__("emudat = %0;" : : "d"(emudat));
+       return emudat;
+}
+
+static inline uint32_t bfin_read_emudat(void)
+{
+       uint32_t emudat;
+       __asm__ __volatile__("%0 = emudat;" : "=d"(emudat));
+       return emudat;
+}
+
+static inline uint32_t bfin_write_emudat_chars(char a, char b, char c, char d)
+{
+       return bfin_write_emudat((a << 0) | (b << 8) | (c << 16) | (d << 24));
+}
+
+#define CIRC_SIZE 2048 /* see comment in tty_io.c:do_tty_write() */
+#define CIRC_MASK (CIRC_SIZE - 1)
+#define circ_empty(circ)     ((circ)->head == (circ)->tail)
+#define circ_free(circ)      CIRC_SPACE((circ)->head, (circ)->tail, CIRC_SIZE)
+#define circ_cnt(circ)       CIRC_CNT((circ)->head, (circ)->tail, CIRC_SIZE)
+#define circ_byte(circ, idx) ((circ)->buf[(idx) & CIRC_MASK])
+
+static struct tty_driver *bfin_jc_driver;
+static struct task_struct *bfin_jc_kthread;
+static struct tty_struct * volatile bfin_jc_tty;
+static unsigned long bfin_jc_count;
+static DEFINE_MUTEX(bfin_jc_tty_mutex);
+static volatile struct circ_buf bfin_jc_write_buf;
+
+static int
+bfin_jc_emudat_manager(void *arg)
+{
+       uint32_t inbound_len = 0, outbound_len = 0;
+
+       while (!kthread_should_stop()) {
+               /* no one left to give data to, so sleep */
+               if (bfin_jc_tty == NULL && circ_empty(&bfin_jc_write_buf)) {
+                       debug("waiting for readers\n");
+                       __set_current_state(TASK_UNINTERRUPTIBLE);
+                       schedule();
+                       __set_current_state(TASK_RUNNING);
+               }
+
+               /* no data available, so just chill */
+               if (!(bfin_read_DBGSTAT() & EMUDIF) && circ_empty(&bfin_jc_write_buf)) {
+                       debug("waiting for data (in_len = %i) (circ: %i %i)\n",
+                               inbound_len, bfin_jc_write_buf.tail, bfin_jc_write_buf.head);
+                       if (inbound_len)
+                               schedule();
+                       else
+                               schedule_timeout_interruptible(HZ);
+                       continue;
+               }
+
+               /* if incoming data is ready, eat it */
+               if (bfin_read_DBGSTAT() & EMUDIF) {
+                       struct tty_struct *tty;
+                       mutex_lock(&bfin_jc_tty_mutex);
+                       tty = (struct tty_struct *)bfin_jc_tty;
+                       if (tty != NULL) {
+                               uint32_t emudat = bfin_read_emudat();
+                               if (inbound_len == 0) {
+                                       debug("incoming length: 0x%08x\n", emudat);
+                                       inbound_len = emudat;
+                               } else {
+                                       size_t num_chars = (4 <= inbound_len ? 4 : inbound_len);
+                                       debug("  incoming data: 0x%08x (pushing %zu)\n", emudat, num_chars);
+                                       inbound_len -= num_chars;
+                                       tty_insert_flip_string(tty, (unsigned char *)&emudat, num_chars);
+                                       tty_flip_buffer_push(tty);
+                               }
+                       }
+                       mutex_unlock(&bfin_jc_tty_mutex);
+               }
+
+               /* if outgoing data is ready, post it */
+               if (!(bfin_read_DBGSTAT() & EMUDOF) && !circ_empty(&bfin_jc_write_buf)) {
+                       if (outbound_len == 0) {
+                               outbound_len = circ_cnt(&bfin_jc_write_buf);
+                               bfin_write_emudat(outbound_len);
+                               debug("outgoing length: 0x%08x\n", outbound_len);
+                       } else {
+                               struct tty_struct *tty;
+                               int tail = bfin_jc_write_buf.tail;
+                               size_t ate = (4 <= outbound_len ? 4 : outbound_len);
+                               uint32_t emudat =
+                               bfin_write_emudat_chars(
+                                       circ_byte(&bfin_jc_write_buf, tail + 0),
+                                       circ_byte(&bfin_jc_write_buf, tail + 1),
+                                       circ_byte(&bfin_jc_write_buf, tail + 2),
+                                       circ_byte(&bfin_jc_write_buf, tail + 3)
+                               );
+                               bfin_jc_write_buf.tail += ate;
+                               outbound_len -= ate;
+                               mutex_lock(&bfin_jc_tty_mutex);
+                               tty = (struct tty_struct *)bfin_jc_tty;
+                               if (tty)
+                                       tty_wakeup(tty);
+                               mutex_unlock(&bfin_jc_tty_mutex);
+                               debug("  outgoing data: 0x%08x (pushing %zu)\n", emudat, ate);
+                       }
+               }
+       }
+
+       __set_current_state(TASK_RUNNING);
+       return 0;
+}
+
+static int
+bfin_jc_open(struct tty_struct *tty, struct file *filp)
+{
+       mutex_lock(&bfin_jc_tty_mutex);
+       debug("open %lu\n", bfin_jc_count);
+       ++bfin_jc_count;
+       bfin_jc_tty = tty;
+       wake_up_process(bfin_jc_kthread);
+       mutex_unlock(&bfin_jc_tty_mutex);
+       return 0;
+}
+
+static void
+bfin_jc_close(struct tty_struct *tty, struct file *filp)
+{
+       mutex_lock(&bfin_jc_tty_mutex);
+       debug("close %lu\n", bfin_jc_count);
+       if (--bfin_jc_count == 0)
+               bfin_jc_tty = NULL;
+       wake_up_process(bfin_jc_kthread);
+       mutex_unlock(&bfin_jc_tty_mutex);
+}
+
+/* XXX: we dont handle the put_char() case where we must handle count = 1 */
+static int
+bfin_jc_circ_write(const unsigned char *buf, int count)
+{
+       int i;
+       count = min(count, circ_free(&bfin_jc_write_buf));
+       debug("going to write chunk of %i bytes\n", count);
+       for (i = 0; i < count; ++i)
+               circ_byte(&bfin_jc_write_buf, bfin_jc_write_buf.head + i) = buf[i];
+       bfin_jc_write_buf.head += i;
+       return i;
+}
+
+#ifndef CONFIG_BFIN_JTAG_COMM_CONSOLE
+# define acquire_console_sem()
+# define release_console_sem()
+#endif
+static int
+bfin_jc_write(struct tty_struct *tty, const unsigned char *buf, int count)
+{
+       int i;
+       acquire_console_sem();
+       i = bfin_jc_circ_write(buf, count);
+       release_console_sem();
+       wake_up_process(bfin_jc_kthread);
+       return i;
+}
+
+static void
+bfin_jc_flush_chars(struct tty_struct *tty)
+{
+       wake_up_process(bfin_jc_kthread);
+}
+
+static int
+bfin_jc_write_room(struct tty_struct *tty)
+{
+       return circ_free(&bfin_jc_write_buf);
+}
+
+static int
+bfin_jc_chars_in_buffer(struct tty_struct *tty)
+{
+       return circ_cnt(&bfin_jc_write_buf);
+}
+
+static void
+bfin_jc_wait_until_sent(struct tty_struct *tty, int timeout)
+{
+       unsigned long expire = jiffies + timeout;
+       while (!circ_empty(&bfin_jc_write_buf)) {
+               if (signal_pending(current))
+                       break;
+               if (time_after(jiffies, expire))
+                       break;
+       }
+}
+
+static struct tty_operations bfin_jc_ops = {
+       .open            = bfin_jc_open,
+       .close           = bfin_jc_close,
+       .write           = bfin_jc_write,
+       /*.put_char        = bfin_jc_put_char,*/
+       .flush_chars     = bfin_jc_flush_chars,
+       .write_room      = bfin_jc_write_room,
+       .chars_in_buffer = bfin_jc_chars_in_buffer,
+       .wait_until_sent = bfin_jc_wait_until_sent,
+};
+
+static int __init bfin_jc_init(void)
+{
+       int ret;
+
+       bfin_jc_kthread = kthread_create(bfin_jc_emudat_manager, NULL, DRV_NAME);
+       if (IS_ERR(bfin_jc_kthread))
+               return PTR_ERR(bfin_jc_kthread);
+
+       ret = -ENOMEM;
+
+       bfin_jc_write_buf.head = bfin_jc_write_buf.tail = 0;
+       bfin_jc_write_buf.buf = kmalloc(CIRC_SIZE, GFP_KERNEL);
+       if (!bfin_jc_write_buf.buf)
+               goto err;
+
+       bfin_jc_driver = alloc_tty_driver(1);
+       if (!bfin_jc_driver)
+               goto err;
+
+       bfin_jc_driver->owner        = THIS_MODULE;
+       bfin_jc_driver->driver_name  = DRV_NAME;
+       bfin_jc_driver->name         = DEV_NAME;
+       bfin_jc_driver->type         = TTY_DRIVER_TYPE_SERIAL;
+       bfin_jc_driver->subtype      = SERIAL_TYPE_NORMAL;
+       bfin_jc_driver->init_termios = tty_std_termios;
+       tty_set_operations(bfin_jc_driver, &bfin_jc_ops);
+
+       ret = tty_register_driver(bfin_jc_driver);
+       if (ret)
+               goto err;
+
+       pr_init(KERN_INFO DRV_NAME ": initialized\n");
+
+       return 0;
+
+ err:
+       put_tty_driver(bfin_jc_driver);
+       kfree(bfin_jc_write_buf.buf);
+       kthread_stop(bfin_jc_kthread);
+       return ret;
+}
+module_init(bfin_jc_init);
+
+static void __exit bfin_jc_exit(void)
+{
+       kthread_stop(bfin_jc_kthread);
+       kfree(bfin_jc_write_buf.buf);
+       tty_unregister_driver(bfin_jc_driver);
+       put_tty_driver(bfin_jc_driver);
+}
+module_exit(bfin_jc_exit);
+
+#if defined(CONFIG_BFIN_JTAG_COMM_CONSOLE) || defined(CONFIG_EARLY_PRINTK)
+static void
+bfin_jc_straight_buffer_write(const char *buf, unsigned count)
+{
+       unsigned ate = 0;
+       while (bfin_read_DBGSTAT() & EMUDOF)
+               continue;
+       bfin_write_emudat(count);
+       while (ate < count) {
+               while (bfin_read_DBGSTAT() & EMUDOF)
+                       continue;
+               bfin_write_emudat_chars(buf[ate], buf[ate+1], buf[ate+2], buf[ate+3]);
+               ate += 4;
+       }
+}
+#endif
+
+#ifdef CONFIG_BFIN_JTAG_COMM_CONSOLE
+static void
+bfin_jc_console_write(struct console *co, const char *buf, unsigned count)
+{
+       if (bfin_jc_kthread == NULL)
+               bfin_jc_straight_buffer_write(buf, count);
+       else
+               bfin_jc_circ_write(buf, count);
+}
+
+static struct tty_driver *
+bfin_jc_console_device(struct console *co, int *index)
+{
+       *index = co->index;
+       return bfin_jc_driver;
+}
+
+static struct console bfin_jc_console = {
+       .name    = DEV_NAME,
+       .write   = bfin_jc_console_write,
+       .device  = bfin_jc_console_device,
+       .flags   = CON_ANYTIME | CON_PRINTBUFFER,
+       .index   = -1,
+};
+
+static int __init bfin_jc_console_init(void)
+{
+       register_console(&bfin_jc_console);
+       return 0;
+}
+console_initcall(bfin_jc_console_init);
+#endif
+
+#ifdef CONFIG_EARLY_PRINTK
+static void __init
+bfin_jc_early_write(struct console *co, const char *buf, unsigned int count)
+{
+       bfin_jc_straight_buffer_write(buf, count);
+}
+
+static struct __initdata console bfin_jc_early_console = {
+       .name   = "early_BFJC",
+       .write   = bfin_jc_early_write,
+       .flags   = CON_ANYTIME | CON_PRINTBUFFER,
+       .index   = -1,
+};
+
+struct console * __init
+bfin_jc_early_init(unsigned int port, unsigned int cflag)
+{
+       return &bfin_jc_early_console;
+}
+#endif
+
+MODULE_AUTHOR("Mike Frysinger <vapier@gentoo.org>");
+MODULE_DESCRIPTION("TTY over Blackfin JTAG Communication");
+MODULE_LICENSE("GPL");
index 1fdb9f6..f3366d3 100644 (file)
 
 #define NR_PORTS       256
 
-#define ZE_V1_NPORTS   64
 #define ZO_V1  0
 #define ZO_V2  1
 #define ZE_V1  2
 static void cy_throttle(struct tty_struct *tty);
 static void cy_send_xchar(struct tty_struct *tty, char ch);
 
-#define IS_CYC_Z(card) ((card).num_chips == (unsigned int)-1)
-
-#define Z_FPGA_CHECK(card) \
-       ((readl(&((struct RUNTIME_9060 __iomem *) \
-               ((card).ctl_addr))->init_ctrl) & (1<<17)) != 0)
-
-#define ISZLOADED(card)        (((ZO_V1 == readl(&((struct RUNTIME_9060 __iomem *) \
-                       ((card).ctl_addr))->mail_box_0)) || \
-                       Z_FPGA_CHECK(card)) && \
-                       (ZFIRM_ID == readl(&((struct FIRM_ID __iomem *) \
-                       ((card).base_addr+ID_ADDRESS))->signature)))
-
 #ifndef SERIAL_XMIT_SIZE
 #define        SERIAL_XMIT_SIZE        (min(PAGE_SIZE, 4096))
 #endif
@@ -687,8 +674,6 @@ static void cy_send_xchar(struct tty_struct *tty, char ch);
 #define DRIVER_VERSION 0x02010203
 #define RAM_SIZE 0x80000
 
-#define Z_FPGA_LOADED(X)       ((readl(&(X)->init_ctrl) & (1<<17)) != 0)
-
 enum zblock_type {
        ZBLOCK_PRG = 0,
        ZBLOCK_FPGA = 1
@@ -883,6 +868,29 @@ static void cyz_rx_restart(unsigned long);
 static struct timer_list cyz_rx_full_timer[NR_PORTS];
 #endif                         /* CONFIG_CYZ_INTR */
 
+static inline bool cy_is_Z(struct cyclades_card *card)
+{
+       return card->num_chips == (unsigned int)-1;
+}
+
+static inline bool __cyz_fpga_loaded(struct RUNTIME_9060 __iomem *ctl_addr)
+{
+       return readl(&ctl_addr->init_ctrl) & (1 << 17);
+}
+
+static inline bool cyz_fpga_loaded(struct cyclades_card *card)
+{
+       return __cyz_fpga_loaded(card->ctl_addr.p9060);
+}
+
+static inline bool cyz_is_loaded(struct cyclades_card *card)
+{
+       struct FIRM_ID __iomem *fw_id = card->base_addr + ID_ADDRESS;
+
+       return (card->hw_ver == ZO_V1 || cyz_fpga_loaded(card)) &&
+                       readl(&fw_id->signature) == ZFIRM_ID;
+}
+
 static inline int serial_paranoia_check(struct cyclades_port *info,
                char *name, const char *routine)
 {
@@ -1395,19 +1403,15 @@ cyz_fetch_msg(struct cyclades_card *cinfo,
        unsigned long loc_doorbell;
 
        firm_id = cinfo->base_addr + ID_ADDRESS;
-       if (!ISZLOADED(*cinfo))
-               return -1;
        zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
        board_ctrl = &zfw_ctrl->board_ctrl;
 
-       loc_doorbell = readl(&((struct RUNTIME_9060 __iomem *)
-                                 (cinfo->ctl_addr))->loc_doorbell);
+       loc_doorbell = readl(&cinfo->ctl_addr.p9060->loc_doorbell);
        if (loc_doorbell) {
                *cmd = (char)(0xff & loc_doorbell);
                *channel = readl(&board_ctrl->fwcmd_channel);
                *param = (__u32) readl(&board_ctrl->fwcmd_param);
-               cy_writel(&((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->
-                         loc_doorbell, 0xffffffff);
+               cy_writel(&cinfo->ctl_addr.p9060->loc_doorbell, 0xffffffff);
                return 1;
        }
        return 0;
@@ -1424,15 +1428,14 @@ cyz_issue_cmd(struct cyclades_card *cinfo,
        unsigned int index;
 
        firm_id = cinfo->base_addr + ID_ADDRESS;
-       if (!ISZLOADED(*cinfo))
+       if (!cyz_is_loaded(cinfo))
                return -1;
 
        zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
        board_ctrl = &zfw_ctrl->board_ctrl;
 
        index = 0;
-       pci_